Revert "[Bugfix] Limit profiling run sequence length by max_model_len (#14785)" (#14892)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-03-17 00:13:46 +08:00
committed by GitHub
parent e53b1350f2
commit f6137adbcb
5 changed files with 0 additions and 9 deletions

View File

@@ -330,11 +330,6 @@ class InputRegistry:
from vllm.multimodal import MultiModalKwargs from vllm.multimodal import MultiModalKwargs
from vllm.multimodal.profiling import MultiModalProfiler from vllm.multimodal.profiling import MultiModalProfiler
if seq_len > model_config.max_model_len:
raise AssertionError(
f"Profiling attempted with sequence length ({seq_len}) "
f"greater than model length ({model_config.max_model_len})")
if mm_registry.has_processor(model_config): if mm_registry.has_processor(model_config):
tokenizer = cached_tokenizer_from_config(model_config) tokenizer = cached_tokenizer_from_config(model_config)
processor = mm_registry.create_processor(model_config, processor = mm_registry.create_processor(model_config,

View File

@@ -281,7 +281,6 @@ class EncoderDecoderModelRunner(GPUModelRunnerBase[EncoderDecoderModelInput]):
for group_id in range(max_num_seqs): for group_id in range(max_num_seqs):
seq_len = (max_num_batched_tokens // max_num_seqs + seq_len = (max_num_batched_tokens // max_num_seqs +
(group_id < max_num_batched_tokens % max_num_seqs)) (group_id < max_num_batched_tokens % max_num_seqs))
seq_len = min(seq_len, self.model_config.max_model_len)
batch_size += seq_len batch_size += seq_len
decoder_dummy_data = self.input_registry \ decoder_dummy_data = self.input_registry \

View File

@@ -1302,7 +1302,6 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
for group_id in range(max_num_seqs): for group_id in range(max_num_seqs):
seq_len = (max_num_batched_tokens // max_num_seqs + seq_len = (max_num_batched_tokens // max_num_seqs +
(group_id < max_num_batched_tokens % max_num_seqs)) (group_id < max_num_batched_tokens % max_num_seqs))
seq_len = min(seq_len, self.model_config.max_model_len)
batch_size += seq_len batch_size += seq_len
dummy_data = self.input_registry \ dummy_data = self.input_registry \

View File

@@ -148,7 +148,6 @@ class OpenVINOModelRunner(ModelRunnerBase):
seq_len = min( seq_len = min(
seq_data.get_len(), seq_data.get_len(),
computed_len + seq_group_metadata.token_chunk_size, computed_len + seq_group_metadata.token_chunk_size,
self.model_config.max_model_len,
) )
if is_prompt: if is_prompt:
tokens = seq_data.get_token_ids()[computed_len:seq_len] tokens = seq_data.get_token_ids()[computed_len:seq_len]

View File

@@ -466,7 +466,6 @@ class XPUModelRunner(ModelRunnerBase[ModelInputForXPUWithSamplingMetadata]):
for group_id in range(max_num_seqs): for group_id in range(max_num_seqs):
seq_len = (max_num_batched_tokens // max_num_seqs + seq_len = (max_num_batched_tokens // max_num_seqs +
(group_id < max_num_batched_tokens % max_num_seqs)) (group_id < max_num_batched_tokens % max_num_seqs))
seq_len = min(seq_len, self.model_config.max_model_len)
batch_size += seq_len batch_size += seq_len
dummy_data = self.input_registry \ dummy_data = self.input_registry \