Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -330,11 +330,6 @@ class InputRegistry:
|
|||||||
from vllm.multimodal import MultiModalKwargs
|
from vllm.multimodal import MultiModalKwargs
|
||||||
from vllm.multimodal.profiling import MultiModalProfiler
|
from vllm.multimodal.profiling import MultiModalProfiler
|
||||||
|
|
||||||
if seq_len > model_config.max_model_len:
|
|
||||||
raise AssertionError(
|
|
||||||
f"Profiling attempted with sequence length ({seq_len}) "
|
|
||||||
f"greater than model length ({model_config.max_model_len})")
|
|
||||||
|
|
||||||
if mm_registry.has_processor(model_config):
|
if mm_registry.has_processor(model_config):
|
||||||
tokenizer = cached_tokenizer_from_config(model_config)
|
tokenizer = cached_tokenizer_from_config(model_config)
|
||||||
processor = mm_registry.create_processor(model_config,
|
processor = mm_registry.create_processor(model_config,
|
||||||
|
|||||||
@@ -281,7 +281,6 @@ class EncoderDecoderModelRunner(GPUModelRunnerBase[EncoderDecoderModelInput]):
|
|||||||
for group_id in range(max_num_seqs):
|
for group_id in range(max_num_seqs):
|
||||||
seq_len = (max_num_batched_tokens // max_num_seqs +
|
seq_len = (max_num_batched_tokens // max_num_seqs +
|
||||||
(group_id < max_num_batched_tokens % max_num_seqs))
|
(group_id < max_num_batched_tokens % max_num_seqs))
|
||||||
seq_len = min(seq_len, self.model_config.max_model_len)
|
|
||||||
batch_size += seq_len
|
batch_size += seq_len
|
||||||
|
|
||||||
decoder_dummy_data = self.input_registry \
|
decoder_dummy_data = self.input_registry \
|
||||||
|
|||||||
@@ -1302,7 +1302,6 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
|
|||||||
for group_id in range(max_num_seqs):
|
for group_id in range(max_num_seqs):
|
||||||
seq_len = (max_num_batched_tokens // max_num_seqs +
|
seq_len = (max_num_batched_tokens // max_num_seqs +
|
||||||
(group_id < max_num_batched_tokens % max_num_seqs))
|
(group_id < max_num_batched_tokens % max_num_seqs))
|
||||||
seq_len = min(seq_len, self.model_config.max_model_len)
|
|
||||||
batch_size += seq_len
|
batch_size += seq_len
|
||||||
|
|
||||||
dummy_data = self.input_registry \
|
dummy_data = self.input_registry \
|
||||||
|
|||||||
@@ -148,7 +148,6 @@ class OpenVINOModelRunner(ModelRunnerBase):
|
|||||||
seq_len = min(
|
seq_len = min(
|
||||||
seq_data.get_len(),
|
seq_data.get_len(),
|
||||||
computed_len + seq_group_metadata.token_chunk_size,
|
computed_len + seq_group_metadata.token_chunk_size,
|
||||||
self.model_config.max_model_len,
|
|
||||||
)
|
)
|
||||||
if is_prompt:
|
if is_prompt:
|
||||||
tokens = seq_data.get_token_ids()[computed_len:seq_len]
|
tokens = seq_data.get_token_ids()[computed_len:seq_len]
|
||||||
|
|||||||
@@ -466,7 +466,6 @@ class XPUModelRunner(ModelRunnerBase[ModelInputForXPUWithSamplingMetadata]):
|
|||||||
for group_id in range(max_num_seqs):
|
for group_id in range(max_num_seqs):
|
||||||
seq_len = (max_num_batched_tokens // max_num_seqs +
|
seq_len = (max_num_batched_tokens // max_num_seqs +
|
||||||
(group_id < max_num_batched_tokens % max_num_seqs))
|
(group_id < max_num_batched_tokens % max_num_seqs))
|
||||||
seq_len = min(seq_len, self.model_config.max_model_len)
|
|
||||||
batch_size += seq_len
|
batch_size += seq_len
|
||||||
|
|
||||||
dummy_data = self.input_registry \
|
dummy_data = self.input_registry \
|
||||||
|
|||||||
Reference in New Issue
Block a user