Remove default values from InitVars so that they're not stored (#29859)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
(cherry picked from commit 951445a52d)
Committed by: Kevin H. Luu
Parent: d8c6210eea
Commit: 85fb2e3120
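For readers unfamiliar with the pattern: a dataclasses.InitVar is passed to __post_init__ and never stored on the instance, so giving one a default quietly decouples it from the config it should be derived from. A minimal sketch of the new shape (hypothetical class and field names, not vLLM's actual config classes):

from dataclasses import InitVar, dataclass, field


@dataclass
class ConfigSketch:
    max_num_batched_tokens: int
    # No default on the InitVar: omitting it is now a TypeError at
    # construction time instead of a silently stored stale value.
    max_model_len: InitVar[int]
    derived_limit: int = field(init=False)

    def __post_init__(self, max_model_len: int) -> None:
        # The InitVar is consumed here for derivation, then discarded.
        self.derived_limit = min(self.max_num_batched_tokens, max_model_len)


cfg = ConfigSketch(max_num_batched_tokens=32768, max_model_len=4096)
assert cfg.derived_limit == 4096
assert not hasattr(cfg, "max_model_len")  # InitVars are never stored

This is why each test below now passes the scheduler-side values explicitly, reading them off the already-built ModelConfig.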
@@ -1128,7 +1128,11 @@ def test_estimate_max_model_len(model_id, max_model_len, want_estimated_max_len)
         dtype="float16",
         max_model_len=max_model_len,
     )
-    scheduler_config = SchedulerConfig(max_num_batched_tokens=32768)
+    scheduler_config = SchedulerConfig(
+        max_num_batched_tokens=32768,
+        max_model_len=model_config.max_model_len,
+        is_encoder_decoder=model_config.is_encoder_decoder,
+    )

     vllm_config = VllmConfig(
         model_config=model_config,

@@ -1163,7 +1167,10 @@ def test_get_max_concurrency_for_kv_cache_config():
         max_model_len=max_model_len,
     )
     scheduler_config = SchedulerConfig(
-        max_num_batched_tokens=1024, enable_chunked_prefill=True
+        max_num_batched_tokens=1024,
+        enable_chunked_prefill=True,
+        max_model_len=model_config.max_model_len,
+        is_encoder_decoder=model_config.is_encoder_decoder,
     )

     vllm_config = VllmConfig(
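The scheduler-test helpers below get the same treatment. Because SchedulerConfig no longer defaults max_model_len and is_encoder_decoder, the ModelConfig has to be built first so its attributes can be threaded through; roughly (a fragment of the hunks that follow, not a new API):

# Inside the helpers (sketch; `model` and `max_num_batched_tokens` are the
# helper's parameters, and ModelConfig/SchedulerConfig come from vllm.config
# as in the tests):
model_config = ModelConfig(model=model, dtype="float16", seed=42)
scheduler_config = SchedulerConfig(
    max_num_batched_tokens=max_num_batched_tokens,
    max_model_len=model_config.max_model_len,
    is_encoder_decoder=model_config.is_encoder_decoder,
)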
@@ -1508,6 +1508,12 @@ def create_scheduler_with_priority(
     Returns:
         {class}`Scheduler` instance with priority scheduling
     """
+    model_config = ModelConfig(
+        model=model,
+        trust_remote_code=True,
+        dtype="float16",
+        seed=42,
+    )
     if max_model_len is None:
         max_model_len = max_num_batched_tokens
     scheduler_config = SchedulerConfig(

@@ -1517,14 +1523,9 @@ def create_scheduler_with_priority(
         long_prefill_token_threshold=long_prefill_token_threshold,
         disable_chunked_mm_input=disable_chunked_mm_input,
         enable_chunked_prefill=True,
+        is_encoder_decoder=model_config.is_encoder_decoder,
         policy="priority",  # Enable priority scheduling
     )
-    model_config = ModelConfig(
-        model=model,
-        trust_remote_code=True,
-        dtype="float16",
-        seed=42,
-    )
     # Cache config, optionally force APC
     cache_config = CacheConfig(
         block_size=block_size,

@@ -69,6 +69,13 @@ def create_scheduler(
     Returns:
         {class}`Scheduler` instance
     """
+    model_config = ModelConfig(
+        model=model,
+        trust_remote_code=True,
+        dtype="float16",
+        seed=42,
+        skip_tokenizer_init=skip_tokenizer_init,
+    )
     if max_model_len is None:
         max_model_len = max_num_batched_tokens
     scheduler_config = SchedulerConfig(

@@ -79,13 +86,7 @@ def create_scheduler(
         disable_chunked_mm_input=disable_chunked_mm_input,
         enable_chunked_prefill=enable_chunked_prefill,
         async_scheduling=async_scheduling,
-    )
-    model_config = ModelConfig(
-        model=model,
-        trust_remote_code=True,
-        dtype="float16",
-        seed=42,
-        skip_tokenizer_init=skip_tokenizer_init,
+        is_encoder_decoder=model_config.is_encoder_decoder,
     )
     # Cache config, optionally force APC
     cache_config = CacheConfig(
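As a closing illustration of the failure mode the commit removes, here is the old shape in sketch form (hypothetical names and numbers): with a defaulted InitVar, forgetting to pass the model's real value raises no error, and a wrong derived value gets stored.

from dataclasses import InitVar, dataclass, field


@dataclass
class DefaultedSketch:
    max_num_batched_tokens: int
    # The old shape: a default lets callers omit the real model value.
    max_model_len: InitVar[int] = 8192
    derived_limit: int = field(init=False)

    def __post_init__(self, max_model_len: int) -> None:
        self.derived_limit = min(self.max_num_batched_tokens, max_model_len)


# The model actually supports 4096 tokens, but nothing forces us to say so:
cfg = DefaultedSketch(max_num_batched_tokens=32768)
assert cfg.derived_limit == 8192  # silently wrong, and stored on the instance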