Remove default values from InitVars so that they're not stored (#29859)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
(cherry picked from commit 951445a52d)
This commit is contained in:
Harry Mellor
2025-12-02 12:16:37 +00:00
committed by Kevin H. Luu
parent d8c6210eea
commit 85fb2e3120
17 changed files with 139 additions and 77 deletions

View File

@@ -79,16 +79,17 @@ def initialize_kv_cache(runner: GPUModelRunner):
 def get_vllm_config():
-    scheduler_config = SchedulerConfig(
-        max_num_seqs=10,
-        max_num_batched_tokens=512,
-        max_model_len=512,
-    )
     model_config = ModelConfig(
         model="facebook/opt-125m",
         dtype="float16",
         seed=42,
     )
+    scheduler_config = SchedulerConfig(
+        max_num_seqs=10,
+        max_num_batched_tokens=512,
+        max_model_len=512,
+        is_encoder_decoder=model_config.is_encoder_decoder,
+    )
     cache_config = CacheConfig(
         block_size=BLOCK_SIZE,
         gpu_memory_utilization=0.9,
@@ -784,14 +785,15 @@ def test_hybrid_attention_mamba_tensor_shapes(monkeypatch):
     initialize_model_parallel(tensor_model_parallel_size=1)
     torch.set_default_dtype(torch.float16)
+    model_config = ModelConfig(
+        model="ibm-granite/granite-4.0-tiny-preview",
+        dtype="float16",
+    )
     scheduler_config = SchedulerConfig(
         max_num_seqs=10,
         max_num_batched_tokens=512,
         max_model_len=512,
-    )
-    model_config = ModelConfig(
-        model="ibm-granite/granite-4.0-tiny-preview",
-        dtype="float16",
+        is_encoder_decoder=model_config.is_encoder_decoder,
     )
     cache_config = CacheConfig(
         block_size=BLOCK_SIZE,