[Misc] unify variable for LLM instance (#20996)

Signed-off-by: Andy Xie <andy.xning@gmail.com>
2025-07-21 19:18:33 +08:00
parent e6b90a2805
commit d97841078b
53 changed files with 237 additions and 236 deletions
--- a/tests/quantization/test_gptq_dynamic.py
+++ b/tests/quantization/test_gptq_dynamic.py
@@ -39,7 +39,7 @@ def test_gptq_with_dynamic(vllm_runner, model_id: str, use_marlin_kernel: bool,
    linear_method_cls = GPTQMarlinLinearMethod if use_marlin_kernel else (
        GPTQLinearMethod)

-    for name, submodule in (vllm_model.model.llm_engine.model_executor.
+    for name, submodule in (vllm_model.llm.llm_engine.model_executor.
                            driver_worker.model_runner.model.named_modules()):
        if name == "lm_head":
            assert isinstance(submodule.quant_method, linear_method_cls)
--- a/tests/quantization/test_quark.py
+++ b/tests/quantization/test_quark.py
@@ -107,11 +107,11 @@ def test_quark_fp8_parity(vllm_runner):
    }
    with (vllm_runner(quark_model_id, **llm_kwargs) as
          quark_handle, vllm_runner(fp8_model_id, **llm_kwargs) as fp8_handle):
-        quark_model = (quark_handle.model.llm_engine.model_executor.
+        quark_model = (quark_handle.llm.llm_engine.model_executor.
                       driver_worker.model_runner.model)
        quark_state_dict = quark_model.state_dict()

-        fp8_model = (fp8_handle.model.llm_engine.model_executor.driver_worker.
+        fp8_model = (fp8_handle.llm.llm_engine.model_executor.driver_worker.
                     model_runner.model)
        fp8_state_dict = fp8_model.state_dict()

--- a/tests/quantization/test_register_quantization_config.py
+++ b/tests/quantization/test_register_quantization_config.py
@@ -111,7 +111,7 @@ def test_custom_quant(vllm_runner, model, monkeypatch):
                     quantization="custom_quant",
                     enforce_eager=True) as llm:

-        model = llm.model.llm_engine.model_executor.driver_worker.model_runner.model  # noqa: E501
+        model = llm.llm.llm_engine.model_executor.driver_worker.model_runner.model  # noqa: E501
        layer = model.model.layers[0]
        qkv_proj = layer.self_attn.qkv_proj