[Misc][Refactor] Generalize linear_method to be quant_method (#4373)

2024-04-26 13:41:14 -07:00
parent 603ad84815
commit a62aaf1df5
45 changed files with 759 additions and 713 deletions
--- a/tests/quantization/test_fp8.py
+++ b/tests/quantization/test_fp8.py
@@ -20,5 +20,5 @@ def test_load_fp16_model(vllm_runner) -> None:

    model = llm.model.llm_engine.model_executor.driver_worker.model_runner.model
    fc1 = model.model.decoder.layers[0].fc1
-    assert isinstance(fc1.linear_method, Fp8LinearMethod)
+    assert isinstance(fc1.quant_method, Fp8LinearMethod)
    assert fc1.weight.dtype == torch.float8_e4m3fn