[Misc][Refactor] Generalize linear_method to be quant_method (#4373)
This commit is contained in:
@@ -20,5 +20,5 @@ def test_load_fp16_model(vllm_runner) -> None:
|
||||
|
||||
model = llm.model.llm_engine.model_executor.driver_worker.model_runner.model
|
||||
fc1 = model.model.decoder.layers[0].fc1
|
||||
- assert isinstance(fc1.linear_method, Fp8LinearMethod)
|
||||
+ assert isinstance(fc1.quant_method, Fp8LinearMethod)
|
||||
assert fc1.weight.dtype == torch.float8_e4m3fn
|
||||
|
||||
Reference in New Issue
Block a user