[ Misc ] fbgemm checkpoints (#6559)

This commit is contained in:
Robert Shaw
2024-07-20 12:36:57 -04:00
committed by GitHub
parent 9042d68362
commit 683e3cb9c4
24 changed files with 234 additions and 47 deletions

View File

@@ -66,8 +66,8 @@ class Fp8Config(QuantizationConfig):
return cls(is_checkpoint_fp8_serialized=is_checkpoint_fp8_serialized,
activation_scheme=activation_scheme)
-def get_quant_method(
-self, layer: torch.nn.Module) -> Optional["QuantizeMethodBase"]:
+def get_quant_method(self, layer: torch.nn.Module,
+prefix: str) -> Optional["QuantizeMethodBase"]:
from vllm.attention.layer import Attention # Avoid circular import
if isinstance(layer, LinearBase):