Support FP8 block quant for CompressedTensorsW8A16Fp8 (#33280)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin
2026-01-30 11:15:20 -05:00
committed by GitHub
parent f857a03f6b
commit fd0e377244
4 changed files with 74 additions and 64 deletions

View File

@@ -400,7 +400,6 @@ class Fp8LinearMethod(LinearMethodBase):
None,
weight_loader,
)
set_weight_attrs(scale, {"scale_type": "weight_scale"})
layer.register_parameter("weight_scale", scale)
else:
assert not self.act_q_static
@@ -412,7 +411,6 @@ class Fp8LinearMethod(LinearMethodBase):
self.weight_block_size,
weight_loader,
)
set_weight_attrs(scale, {"scale_type": "weight_scale"})
# The weight_scale_inv name is intentional for deepseekv3
layer.register_parameter("weight_scale_inv", scale)