[FP8] Extend per-token-group quantization support to QuantFP8 (#24342)

Signed-off-by: Tahsin Tunan <tahsintunan@gmail.com>
Signed-off-by: Luka Govedič <lgovedic@redhat.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Luka Govedič <lgovedic@redhat.com>
This commit is contained in:
Tahsin Tunan
2025-09-17 07:31:06 +06:00
committed by GitHub
parent 493b10f8bf
commit cef32104b4
5 changed files with 444 additions and 61 deletions

View File

@@ -34,6 +34,15 @@ class GroupShape(_GroupShape):
# Class-level GroupShape constants. Only the annotations live here; the values
# are bound by module-level `GroupShape.PER_TENSOR = ...` style assignments.
PER_TENSOR: ClassVar['GroupShape']
PER_TOKEN: ClassVar['GroupShape']
def is_per_tensor(self) -> bool:
    """Return True when both ``row`` and ``col`` equal -1."""
    return (self.row, self.col) == (-1, -1)
def is_per_token(self) -> bool:
    """Return True when ``row`` is 1 and ``col`` is -1."""
    if self.row != 1:
        return False
    return self.col == -1
def is_per_group(self) -> bool:
    """Return True when ``row`` is 1 and ``col`` is a positive size (>= 1)."""
    return self.col >= 1 if self.row == 1 else False
# Bind the class-level constants. This happens at module level because
# GroupShape(...) can only be called once the class itself exists.
# Assuming the positional order is (row, col) — confirm against _GroupShape:
# PER_TENSOR is the shape that is_per_tensor() matches (row == -1, col == -1).
GroupShape.PER_TENSOR = GroupShape(-1, -1)
# PER_TOKEN is the shape that is_per_token() matches (row == 1, col == -1).
GroupShape.PER_TOKEN = GroupShape(1, -1)