[ROCm][Quantization] Add asymmetric INT8 quantization support to TritonInt8ScaledMMLinearKernel (#38501)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
Andreas Karatzas
2026-04-05 20:42:10 -05:00
committed by GitHub
parent 9570654c6d
commit 780ba37458
7 changed files with 133 additions and 14 deletions

View File

@@ -1,6 +1,9 @@
# For vllm script, with -t option (tensor parallel size)
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m RedHatAI/Qwen2.5-VL-3B-Instruct-FP8-Dynamic -l 1319 -t 1
model_name: "RedHatAI/Qwen2.5-VL-3B-Instruct-FP8-Dynamic"
required_gpu_arch:
- gfx942
- gfx950
tasks:
- name: "gsm8k"
metrics: