[Perf] Vectorize static / dynamic INT8 quant kernels (#19233)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
2025-06-12 09:51:41 -04:00
parent 1129e2b1ab
commit b6efafd9e4
4 changed files with 411 additions and 97 deletions
--- a/tests/kernels/quantization/test_int8_quant.py
+++ b/tests/kernels/quantization/test_int8_quant.py
@@ -11,6 +11,7 @@ from vllm.platforms import current_platform

 DTYPES = [torch.half, torch.bfloat16, torch.float]
 HIDDEN_SIZES = [16, 67, 768, 5137, 8193]  # Arbitrary values for testing
+HIDDEN_SIZES += list(range(1024, 1033))  # vectorized conversion edge cases
 NUM_TOKENS = [1, 7, 83, 4096]  # Arbitrary values for testing
 SEEDS = [0]
 SCALE = [0.1, 2.1]