[Perf] Vectorize static / dynamic INT8 quant kernels (#19233)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
Wentao Ye
2025-06-12 09:51:41 -04:00
committed by GitHub
parent 1129e2b1ab
commit b6efafd9e4
4 changed files with 411 additions and 97 deletions

View File

@@ -11,6 +11,7 @@ from vllm.platforms import current_platform
# torch.float16 / torch.float32 are the canonical names of torch.half /
# torch.float; the list holds the very same dtype objects.
DTYPES = [torch.float16, torch.bfloat16, torch.float32]
HIDDEN_SIZES = [
    # Arbitrary values for testing
    16,
    67,
    768,
    5137,
    8193,
    # Consecutive sizes 1024..1032: vectorized conversion edge cases
    *range(1024, 1033),
]
# Arbitrary values for testing
NUM_TOKENS = [1, 7, 83, 4096]
SEEDS = [0]
SCALE = [0.1, 2.1]