[Perf] Vectorize static / dynamic INT8 quant kernels (#19233)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
@@ -11,6 +11,7 @@ from vllm.platforms import current_platform
|
||||
|
||||
DTYPES = [torch.half, torch.bfloat16, torch.float]
|
||||
HIDDEN_SIZES = [16, 67, 768, 5137, 8193] # Arbitrary values for testing
|
||||
HIDDEN_SIZES += list(range(1024, 1033)) # vectorized conversion edge cases
|
||||
NUM_TOKENS = [1, 7, 83, 4096] # Arbitrary values for testing
|
||||
SEEDS = [0]
|
||||
SCALE = [0.1, 2.1]
|
||||
|
||||
Reference in New Issue
Block a user