diff --git a/tests/kernels/core/test_fused_quant_layernorm.py b/tests/kernels/core/test_fused_quant_layernorm.py index 418c700bb..63b5a37d3 100644 --- a/tests/kernels/core/test_fused_quant_layernorm.py +++ b/tests/kernels/core/test_fused_quant_layernorm.py @@ -15,7 +15,6 @@ VEC_HIDDEN_SIZES = range(1024, 1030) # Avoid combinatorial explosion with full Cartesian product NUM_TOKENS_HIDDEN_SIZES = [ *[(1, i) for i in [1, 64, *VEC_HIDDEN_SIZES, 5120, 5137]], - *[(83, i) for i in [1, 1033, 2048, 5120]], *[(2048, i) for i in [1, 64, *VEC_HIDDEN_SIZES, 5137]], *[(4096, i) for i in [1, 64, 5137]], ] diff --git a/tests/kernels/core/test_layernorm.py b/tests/kernels/core/test_layernorm.py index 7553d45e0..aaa13c066 100644 --- a/tests/kernels/core/test_layernorm.py +++ b/tests/kernels/core/test_layernorm.py @@ -11,19 +11,7 @@ from vllm.platforms import current_platform DTYPES = [torch.half, torch.bfloat16, torch.float] NUM_TOKENS = [7, 83, 4096] # Arbitrary values for testing -HIDDEN_SIZES = [ - 8, - 768, - 769, - 770, - 771, - 5120, - 5124, - 5125, - 5126, - 8192, - 8199, -] # Arbitrary values for testing +HIDDEN_SIZES = [8, 768, 769, 5120, 5125, 8192] # Arbitrary values for testing ADD_RESIDUAL = [False, True] SEEDS = [0] CUDA_DEVICES = [f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2)] @@ -118,7 +106,7 @@ def test_poly_norm( @pytest.mark.parametrize("hidden_size", HIDDEN_SIZES) @pytest.mark.parametrize("add_residual", ADD_RESIDUAL) @pytest.mark.parametrize("dtype", DTYPES) -@pytest.mark.parametrize("quant_scale", [1.0, 0.01, 10.0]) +@pytest.mark.parametrize("quant_scale", [0.01, 1.0, 10.0]) @pytest.mark.parametrize("seed", SEEDS) @pytest.mark.parametrize("device", CUDA_DEVICES) @pytest.mark.parametrize("strided_input", [False, True]) diff --git a/tests/kernels/core/test_permute_cols.py b/tests/kernels/core/test_permute_cols.py index 1e264735c..08fdd0e05 100644 --- a/tests/kernels/core/test_permute_cols.py +++ b/tests/kernels/core/test_permute_cols.py @@ -9,7 +9,7 @@ from vllm._custom_ops import permute_cols @pytest.mark.parametrize("shape", [(1, 512), (544, 4096), (67, 8192)]) -@pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float16]) +@pytest.mark.parametrize("dtype", [torch.bfloat16]) def test_permute_cols(shape, dtype): x = torch.randn(shape, dtype=dtype).cuda() perm = torch.randperm(x.shape[1]).to(torch.int).cuda() diff --git a/tests/kernels/core/test_pos_encoding.py b/tests/kernels/core/test_pos_encoding.py index e1ddc5de0..c35ee5016 100644 --- a/tests/kernels/core/test_pos_encoding.py +++ b/tests/kernels/core/test_pos_encoding.py @@ -12,8 +12,8 @@ from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.platforms import current_platform IS_NEOX_STYLE = [True, False] -DTYPES = [torch.half, torch.bfloat16, torch.float] -HEAD_SIZES = [64, 80, 112, 120, 256] +DTYPES = [torch.bfloat16, torch.float] +HEAD_SIZES = [64, 80, 120, 256] ROTARY_DIMS = [None, 32] # None means rotary dim == head size NUM_HEADS = [17] # Arbitrary values for testing BATCH_SIZES = [5] # Arbitrary values for testing