diff --git a/tests/samplers/test_beam_search.py b/tests/samplers/test_beam_search.py index 78f5ab3e2..830332298 100644 --- a/tests/samplers/test_beam_search.py +++ b/tests/samplers/test_beam_search.py @@ -20,7 +20,7 @@ MM_BEAM_WIDTHS = [2] MODELS = ["TinyLlama/TinyLlama-1.1B-Chat-v1.0"] -@pytest.mark.skip_v1 # FIXME: This fails on V1 right now. +@pytest.mark.skip_v1 # V1 engine does not yet support beam search @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("dtype", ["half"]) @pytest.mark.parametrize("max_tokens", MAX_TOKENS) @@ -62,7 +62,7 @@ def test_beam_search_single_input( ) -@pytest.mark.skip_v1 # FIXME: This fails on V1 right now. +@pytest.mark.skip_v1 # V1 engine does not yet support beam search @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("dtype", ["half"]) @pytest.mark.parametrize("max_tokens", MAX_TOKENS) diff --git a/tests/v1/sample/test_topk_topp_sampler.py b/tests/v1/sample/test_topk_topp_sampler.py index a61f5af42..6a3ec704b 100644 --- a/tests/v1/sample/test_topk_topp_sampler.py +++ b/tests/v1/sample/test_topk_topp_sampler.py @@ -48,7 +48,11 @@ def test_topk_impl_equivalence(): assert torch.allclose(result1, result2) -@pytest.mark.skip(reason="FIXME: This test is failing right now.") +@pytest.mark.skip( + reason="FlashInfer top-k/top-p renorm comparison fails; " + "needs investigation of tolerance threshold or " + "interface differences between Python and FlashInfer implementations" +) def test_flashinfer_sampler(): """ This test verifies that the FlashInfer top-k and top-p sampling