[NVFP4] Support NVFP4 dense models from modelopt and compressed-tensors on AMD Instinct MI300, MI355X and Hopper through emulation (#35733)
Signed-off-by: Felix Marty <Felix.Marty@amd.com> Signed-off-by: fxmarty-amd <felmarty@amd.com> Co-authored-by: Kyle Sayers <kylesayrs@gmail.com>
This commit is contained in:
@@ -366,9 +366,6 @@ def test_compressed_tensors_kv_cache_fp8_per_attn_head(vllm_runner):
     assert output


 @pytest.mark.skipif(
     not current_platform.is_cuda(), reason="This test is skipped on non-CUDA platform."
 )
 @pytest.mark.parametrize(
     "args",
     [
@@ -398,7 +395,7 @@ def test_compressed_tensors_nvfp4(vllm_runner, args):
         assert qkv_proj.scheme.group_size == 16

     llm.apply_model(check_model)
-    output = llm.generate_greedy("Hello my name is", max_tokens=4)
+    output = llm.generate_greedy(["Hello my name is"], max_tokens=4)
     print(output)
     assert output
Reference in New Issue
Block a user