[Quantization][Deprecation] Remove RTN (#32697)

Signed-off-by: Robert Shaw <robshaw@redhat.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>
Author: Robert Shaw
Date: 2026-01-21 11:34:42 -05:00
Committed by: GitHub
parent 6c20e89c02
commit 4e31b7f228
4 changed files with 0 additions and 730 deletions
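For context on what is being removed: RTN ("round-to-nearest") is a simple post-training quantization scheme that maps each weight group onto an integer grid and rounds to the nearest level, requiring no calibration data. A minimal sketch of the idea in PyTorch (illustrative only; rtn_quantize/rtn_dequantize are hypothetical helpers, not the removed vLLM implementation):

import torch

def rtn_quantize(w: torch.Tensor, num_bits: int = 8):
    """Symmetric per-row round-to-nearest quantization (illustrative)."""
    qmax = 2 ** (num_bits - 1) - 1                       # e.g. 127 for int8
    # One scale per output row; clamp avoids division by zero on all-zero rows.
    scale = w.abs().amax(dim=-1, keepdim=True).clamp_min(1e-8) / qmax
    q = torch.clamp(torch.round(w / scale), -qmax - 1, qmax)
    return q.to(torch.int8), scale

def rtn_dequantize(q: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
    """Recover an approximation of the original weights."""
    return q.to(scale.dtype) * scale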


@@ -1,39 +0,0 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# Copyright © 2025, Oracle and/or its affiliates.
"""Tests RTN quantization startup and generation;
does not test correctness.
"""

import pytest

from tests.quantization.utils import is_quant_method_supported

MODELS = [
    "ai21labs/Jamba-tiny-dev",  # MoE model
]


@pytest.mark.skipif(
    not is_quant_method_supported("rtn"),
    reason="RTN is not supported on this GPU type.",
)
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["bfloat16"])
@pytest.mark.parametrize("max_tokens", [10])
def test_model_rtn_startup(
    hf_runner,
    vllm_runner,
    example_prompts,
    model: str,
    dtype: str,
    max_tokens: int,
) -> None:
    with vllm_runner(
        model,
        enforce_eager=True,
        dtype=dtype,
        quantization="rtn",
        allow_deprecated_quantization=True,
    ) as vllm_model:
        vllm_model.generate_greedy(example_prompts, max_tokens)
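
Before this commit, RTN could still be selected at engine startup via the quantization argument, gated behind allow_deprecated_quantization as the removed test shows. A sketch of the user-facing call on a pre-removal vLLM version (assumed API surface; this no longer works after this change):

from vllm import LLM, SamplingParams

# Assumes a vLLM release that still ships the RTN backend (pre-#32697).
llm = LLM(
    model="ai21labs/Jamba-tiny-dev",
    dtype="bfloat16",
    quantization="rtn",  # removed by this commit
    enforce_eager=True,
)
outputs = llm.generate(["Hello, my name is"], SamplingParams(max_tokens=10))
print(outputs[0].outputs[0].text)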