[Misc] Remove deprecated items that are due for removal (#36006)

Signed-off-by: Martin Hickey <martin.hickey@ie.ibm.com>
2026-03-05 06:14:50 +00:00
parent 57c629e9c1
commit c3598d02fa
3 changed files with 0 additions and 51 deletions
--- a/vllm/config/cache.py
+++ b/vllm/config/cache.py
@@ -92,24 +92,6 @@ class CacheConfig:
    benefits before turning this on.\n
    - "xxhash_cbor" combines canonical CBOR serialization with xxHash for
    reproducible hashing. Requires the optional ``xxhash`` package."""
-    cpu_offload_gb: float = Field(default=0, ge=0)
-    """The space in GiB to offload to CPU, per GPU. Default is 0, which means
-    no offloading. Intuitively, this argument can be seen as a virtual way to
-    increase the GPU memory size. For example, if you have one 24 GB GPU and
-    set this to 10, virtually you can think of it as a 34 GB GPU. Then you can
-    load a 13B model with BF16 weight, which requires at least 26GB GPU memory.
-    Note that this requires fast CPU-GPU interconnect, as part of the model is
-    loaded from CPU memory to GPU memory on the fly in each model forward pass.
-
-    DEPRECATED: This field is deprecated and will be removed in v0.16.
-    Please use OffloadConfig.uva.cpu_offload_gb instead.
-    """
-    cpu_offload_params: set[str] = Field(default_factory=set)
-    """The set of parameter name segments to target for CPU offloading.
-
-    DEPRECATED: This field is deprecated and will be removed in v0.16.
-    Please use OffloadConfig.uva.cpu_offload_params instead.
-    """
    calculate_kv_scales: bool = False
    """This enables dynamic calculation of `k_scale` and `v_scale` when
    kv_cache_dtype is fp8. If `False`, the scales will be loaded from the model
--- a/vllm/config/compilation.py
+++ b/vllm/config/compilation.py
@@ -381,13 +381,6 @@ class CompilationConfig:
        certain small batchsizes, where inductor is good at optimizing.
    """

-    # Top-level Compilation control
-    level: int = Field(default=None)
-    """
-    Level is deprecated and will be removed in the next release,
-    either 0.12.0 or 0.11.2 whichever is soonest.
-    Please use mode. Currently all levels are mapped to mode.
-    """
    # Top-level Compilation control
    mode: CompilationMode = Field(default=None)
    """The compilation approach used for torch.compile-based compilation of the
@@ -801,17 +794,6 @@ class CompilationConfig:
        return handler(value)

    def __post_init__(self) -> None:
-        if self.level is not None:
-            logger.warning(
-                "Level is deprecated and will be removed in the next release,"
-                "either 0.12.0 or 0.11.2 whichever is soonest."
-                "Use mode instead."
-                "If both level and mode are given,"
-                "only mode will be used."
-            )
-            if self.mode is None:
-                self.mode = self.level
-
        count_none = self.custom_ops.count("none")
        count_all = self.custom_ops.count("all")
        assert count_none + count_all <= 1, "Can only specify 'none' or 'all'"