diff --git a/vllm/config/cache.py b/vllm/config/cache.py index 39ceb3920..d3ce9c067 100644 --- a/vllm/config/cache.py +++ b/vllm/config/cache.py @@ -92,24 +92,6 @@ class CacheConfig: benefits before turning this on.\n - "xxhash_cbor" combines canonical CBOR serialization with xxHash for reproducible hashing. Requires the optional ``xxhash`` package.""" - cpu_offload_gb: float = Field(default=0, ge=0) - """The space in GiB to offload to CPU, per GPU. Default is 0, which means - no offloading. Intuitively, this argument can be seen as a virtual way to - increase the GPU memory size. For example, if you have one 24 GB GPU and - set this to 10, virtually you can think of it as a 34 GB GPU. Then you can - load a 13B model with BF16 weight, which requires at least 26GB GPU memory. - Note that this requires fast CPU-GPU interconnect, as part of the model is - loaded from CPU memory to GPU memory on the fly in each model forward pass. - - DEPRECATED: This field is deprecated and will be removed in v0.16. - Please use OffloadConfig.uva.cpu_offload_gb instead. - """ - cpu_offload_params: set[str] = Field(default_factory=set) - """The set of parameter name segments to target for CPU offloading. - - DEPRECATED: This field is deprecated and will be removed in v0.16. - Please use OffloadConfig.uva.cpu_offload_params instead. - """ calculate_kv_scales: bool = False """This enables dynamic calculation of `k_scale` and `v_scale` when kv_cache_dtype is fp8. If `False`, the scales will be loaded from the model diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index 9cc2cbb49..8f3808166 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -381,13 +381,6 @@ class CompilationConfig: certain small batchsizes, where inductor is good at optimizing. """ - # Top-level Compilation control - level: int = Field(default=None) - """ - Level is deprecated and will be removed in the next release, - either 0.12.0 or 0.11.2 whichever is soonest. - Please use mode. Currently all levels are mapped to mode. - """ # Top-level Compilation control mode: CompilationMode = Field(default=None) """The compilation approach used for torch.compile-based compilation of the @@ -801,17 +794,6 @@ class CompilationConfig: return handler(value) def __post_init__(self) -> None: - if self.level is not None: - logger.warning( - "Level is deprecated and will be removed in the next release," - "either 0.12.0 or 0.11.2 whichever is soonest." - "Use mode instead." - "If both level and mode are given," - "only mode will be used." - ) - if self.mode is None: - self.mode = self.level - count_none = self.custom_ops.count("none") count_all = self.custom_ops.count("all") assert count_none + count_all <= 1, "Can only specify 'none' or 'all'" diff --git a/vllm/multimodal/processing/processor.py b/vllm/multimodal/processing/processor.py index 84720a554..002c48c77 100644 --- a/vllm/multimodal/processing/processor.py +++ b/vllm/multimodal/processing/processor.py @@ -1074,21 +1074,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]): mm_items.get_all_counts(), ) - for modality, prompt_updates in mm_prompt_updates.items(): - for item_idx, item_prompt_updates in enumerate(prompt_updates): - if len(item_prompt_updates) > 1: - logger.warning_once( - "Detected %d prompt updates for `mm_items[%r][%s]`. " - "Multiple prompt updates per item is now " - "deprecated and may be removed in v0.13. " - "Instead, please specify dynamic update targets " - "in the same prompt update definition by passing " - "a function to `PromptUpdate.target`.", - len(prompt_updates), - modality, - item_idx, - ) - return mm_prompt_updates def _find_mm_placeholders(