[Misc] Remove deprecated items that are due for removal (#36006)
Signed-off-by: Martin Hickey <martin.hickey@ie.ibm.com>
This commit is contained in:
@@ -92,24 +92,6 @@ class CacheConfig:
|
||||
benefits before turning this on.\n
|
||||
- "xxhash_cbor" combines canonical CBOR serialization with xxHash for
|
||||
reproducible hashing. Requires the optional ``xxhash`` package."""
|
||||
cpu_offload_gb: float = Field(default=0, ge=0)
|
||||
"""The space in GiB to offload to CPU, per GPU. Default is 0, which means
|
||||
no offloading. Intuitively, this argument can be seen as a virtual way to
|
||||
increase the GPU memory size. For example, if you have one 24 GB GPU and
|
||||
set this to 10, virtually you can think of it as a 34 GB GPU. Then you can
|
||||
load a 13B model with BF16 weight, which requires at least 26GB GPU memory.
|
||||
Note that this requires fast CPU-GPU interconnect, as part of the model is
|
||||
loaded from CPU memory to GPU memory on the fly in each model forward pass.
|
||||
|
||||
DEPRECATED: This field is deprecated and will be removed in v0.16.
|
||||
Please use OffloadConfig.uva.cpu_offload_gb instead.
|
||||
"""
|
||||
cpu_offload_params: set[str] = Field(default_factory=set)
|
||||
"""The set of parameter name segments to target for CPU offloading.
|
||||
|
||||
DEPRECATED: This field is deprecated and will be removed in v0.16.
|
||||
Please use OffloadConfig.uva.cpu_offload_params instead.
|
||||
"""
|
||||
calculate_kv_scales: bool = False
|
||||
"""This enables dynamic calculation of `k_scale` and `v_scale` when
|
||||
kv_cache_dtype is fp8. If `False`, the scales will be loaded from the model
|
||||
|
||||
@@ -381,13 +381,6 @@ class CompilationConfig:
|
||||
certain small batchsizes, where inductor is good at optimizing.
|
||||
"""
|
||||
|
||||
# Top-level Compilation control
|
||||
level: int = Field(default=None)
|
||||
"""
|
||||
Level is deprecated and will be removed in the next release,
|
||||
either 0.12.0 or 0.11.2 whichever is soonest.
|
||||
Please use mode. Currently all levels are mapped to mode.
|
||||
"""
|
||||
# Top-level Compilation control
|
||||
mode: CompilationMode = Field(default=None)
|
||||
"""The compilation approach used for torch.compile-based compilation of the
|
||||
@@ -801,17 +794,6 @@ class CompilationConfig:
|
||||
return handler(value)
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
if self.level is not None:
|
||||
logger.warning(
|
||||
"Level is deprecated and will be removed in the next release,"
|
||||
"either 0.12.0 or 0.11.2 whichever is soonest."
|
||||
"Use mode instead."
|
||||
"If both level and mode are given,"
|
||||
"only mode will be used."
|
||||
)
|
||||
if self.mode is None:
|
||||
self.mode = self.level
|
||||
|
||||
count_none = self.custom_ops.count("none")
|
||||
count_all = self.custom_ops.count("all")
|
||||
assert count_none + count_all <= 1, "Can only specify 'none' or 'all'"
|
||||
|
||||
@@ -1074,21 +1074,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
|
||||
mm_items.get_all_counts(),
|
||||
)
|
||||
|
||||
for modality, prompt_updates in mm_prompt_updates.items():
|
||||
for item_idx, item_prompt_updates in enumerate(prompt_updates):
|
||||
if len(item_prompt_updates) > 1:
|
||||
logger.warning_once(
|
||||
"Detected %d prompt updates for `mm_items[%r][%s]`. "
|
||||
"Multiple prompt updates per item is now "
|
||||
"deprecated and may be removed in v0.13. "
|
||||
"Instead, please specify dynamic update targets "
|
||||
"in the same prompt update definition by passing "
|
||||
"a function to `PromptUpdate.target`.",
|
||||
len(prompt_updates),
|
||||
modality,
|
||||
item_idx,
|
||||
)
|
||||
|
||||
return mm_prompt_updates
|
||||
|
||||
def _find_mm_placeholders(
|
||||
|
||||
Reference in New Issue
Block a user