[Misc] Remove deprecated items that are due for removal (#36006)
Signed-off-by: Martin Hickey <martin.hickey@ie.ibm.com>
This commit is contained in:
@@ -92,24 +92,6 @@ class CacheConfig:
|
||||
benefits before turning this on.\n
|
||||
- "xxhash_cbor" combines canonical CBOR serialization with xxHash for
|
||||
reproducible hashing. Requires the optional ``xxhash`` package."""
|
||||
cpu_offload_gb: float = Field(default=0, ge=0)
|
||||
"""The space in GiB to offload to CPU, per GPU. Default is 0, which means
|
||||
no offloading. Intuitively, this argument can be seen as a virtual way to
|
||||
increase the GPU memory size. For example, if you have one 24 GB GPU and
|
||||
set this to 10, virtually you can think of it as a 34 GB GPU. Then you can
|
||||
load a 13B model with BF16 weights, which requires at least 26 GB of GPU memory.
|
||||
Note that this requires a fast CPU-GPU interconnect, as part of the model is
|
||||
loaded from CPU memory to GPU memory on the fly in each model forward pass.
|
||||
|
||||
DEPRECATED: This field is deprecated and will be removed in v0.16.
|
||||
Please use OffloadConfig.uva.cpu_offload_gb instead.
|
||||
"""
|
||||
cpu_offload_params: set[str] = Field(default_factory=set)
|
||||
"""The set of parameter name segments to target for CPU offloading.
|
||||
|
||||
DEPRECATED: This field is deprecated and will be removed in v0.16.
|
||||
Please use OffloadConfig.uva.cpu_offload_params instead.
|
||||
"""
|
||||
calculate_kv_scales: bool = False
|
||||
"""This enables dynamic calculation of `k_scale` and `v_scale` when
|
||||
kv_cache_dtype is fp8. If `False`, the scales will be loaded from the model
|
||||
|
||||
@@ -381,13 +381,6 @@ class CompilationConfig:
|
||||
certain small batchsizes, where inductor is good at optimizing.
|
||||
"""
|
||||
|
||||
# Top-level Compilation control
|
||||
level: int = Field(default=None)
|
||||
"""
|
||||
Level is deprecated and will be removed in the next release,
|
||||
either 0.12.0 or 0.11.2, whichever is sooner.
|
||||
Please use mode. Currently all levels are mapped to mode.
|
||||
"""
|
||||
# Top-level Compilation control
|
||||
mode: CompilationMode = Field(default=None)
|
||||
"""The compilation approach used for torch.compile-based compilation of the
|
||||
@@ -801,17 +794,6 @@ class CompilationConfig:
|
||||
return handler(value)
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
if self.level is not None:
|
||||
logger.warning(
|
||||
"Level is deprecated and will be removed in the next release,"
|
||||
"either 0.12.0 or 0.11.2 whichever is soonest."
|
||||
"Use mode instead."
|
||||
"If both level and mode are given,"
|
||||
"only mode will be used."
|
||||
)
|
||||
if self.mode is None:
|
||||
self.mode = self.level
|
||||
|
||||
count_none = self.custom_ops.count("none")
|
||||
count_all = self.custom_ops.count("all")
|
||||
assert count_none + count_all <= 1, "Can only specify 'none' or 'all'"
|
||||
|
||||
Reference in New Issue
Block a user