[1/N][Cleanup] Standardize on use of is_quantized_kv_cache (#38659)
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
This commit is contained in:
@@ -46,7 +46,7 @@ from vllm.tasks import SupportedTask
|
||||
from vllm.tracing import instrument
|
||||
from vllm.utils.mem_constants import GiB_bytes
|
||||
from vllm.utils.mem_utils import MemorySnapshot, format_gib, memory_profiling
|
||||
-from vllm.utils.torch_utils import set_random_seed
|
||||
+from vllm.utils.torch_utils import is_quantized_kv_cache, set_random_seed
|
||||
from vllm.v1.core.sched.output import GrammarOutput, SchedulerOutput
|
||||
from vllm.v1.kv_cache_interface import KVCacheConfig, KVCacheSpec
|
||||
from vllm.v1.outputs import (
|
||||
@@ -197,7 +197,7 @@ class Worker(WorkerBase):
|
||||
# especially the FP8 scaling factor.
|
||||
if (
|
||||
(tags is None or "kv_cache" in tags)
|
||||
-and self.cache_config.cache_dtype.startswith("fp8")
|
||||
+and is_quantized_kv_cache(self.cache_config.cache_dtype)
|
||||
and hasattr(self.model_runner, "init_fp8_kv_scales")
|
||||
):
|
||||
self.model_runner.init_fp8_kv_scales()
|
||||
|
||||
Reference in New Issue
Block a user