[1/N][Cleanup] Standardize on use of is_quantized_kv_cache (#38659)

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
This commit is contained in:
Matthew Bonanni
2026-04-01 00:08:01 -04:00
committed by GitHub
parent 7b01d97a22
commit 116f4be405
28 changed files with 90 additions and 75 deletions

View File

@@ -46,7 +46,7 @@ from vllm.tasks import SupportedTask
from vllm.tracing import instrument
from vllm.utils.mem_constants import GiB_bytes
from vllm.utils.mem_utils import MemorySnapshot, format_gib, memory_profiling
-from vllm.utils.torch_utils import set_random_seed
+from vllm.utils.torch_utils import is_quantized_kv_cache, set_random_seed
from vllm.v1.core.sched.output import GrammarOutput, SchedulerOutput
from vllm.v1.kv_cache_interface import KVCacheConfig, KVCacheSpec
from vllm.v1.outputs import (
@@ -197,7 +197,7 @@ class Worker(WorkerBase):
# especially the FP8 scaling factor.
if (
(tags is None or "kv_cache" in tags)
-and self.cache_config.cache_dtype.startswith("fp8")
+and is_quantized_kv_cache(self.cache_config.cache_dtype)
and hasattr(self.model_runner, "init_fp8_kv_scales")
):
self.model_runner.init_fp8_kv_scales()