[Misc] DeepGEMM: Avoid JIT generation in the hot path (#22215)

Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
This commit is contained in:
Varun Sundar Rabindranath
2025-08-08 19:09:59 -04:00
committed by GitHub
parent cd9b9de1fb
commit f703b923f3
5 changed files with 274 additions and 37 deletions

View File

@@ -21,6 +21,7 @@ from vllm.distributed.parallel_state import get_pp_group, get_tp_group
from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.model_executor import set_random_seed
from vllm.model_executor.warmup.kernel_warmup import kernel_warmup
from vllm.platforms import current_platform
from vllm.sequence import IntermediateTensors
from vllm.tasks import SupportedTask
@@ -338,6 +339,10 @@ class Worker(WorkerBase):
self.model_runner._dummy_sampler_run(
hidden_states=last_hidden_states)
# Warm up kernels used during model execution
kernel_warmup(self.get_model(),
max_tokens=self.scheduler_config.max_num_batched_tokens)
# Reset the seed to ensure that the random state is not affected by
# the model initialization and profiling.
set_random_seed(self.model_config.seed)