[V1] Support LLM.apply_model (#18465)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-09-20 15:14:35 +08:00
committed by GitHub
parent be874c0201
commit 3d9a1d2de5
17 changed files with 194 additions and 169 deletions

View File

@@ -522,9 +522,14 @@ class LLM:
"""
Run a function directly on the model inside each worker,
returning the result for each of them.
+!!! warning
+To reduce the overhead of data transfer, avoid returning large
+arrays or tensors from this method. If you must return them,
+make sure you move them to CPU first to avoid taking up additional
+VRAM!
"""
-executor = self.llm_engine.model_executor
-return executor.apply_model(func)
+return self.llm_engine.apply_model(func)
def _get_beam_search_lora_requests(
self,