[Bugfix] Fix DP MTP Dummy Run (#35243)
Signed-off-by: Benjamin Chislett <bchislett@nvidia.com>
This commit is contained in:
committed by GitHub
parent c9e5096256
commit f63ed7b5ac
@@ -903,7 +903,8 @@ class Worker(WorkerBase):
             self.profiler.stop()
 
     def execute_dummy_batch(self) -> None:
-        self.model_runner._dummy_run(1, uniform_decode=True)
+        num_tokens = getattr(self.model_runner, "uniform_decode_query_len", 1)
+        self.model_runner._dummy_run(num_tokens, uniform_decode=True)
 
     def add_lora(self, lora_request: LoRARequest) -> bool:
         return self.model_runner.add_lora(lora_request)
Reference in New Issue
Block a user