Elastic Expert Parallel Initial Support (#20775)

Signed-off-by: Rui Qiao <ruisearch42@gmail.com>
This commit is contained in:
Rui Qiao
2025-07-18 17:46:09 -07:00
committed by GitHub
parent 5782581acf
commit 217937221b
24 changed files with 1659 additions and 68 deletions

View File

@@ -324,3 +324,9 @@ class EngineClient(ABC):
async def add_lora(self, lora_request: LoRARequest) -> None:
    """Make a new LoRA adapter available to the engine for later requests.

    This is an interface stub: the body below is a placeholder and does no
    work itself.

    Args:
        lora_request: Describes the adapter to load (presumably its
            identity and where to find its weights -- confirm against
            the ``LoRARequest`` definition).
    """
    ...
async def scale_elastic_ep(
    self,
    new_data_parallel_size: int,
    drain_timeout: int = 300,
) -> None:
    """Scale the engine to a new data-parallel size.

    This base version performs no scaling: awaiting it unconditionally
    raises ``NotImplementedError``.

    Args:
        new_data_parallel_size: Requested data-parallel size
            (presumably the target size once scaling completes --
            confirm against an implementation).
        drain_timeout: Drain budget, 300 by default (presumably the
            number of seconds to wait for in-flight work to finish
            before scaling -- confirm against an implementation).

    Raises:
        NotImplementedError: Always, in this base version.
    """
    raise NotImplementedError