Remove hard dependencies of speculative decoding on CUDA workers (#10587)

Signed-off-by: Chendi Xue <chendi.xue@intel.com>
This commit is contained in:
Chendi.Xue
2024-11-26 19:57:11 -06:00
committed by GitHub
parent 2f0a0a17a4
commit 0a71900bc9
19 changed files with 219 additions and 77 deletions

View File

@@ -990,6 +990,7 @@ class ParallelConfig:
# the full name of the worker class to use. If "auto", the worker class
# will be determined based on the platform.
worker_cls: str = "auto"
sd_worker_cls: str = "auto"
world_size: int = field(init=False)