[Core] Fix engine-use-ray broken (#4105)
This commit is contained in:
@@ -333,8 +333,7 @@ class AsyncLLMEngine:
|
||||
if engine_config.device_config.device_type == "neuron":
|
||||
raise NotImplementedError("Neuron is not supported for "
|
||||
"async engine yet.")
|
||||
- elif (engine_config.parallel_config.worker_use_ray
|
||||
- or engine_args.engine_use_ray):
|
||||
+ elif engine_config.parallel_config.worker_use_ray:
|
||||
initialize_ray_cluster(engine_config.parallel_config)
|
||||
from vllm.executor.ray_gpu_executor import RayGPUExecutorAsync
|
||||
executor_class = RayGPUExecutorAsync
|
||||
@@ -410,8 +409,8 @@ class AsyncLLMEngine:
|
||||
else:
|
||||
# FIXME(woosuk): This is a bit hacky. Be careful when changing the
|
||||
# order of the arguments.
|
||||
- cache_config = args[1]
|
||||
- parallel_config = args[2]
|
||||
+ cache_config = kwargs["cache_config"]
|
||||
+ parallel_config = kwargs["parallel_config"]
|
||||
if parallel_config.tensor_parallel_size == 1:
|
||||
num_gpus = cache_config.gpu_memory_utilization
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user