[V1][core] Implement pipeline parallel on Ray (#12996)
This commit is contained in:
@@ -35,7 +35,7 @@ try:
|
||||
|
||||
class RayWorkerWrapper(WorkerWrapperBase):
|
||||
"""Ray wrapper for vllm.worker.Worker, allowing Worker to be
|
||||
lazliy initialized after Ray sets CUDA_VISIBLE_DEVICES."""
|
||||
lazily initialized after Ray sets CUDA_VISIBLE_DEVICES."""
|
||||
|
||||
def __init__(self, *args, **kwargs) -> None:
|
||||
super().__init__(*args, **kwargs)
|
||||
@@ -118,7 +118,14 @@ try:
|
||||
) -> "ModelRunnerOutput":
|
||||
self.setup_device_if_necessary()
|
||||
assert self.worker is not None, "Worker is not initialized"
|
||||
output = self.worker.model_runner.execute_model(scheduler_output)
|
||||
if isinstance(scheduler_output, tuple):
|
||||
scheduler_output, intermediate_tensors = scheduler_output
|
||||
else:
|
||||
scheduler_output, intermediate_tensors = scheduler_output, None
|
||||
output = self.worker.model_runner.execute_model(
|
||||
scheduler_output, intermediate_tensors)
|
||||
if isinstance(output, IntermediateTensors):
|
||||
output = scheduler_output, output
|
||||
return output
|
||||
|
||||
def override_env_vars(self, vars: Dict[str, str]):
|
||||
|
||||
Reference in New Issue
Block a user