[core][executor] simplify instance id (#10976)

Signed-off-by: youkaichao <youkaichao@gmail.com>
2024-12-07 09:33:45 -08:00
parent 78029b34ed
commit 1b62745b1d
10 changed files with 22 additions and 55 deletions
--- a/vllm/executor/multiproc_gpu_executor.py
+++ b/vllm/executor/multiproc_gpu_executor.py
@@ -16,7 +16,7 @@ from vllm.sequence import ExecuteModelRequest
 from vllm.triton_utils.importing import HAS_TRITON
 from vllm.utils import (_run_task_with_lock, cuda_device_count_stateless,
                        cuda_is_initialized, get_distributed_init_method,
-                        get_open_port, get_vllm_instance_id, make_async,
+                        get_open_port, make_async,
                        update_environment_variables)

 if HAS_TRITON:
@@ -37,9 +37,6 @@ class MultiprocessingGPUExecutor(DistributedGPUExecutor):
        world_size = self.parallel_config.world_size
        tensor_parallel_size = self.parallel_config.tensor_parallel_size

-        # Ensure that VLLM_INSTANCE_ID is set, to be inherited by workers
-        os.environ["VLLM_INSTANCE_ID"] = get_vllm_instance_id()
-
        # Disable torch async compiling which won't work with daemonic processes
        os.environ["TORCHINDUCTOR_COMPILE_THREADS"] = "1"