[vlm] Remove vision language config. (#6089)
Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com> Co-authored-by: Roger Wang <ywang@roblox.com>
This commit is contained in:
@@ -46,7 +46,7 @@ class CPUExecutor(ExecutorBase):
|
||||
rank=0,
|
||||
distributed_init_method=distributed_init_method,
|
||||
lora_config=self.lora_config,
|
||||
- vision_language_config=self.vision_language_config,
|
||||
+ multimodal_config=self.multimodal_config,
|
||||
kv_cache_dtype=self.cache_config.cache_dtype,
|
||||
is_driver_worker=True,
|
||||
)
|
||||
|
||||
@@ -3,8 +3,8 @@ from abc import ABC, abstractmethod
|
||||
from typing import List, Optional, Set, Tuple
|
||||
|
||||
from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
|
||||
- ModelConfig, ParallelConfig, SchedulerConfig,
|
||||
- SpeculativeConfig, VisionLanguageConfig)
|
||||
+ ModelConfig, MultiModalConfig, ParallelConfig,
|
||||
+ SchedulerConfig, SpeculativeConfig)
|
||||
from vllm.lora.request import LoRARequest
|
||||
from vllm.sequence import ExecuteModelRequest, SamplerOutput
|
||||
|
||||
@@ -26,7 +26,7 @@ class ExecutorBase(ABC):
|
||||
device_config: DeviceConfig,
|
||||
load_config: LoadConfig,
|
||||
lora_config: Optional[LoRAConfig],
|
||||
- vision_language_config: Optional[VisionLanguageConfig],
|
||||
+ multimodal_config: Optional[MultiModalConfig],
|
||||
speculative_config: Optional[SpeculativeConfig],
|
||||
) -> None:
|
||||
self.model_config = model_config
|
||||
@@ -36,7 +36,7 @@ class ExecutorBase(ABC):
|
||||
self.parallel_config = parallel_config
|
||||
self.scheduler_config = scheduler_config
|
||||
self.device_config = device_config
|
||||
- self.vision_language_config = vision_language_config
|
||||
+ self.multimodal_config = multimodal_config
|
||||
self.speculative_config = speculative_config
|
||||
|
||||
self._init_executor()
|
||||
@@ -120,7 +120,7 @@ class ExecutorAsyncBase(ExecutorBase):
|
||||
device_config: DeviceConfig,
|
||||
load_config: LoadConfig,
|
||||
lora_config: Optional[LoRAConfig],
|
||||
- vision_language_config: Optional[VisionLanguageConfig],
|
||||
+ multimodal_config: Optional[MultiModalConfig],
|
||||
speculative_config: Optional[SpeculativeConfig],
|
||||
) -> None:
|
||||
# This locks each pipeline parallel stage so multiple virtual engines
|
||||
@@ -132,8 +132,7 @@ class ExecutorAsyncBase(ExecutorBase):
|
||||
|
||||
super().__init__(model_config, cache_config, parallel_config,
|
||||
scheduler_config, device_config, load_config,
|
||||
- lora_config, vision_language_config,
|
||||
- speculative_config)
|
||||
+ lora_config, multimodal_config, speculative_config)
|
||||
|
||||
@abstractmethod
|
||||
async def execute_model_async(
|
||||
|
||||
@@ -43,7 +43,7 @@ class GPUExecutor(ExecutorBase):
|
||||
rank=rank,
|
||||
distributed_init_method=distributed_init_method,
|
||||
lora_config=self.lora_config,
|
||||
- vision_language_config=self.vision_language_config,
|
||||
+ multimodal_config=self.multimodal_config,
|
||||
speculative_config=self.speculative_config,
|
||||
is_driver_worker=(not self.parallel_config)
|
||||
or (rank % self.parallel_config.tensor_parallel_size == 0),
|
||||
|
||||
@@ -47,7 +47,7 @@ class OpenVINOExecutor(ExecutorBase):
|
||||
rank=0,
|
||||
distributed_init_method=distributed_init_method,
|
||||
lora_config=self.lora_config,
|
||||
- vision_language_config=self.vision_language_config,
|
||||
+ multimodal_config=self.multimodal_config,
|
||||
kv_cache_dtype=self.cache_config.cache_dtype,
|
||||
is_driver_worker=True,
|
||||
)
|
||||
|
||||
@@ -7,8 +7,8 @@ from typing import (TYPE_CHECKING, Any, Awaitable, Dict, List, Optional, Set,
|
||||
Tuple, Union)
|
||||
|
||||
from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
|
||||
- ModelConfig, ParallelConfig, SchedulerConfig,
|
||||
- SpeculativeConfig, VisionLanguageConfig)
|
||||
+ ModelConfig, MultiModalConfig, ParallelConfig,
|
||||
+ SchedulerConfig, SpeculativeConfig)
|
||||
from vllm.executor.distributed_gpu_executor import ( # yapf: disable
|
||||
DistributedGPUExecutor, DistributedGPUExecutorAsync)
|
||||
from vllm.executor.ray_utils import RayWorkerWrapper, ray
|
||||
@@ -43,7 +43,7 @@ class RayXPUExecutor(DistributedGPUExecutor):
|
||||
device_config: DeviceConfig,
|
||||
load_config: LoadConfig,
|
||||
lora_config: Optional[LoRAConfig],
|
||||
- vision_language_config: Optional[VisionLanguageConfig],
|
||||
+ multimodal_config: Optional[MultiModalConfig],
|
||||
speculative_config: Optional[SpeculativeConfig],
|
||||
) -> None:
|
||||
assert device_config.device_type == "xpu"
|
||||
@@ -57,7 +57,7 @@ class RayXPUExecutor(DistributedGPUExecutor):
|
||||
self.parallel_config = parallel_config
|
||||
self.scheduler_config = scheduler_config
|
||||
self.device_config = device_config
|
||||
- self.vision_language_config = vision_language_config
|
||||
+ self.multimodal_config = multimodal_config
|
||||
|
||||
placement_group = self.parallel_config.placement_group
|
||||
|
||||
@@ -199,7 +199,7 @@ class RayXPUExecutor(DistributedGPUExecutor):
|
||||
rank=rank,
|
||||
distributed_init_method=distributed_init_method,
|
||||
lora_config=self.lora_config,
|
||||
- vision_language_config=self.vision_language_config,
|
||||
+ multimodal_config=self.multimodal_config,
|
||||
is_driver_worker=rank == 0,
|
||||
))
|
||||
self._run_workers("init_worker", all_kwargs=init_worker_all_kwargs)
|
||||
|
||||
@@ -50,7 +50,7 @@ class TPUExecutor(ExecutorBase):
|
||||
local_rank=local_rank,
|
||||
rank=rank,
|
||||
distributed_init_method=distributed_init_method,
|
||||
- vision_language_config=self.vision_language_config,
|
||||
+ multimodal_config=self.multimodal_config,
|
||||
is_driver_worker=rank == 0,
|
||||
)
|
||||
|
||||
|
||||
@@ -3,8 +3,8 @@ from typing import List, Optional
|
||||
import torch
|
||||
|
||||
from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
|
||||
- ModelConfig, ParallelConfig, SchedulerConfig,
|
||||
- SpeculativeConfig, VisionLanguageConfig)
|
||||
+ ModelConfig, MultiModalConfig, ParallelConfig,
|
||||
+ SchedulerConfig, SpeculativeConfig)
|
||||
from vllm.executor.executor_base import ExecutorAsyncBase
|
||||
from vllm.executor.gpu_executor import GPUExecutor
|
||||
from vllm.logger import init_logger
|
||||
@@ -26,7 +26,7 @@ class XPUExecutor(GPUExecutor):
|
||||
device_config: DeviceConfig,
|
||||
load_config: LoadConfig,
|
||||
lora_config: Optional[LoRAConfig],
|
||||
- vision_language_config: Optional[VisionLanguageConfig],
|
||||
+ multimodal_config: Optional[MultiModalConfig],
|
||||
speculative_config: Optional[SpeculativeConfig],
|
||||
) -> None:
|
||||
assert device_config.device_type == "xpu"
|
||||
@@ -42,7 +42,7 @@ class XPUExecutor(GPUExecutor):
|
||||
self.parallel_config = parallel_config
|
||||
self.scheduler_config = scheduler_config
|
||||
self.device_config = device_config
|
||||
- self.vision_language_config = vision_language_config
|
||||
+ self.multimodal_config = multimodal_config
|
||||
self.speculative_config = None
|
||||
|
||||
# Instantiate the worker and load the model to GPU.
|
||||
|
||||
Reference in New Issue
Block a user