[Frontend] [Core] feat: Add model loading using tensorizer (#3476)
This commit is contained in:
@@ -7,7 +7,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
|
||||
|
||||
from vllm.config import (CacheConfig, DeviceConfig, LoRAConfig, ModelConfig,
|
||||
ParallelConfig, SchedulerConfig, SpeculativeConfig,
|
||||
- VisionLanguageConfig)
|
||||
+ TensorizerConfig, VisionLanguageConfig)
|
||||
from vllm.engine.ray_utils import RayWorkerVllm, ray
|
||||
from vllm.executor.executor_base import ExecutorAsyncBase, ExecutorBase
|
||||
from vllm.logger import init_logger
|
||||
@@ -42,6 +42,7 @@ class RayGPUExecutor(ExecutorBase):
|
||||
lora_config: Optional[LoRAConfig],
|
||||
vision_language_config: Optional[VisionLanguageConfig],
|
||||
speculative_config: Optional[SpeculativeConfig],
|
||||
+ tensorizer_config: Optional[TensorizerConfig],
|
||||
) -> None:
|
||||
self.model_config = model_config
|
||||
self.cache_config = cache_config
|
||||
@@ -50,6 +51,7 @@ class RayGPUExecutor(ExecutorBase):
|
||||
self.scheduler_config = scheduler_config
|
||||
self.device_config = device_config
|
||||
self.vision_language_config = vision_language_config
|
||||
+ self.tensorizer_config = tensorizer_config
|
||||
assert (not speculative_config
|
||||
), "Speculative decoding not yet supported for RayGPU backend."
|
||||
|
||||
@@ -171,6 +173,7 @@ class RayGPUExecutor(ExecutorBase):
|
||||
distributed_init_method=distributed_init_method,
|
||||
lora_config=lora_config,
|
||||
vision_language_config=vision_language_config,
|
||||
+ tensorizer_config=self.tensorizer_config,
|
||||
))
|
||||
|
||||
# Initialize the driver worker with the Worker class.
|
||||
@@ -187,6 +190,7 @@ class RayGPUExecutor(ExecutorBase):
|
||||
distributed_init_method=distributed_init_method,
|
||||
lora_config=self.lora_config,
|
||||
vision_language_config=self.vision_language_config,
|
||||
+ tensorizer_config=self.tensorizer_config,
|
||||
is_driver_worker=True,
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user