[Frontend] [Core] feat: Add model loading using tensorizer (#3476)

This commit is contained in:
Sanger Steel
2024-04-13 20:13:01 -04:00
committed by GitHub
parent 989ae2538d
commit 711a000255
20 changed files with 1351 additions and 51 deletions

View File

@@ -7,7 +7,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
from vllm.config import (CacheConfig, DeviceConfig, LoRAConfig, ModelConfig,
ParallelConfig, SchedulerConfig, SpeculativeConfig,
-VisionLanguageConfig)
+TensorizerConfig, VisionLanguageConfig)
from vllm.engine.ray_utils import RayWorkerVllm, ray
from vllm.executor.executor_base import ExecutorAsyncBase, ExecutorBase
from vllm.logger import init_logger
@@ -42,6 +42,7 @@ class RayGPUExecutor(ExecutorBase):
lora_config: Optional[LoRAConfig],
vision_language_config: Optional[VisionLanguageConfig],
speculative_config: Optional[SpeculativeConfig],
tensorizer_config: Optional[TensorizerConfig],
) -> None:
self.model_config = model_config
self.cache_config = cache_config
@@ -50,6 +51,7 @@ class RayGPUExecutor(ExecutorBase):
self.scheduler_config = scheduler_config
self.device_config = device_config
self.vision_language_config = vision_language_config
self.tensorizer_config = tensorizer_config
assert (not speculative_config
), "Speculative decoding not yet supported for RayGPU backend."
@@ -171,6 +173,7 @@ class RayGPUExecutor(ExecutorBase):
distributed_init_method=distributed_init_method,
lora_config=lora_config,
vision_language_config=vision_language_config,
tensorizer_config=self.tensorizer_config,
))
# Initialize the driver worker with the Worker class.
@@ -187,6 +190,7 @@ class RayGPUExecutor(ExecutorBase):
distributed_init_method=distributed_init_method,
lora_config=self.lora_config,
vision_language_config=self.vision_language_config,
tensorizer_config=self.tensorizer_config,
is_driver_worker=True,
)