[Frontend] [Core] feat: Add model loading using tensorizer (#3476)
This commit is contained in:
@@ -6,7 +6,7 @@ from transformers import PreTrainedTokenizer
|
||||
import vllm
|
||||
from vllm.config import (CacheConfig, DeviceConfig, LoRAConfig, ModelConfig,
|
||||
ParallelConfig, SchedulerConfig, SpeculativeConfig,
|
||||
VisionLanguageConfig)
|
||||
TensorizerConfig, VisionLanguageConfig)
|
||||
from vllm.core.scheduler import Scheduler, SchedulerOutputs
|
||||
from vllm.engine.arg_utils import EngineArgs
|
||||
from vllm.engine.metrics import StatLogger, Stats
|
||||
@@ -74,6 +74,7 @@ class LLMEngine:
|
||||
lora_config: Optional[LoRAConfig],
|
||||
vision_language_config: Optional[VisionLanguageConfig],
|
||||
speculative_config: Optional[SpeculativeConfig],
|
||||
tensorizer_config: Optional[TensorizerConfig],
|
||||
executor_class: Type[ExecutorBase],
|
||||
log_stats: bool,
|
||||
usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
|
||||
@@ -110,6 +111,7 @@ class LLMEngine:
|
||||
self.scheduler_config = scheduler_config
|
||||
self.device_config = device_config
|
||||
self.speculative_config = speculative_config
|
||||
self.tensorizer_config = tensorizer_config
|
||||
self.log_stats = log_stats
|
||||
|
||||
self._init_tokenizer()
|
||||
@@ -125,6 +127,7 @@ class LLMEngine:
|
||||
lora_config=lora_config,
|
||||
vision_language_config=vision_language_config,
|
||||
speculative_config=speculative_config,
|
||||
tensorizer_config=tensorizer_config,
|
||||
)
|
||||
|
||||
self._initialize_kv_caches()
|
||||
@@ -264,6 +267,9 @@ class LLMEngine:
|
||||
def _verify_args(self) -> None:
|
||||
self.model_config.verify_with_parallel_config(self.parallel_config)
|
||||
self.cache_config.verify_with_parallel_config(self.parallel_config)
|
||||
if self.tensorizer_config:
|
||||
self.tensorizer_config.verify_with_parallel_config(
|
||||
self.parallel_config)
|
||||
if self.lora_config:
|
||||
self.lora_config.verify_with_model_config(self.model_config)
|
||||
self.lora_config.verify_with_scheduler_config(
|
||||
|
||||
Reference in New Issue
Block a user