[Frontend] [Core] Add Tensorizer support for V1, LoRA adapter serialization and deserialization (#17926)

Signed-off-by: Sanger Steel <sangersteel@gmail.com>
This commit is contained in:
Sanger Steel
2025-05-22 21:44:18 -04:00
committed by GitHub
parent c91fe7b1b9
commit c32e249a23
16 changed files with 606 additions and 197 deletions

View File

@@ -31,6 +31,7 @@ from vllm.v1.worker.worker_base import WorkerBase
logger = init_logger(__name__)
if TYPE_CHECKING:
from vllm.model_executor.model_loader.tensorizer import TensorizerConfig
from vllm.v1.core.sched.output import SchedulerOutput
@@ -326,6 +327,13 @@ class Worker(WorkerBase):
max_size=max_size,
)
def save_tensorized_model(self, tensorizer_config: "TensorizerConfig") -> None:
    """Forward a tensorized-model save request to the model runner.

    Args:
        tensorizer_config: Configuration object handed unchanged, by
            keyword, to ``self.model_runner.save_tensorized_model``.

    Returns:
        None. Whatever the model runner returns is discarded.
    """
    # The worker does no work of its own here; it only delegates.
    runner = self.model_runner
    runner.save_tensorized_model(tensorizer_config=tensorizer_config)
def init_worker_distributed_environment(
vllm_config: VllmConfig,