[Core] Support inplace model weights loading (#18745)

Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com>
This commit is contained in:
22quinn
2025-06-02 02:38:50 -07:00
committed by GitHub
parent b9f61e1387
commit 9760fd8f6a
13 changed files with 240 additions and 288 deletions

View File

@@ -4,7 +4,6 @@ import gc
import os
import pathlib
import subprocess
from unittest.mock import MagicMock, patch
import pytest
import torch
@@ -16,7 +15,6 @@ from vllm.engine.arg_utils import EngineArgs
from vllm.model_executor.model_loader.tensorizer import (TensorizerConfig,
TensorSerializer,
is_vllm_tensorized,
load_with_tensorizer,
open_stream,
tensorize_vllm_model)
# yapf: enable
@@ -61,21 +59,6 @@ def write_keyfile(keyfile_path: str):
f.write(encryption_params.key)
@patch('vllm.model_executor.model_loader.tensorizer.TensorizerAgent')
def test_load_with_tensorizer(mock_agent, tensorizer_config):
    """Check that ``load_with_tensorizer`` delegates to ``TensorizerAgent``.

    ``TensorizerAgent`` is patched out, so the test only verifies the
    wiring: the agent is constructed exactly once with the given config
    and ``quant_method``, ``deserialize()`` is invoked exactly once, and
    whatever ``deserialize()`` returns is handed back to the caller.
    """
    quant_method_stub = MagicMock()

    # The instance that the patched class hands out when it is called.
    agent = mock_agent.return_value
    expected_model = MagicMock()
    agent.deserialize.return_value = expected_model

    loaded = load_with_tensorizer(
        tensorizer_config,
        quant_method=quant_method_stub,
    )

    # Construction must forward both the config and the quant method.
    mock_agent.assert_called_once_with(
        tensorizer_config,
        quant_method=quant_method_stub,
    )
    agent.deserialize.assert_called_once()
    assert loaded == expected_model
@pytest.mark.skipif(not is_curl_installed(), reason="cURL is not installed")
def test_can_deserialize_s3(vllm_runner):
model_ref = "EleutherAI/pythia-1.4b"