[Core] Refactor model loading code (#4097)

2024-04-16 11:34:39 -07:00
parent 05434764cd
commit 69e1d2fb69
67 changed files with 1054 additions and 963 deletions
--- a/tests/lora/test_worker.py
+++ b/tests/lora/test_worker.py
@@ -3,8 +3,8 @@ import random
 import tempfile
 from unittest.mock import patch

-from vllm.config import (CacheConfig, DeviceConfig, LoRAConfig, ModelConfig,
-                         ParallelConfig, SchedulerConfig)
+from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
+                         ModelConfig, ParallelConfig, SchedulerConfig)
 from vllm.lora.models import LoRAMapping
 from vllm.lora.request import LoRARequest
 from vllm.worker.worker import Worker
@@ -18,12 +18,14 @@ def test_worker_apply_lora(sql_lora_files):
            "meta-llama/Llama-2-7b-hf",
            tokenizer_mode="auto",
            trust_remote_code=False,
-            download_dir=None,
-            load_format="dummy",
            seed=0,
            dtype="float16",
            revision=None,
        ),
+        load_config=LoadConfig(
+            download_dir=None,
+            load_format="dummy",
+        ),
        parallel_config=ParallelConfig(1, 1, False),
        scheduler_config=SchedulerConfig(32, 32, 32),
        device_config=DeviceConfig("cuda"),