[V1][Spec Decoding] Use model_loader.get_model() to load models (#18273)

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
This commit is contained in:
Mark McLoughlin
2025-05-23 03:05:44 +01:00
committed by GitHub
parent 04eb88dc80
commit c6b636f9fb
16 changed files with 59 additions and 135 deletions

View File

@@ -569,10 +569,9 @@ class BitsAndBytesModelLoader(BaseModelLoader):
# Hand the model identifier and revision from ``model_config`` to
# ``self._prepare_weights``. The helper's result is discarded and nothing is
# returned to the caller.
# NOTE(review): ``_prepare_weights`` is not visible in this hunk; presumably it
# fetches/caches the weight files so a later load finds them locally - confirm.
def download_model(self, model_config: ModelConfig) -> None:
self._prepare_weights(model_config.model, model_config.revision)
def load_model(self, vllm_config: VllmConfig) -> nn.Module:
def load_model(self, vllm_config: VllmConfig,
model_config: ModelConfig) -> nn.Module:
device_config = vllm_config.device_config
model_config = vllm_config.model_config
with set_default_torch_dtype(model_config.dtype):
with torch.device(device_config.device):