[V1][Spec Decoding] Use model_loader.get_model() to load models (#18273)

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
This commit is contained in:
Mark McLoughlin
2025-05-23 03:05:44 +01:00
committed by GitHub
parent 04eb88dc80
commit c6b636f9fb
16 changed files with 59 additions and 135 deletions

View File

@@ -51,10 +51,7 @@ class Medusa(nn.Module):
needs to have truncated_vocab_size (=k) as an attribute."""
def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
-if hasattr(vllm_config, 'draft_model_config'):
-config = vllm_config.draft_model_config.hf_config
-else:
-config = vllm_config.model_config.hf_config
+config = vllm_config.speculative_config.draft_model_config.hf_config
super().__init__()
self.config = config
self.blocks = nn.ModuleList([