[Model] vLLM v1 supports Medusa (#17956)

Signed-off-by: lisiqi23 <lisiqi23@xiaomi.com>
Signed-off-by: skylee-01 <497627264@qq.com>
Co-authored-by: lisiqi23 <lisiqi23@xiaomi.com>
Author: Sky Lee
Date: 2025-05-16 12:05:31 +08:00 (committed by GitHub)
Parent: ee659e3b60
Commit: f4937a51c1

4 changed files with 108 additions and 2 deletions


@@ -1324,19 +1324,22 @@ class EngineArgs:
         # Only Ngram speculative decoding so far.
         is_ngram_enabled = False
         is_eagle_enabled = False
+        is_medusa_enabled = False
         if self.speculative_config is not None:
             # This is supported but experimental (handled below).
             speculative_method = self.speculative_config.get("method")
             if speculative_method:
                 if speculative_method in ("ngram", "[ngram]"):
                     is_ngram_enabled = True
+                elif speculative_method == "medusa":
+                    is_medusa_enabled = True
                 elif speculative_method in ("eagle", "eagle3"):
                     is_eagle_enabled = True
             else:
                 speculative_model = self.speculative_config.get("model")
                 if speculative_model in ("ngram", "[ngram]"):
                     is_ngram_enabled = True
-        if not (is_ngram_enabled or is_eagle_enabled):
+        if not (is_ngram_enabled or is_eagle_enabled or is_medusa_enabled):
             # Other speculative decoding methods are not supported yet.
             _raise_or_fallback(feature_name="Speculative Decoding",
                                recommend_to_remove=False)
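
With the oracle updated, "medusa" joins ngram and EAGLE as a speculative method the V1 engine accepts instead of raising or falling back. A minimal usage sketch under assumptions not shown in this diff: the offline LLM entrypoint forwards speculative_config to EngineArgs, and the model names below are an illustrative base/Medusa-heads pair, not part of this commit.

    from vllm import LLM, SamplingParams

    # Assumed setup: "method" selects the Medusa drafter (the key this
    # commit checks), while "model" points at a checkpoint holding the
    # Medusa heads trained for the base model.
    llm = LLM(
        model="lmsys/vicuna-7b-v1.3",
        speculative_config={
            "method": "medusa",
            "model": "abhigoyal/vllm-medusa-vicuna-7b-v1.3",
            "num_speculative_tokens": 3,
        },
    )

    outputs = llm.generate(
        ["The future of AI is"],
        SamplingParams(temperature=0.0, max_tokens=64),
    )
    print(outputs[0].outputs[0].text)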