[Model] vLLM v1 supports Medusa (#17956)
Signed-off-by: lisiqi23 <lisiqi23@xiaomi.com> Signed-off-by: skylee-01 <497627264@qq.com> Co-authored-by: lisiqi23 <lisiqi23@xiaomi.com>
This commit is contained in:
@@ -1324,19 +1324,22 @@ class EngineArgs:
         # Only Ngram speculative decoding so far.
         is_ngram_enabled = False
         is_eagle_enabled = False
+        is_medusa_enabled = False
         if self.speculative_config is not None:
             # This is supported but experimental (handled below).
             speculative_method = self.speculative_config.get("method")
             if speculative_method:
                 if speculative_method in ("ngram", "[ngram]"):
                     is_ngram_enabled = True
+                elif speculative_method == "medusa":
+                    is_medusa_enabled = True
                 elif speculative_method in ("eagle", "eagle3"):
                     is_eagle_enabled = True
             else:
                 speculative_model = self.speculative_config.get("model")
                 if speculative_model in ("ngram", "[ngram]"):
                     is_ngram_enabled = True
-        if not (is_ngram_enabled or is_eagle_enabled):
+        if not (is_ngram_enabled or is_eagle_enabled or is_medusa_enabled):
             # Other speculative decoding methods are not supported yet.
             _raise_or_fallback(feature_name="Speculative Decoding",
                                recommend_to_remove=False)
Reference in New Issue
Block a user