[Misc] Add --attention-backend auto option (#35738)
Signed-off-by: NickLucche <nlucches@redhat.com>
This commit is contained in:
@@ -14,7 +14,7 @@ class AttentionConfig:
|
||||
"""Configuration for attention mechanisms in vLLM."""
|
||||
|
||||
backend: AttentionBackendEnum | None = None
|
||||
"""Attention backend to use. If None, will be selected automatically."""
|
||||
"""Attention backend to use. Use "auto" or None for automatic selection."""
|
||||
|
||||
flash_attn_version: Literal[2, 3, 4] | None = None
|
||||
"""Force vllm to use a specific flash-attention version (2, 3, or 4).
|
||||
@@ -63,7 +63,13 @@ class AttentionConfig:
|
||||
@field_validator("backend", mode="before")
@classmethod
def validate_backend_before(cls, value: Any) -> Any:
    """Convert a string `backend` value into its enum member.

    Runs before field validation. String matching is case-insensitive:
    the special value "auto" is mapped to None, which triggers automatic
    backend selection; any other string is upper-cased and looked up by
    name in `AttentionBackendEnum`. Non-string values are returned
    unchanged.
    """
    # Anything that is not a string (e.g. None or an enum member) is
    # passed through untouched.
    if not isinstance(value, str):
        return value
    if value.lower() == "auto":
        return None
    # Name-based lookup; an unrecognised name is rejected by the enum's
    # own item lookup.
    return AttentionBackendEnum[value.upper()]
|
||||
|
||||
Reference in New Issue
Block a user