adapt voxtral (#31095)

Signed-off-by: Patrick von Platen <patrick.v.platen@gmail.com>
This commit is contained in:
Patrick von Platen
2025-12-23 14:31:55 +01:00
committed by GitHub
parent b10d47e0e0
commit 3faa8bee57
12 changed files with 739 additions and 98 deletions

View File

@@ -835,6 +835,15 @@ def subclass_attention_backend(
)
def subclass_attention_backend_with_overrides(
name_prefix: str,
attention_backend_cls: type[AttentionBackend],
overrides: dict[str, Any],
) -> type[AttentionBackend]:
name: str = name_prefix + attention_backend_cls.__name__ # type: ignore
return type(name, (attention_backend_cls,), overrides)
def split_decodes_prefills_and_extends(
common_attn_metadata: CommonAttentionMetadata,
decode_threshold: int = 1,