[Misc][Docs] Raise error when flashinfer is not installed and VLLM_ATTENTION_BACKEND is set (#12513)

Signed-off-by: NickLucche <nlucches@redhat.com>
This commit is contained in:
Nicolò Lucchesi
2025-02-24 16:43:21 +01:00
committed by GitHub
parent ccc00515fd
commit 444b0f0f62
2 changed files with 19 additions and 0 deletions

View File

@@ -9,6 +9,7 @@ import sys
import warnings
from contextlib import contextmanager
from dataclasses import dataclass, field, replace
from importlib.util import find_spec
from pathlib import Path
from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Counter, Dict,
Final, List, Literal, Mapping, Optional, Protocol, Set,
@@ -294,6 +295,14 @@ class ModelConfig:
self.maybe_pull_model_tokenizer_for_s3(model, tokenizer)
if (backend := envs.VLLM_ATTENTION_BACKEND
) and backend == "FLASHINFER" and find_spec("flashinfer") is None:
raise ValueError(
"VLLM_ATTENTION_BACKEND is set to FLASHINFER, but flashinfer "
"module was not found."
"See https://github.com/vllm-project/vllm/blob/main/Dockerfile"
"for instructions on how to install it.")
# The tokenizer version is consistent with the model version by default.
if tokenizer_revision is None:
self.tokenizer_revision = revision