[UX] Add vLLM model inspection view (#29450)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin
2026-01-09 12:12:35 -05:00
committed by GitHub
parent 08d954f036
commit d5ec6c056f
6 changed files with 180 additions and 1 deletions

View File

@@ -250,6 +250,7 @@ if TYPE_CHECKING:
VLLM_SHARED_EXPERTS_STREAM_TOKEN_THRESHOLD: int = 256
VLLM_COMPILE_CACHE_SAVE_FORMAT: Literal["binary", "unpacked"] = "binary"
VLLM_USE_V2_MODEL_RUNNER: bool = False
VLLM_LOG_MODEL_INSPECTION: bool = False
VLLM_DEBUG_MFU_METRICS: bool = False
@@ -1595,6 +1596,12 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_USE_V2_MODEL_RUNNER": lambda: bool(
int(os.getenv("VLLM_USE_V2_MODEL_RUNNER", "0"))
),
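# Log a model inspection summary after the model is loaded.
# The value is parsed as an integer: any non-zero value (e.g. 1) enables it;
# the default is 0 (disabled).
# If enabled, logs a transformers-style hierarchical view of the model
# with quantization methods and attention backends.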
"VLLM_LOG_MODEL_INSPECTION": lambda: bool(
int(os.getenv("VLLM_LOG_MODEL_INSPECTION", "0"))
),
# Debug logging for --enable-mfu-metrics
"VLLM_DEBUG_MFU_METRICS": lambda: bool(
int(os.getenv("VLLM_DEBUG_MFU_METRICS", "0"))