[UX] Add vLLM model inspection view (#29450)
Signed-off-by: mgoin <mgoin64@gmail.com>
@@ -250,6 +250,7 @@ if TYPE_CHECKING:
     VLLM_SHARED_EXPERTS_STREAM_TOKEN_THRESHOLD: int = 256
     VLLM_COMPILE_CACHE_SAVE_FORMAT: Literal["binary", "unpacked"] = "binary"
     VLLM_USE_V2_MODEL_RUNNER: bool = False
+    VLLM_LOG_MODEL_INSPECTION: bool = False
     VLLM_DEBUG_MFU_METRICS: bool = False

@@ -1595,6 +1596,12 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "VLLM_USE_V2_MODEL_RUNNER": lambda: bool(
         int(os.getenv("VLLM_USE_V2_MODEL_RUNNER", "0"))
     ),
+    # Log model inspection after loading.
+    # If enabled, logs a transformers-style hierarchical view of the model
+    # with quantization methods and attention backends.
+    "VLLM_LOG_MODEL_INSPECTION": lambda: bool(
+        int(os.getenv("VLLM_LOG_MODEL_INSPECTION", "0"))
+    ),
     # Debug logging for --enable-mfu-metrics
     "VLLM_DEBUG_MFU_METRICS": lambda: bool(
         int(os.getenv("VLLM_DEBUG_MFU_METRICS", "0"))