[UX] Add vLLM model inspection view (#29450)
Signed-off-by: mgoin <mgoin64@gmail.com>
@@ -348,6 +348,9 @@ class LLM:
         self.input_processor = self.llm_engine.input_processor
         self.io_processor = self.llm_engine.io_processor
 
+        # Cache for __repr__ to avoid repeated collective_rpc calls
+        self._cached_repr: str | None = None
+
     def get_tokenizer(self) -> TokenizerLike:
         return self.llm_engine.get_tokenizer()
 
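The cached attribute above backs the __repr__ added in the next hunk; end users trigger the inspection view simply by printing the LLM object. A minimal usage sketch (the model name is illustrative, not taken from this diff):

    from vllm import LLM

    llm = LLM(model="facebook/opt-125m")  # illustrative model name
    print(llm)  # first call issues collective_rpc("get_model_inspection")
    print(llm)  # later calls reuse the cached string, so no extra RPC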
@@ -1786,3 +1789,16 @@ class LLM:
         # This is necessary because some requests may be finished earlier than
         # its previous requests.
         return sorted(outputs, key=lambda x: int(x.request_id))
+
+    def __repr__(self) -> str:
+        """Return a transformers-style hierarchical view of the model."""
+        # Cache the result to avoid repeated collective_rpc calls
+        if self._cached_repr is None:
+            results = self.llm_engine.collective_rpc("get_model_inspection")
+            # In distributed settings, we get results from all workers
+            # Just return the first one (they should all be the same)
+            if results:
+                self._cached_repr = results[0]
+            else:
+                self._cached_repr = f"LLM(model={self.model_config.model!r})"
+        return self._cached_repr
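The hunks above show only the driver side; the worker-side RPC target named in collective_rpc is not part of this excerpt. As a hedged sketch of what such a method could return, assuming the workers hold a torch module (the function body is an assumption, only the name matches the RPC string in the diff):

    import torch.nn as nn

    def get_model_inspection(model: nn.Module) -> str:
        # Hypothetical sketch: torch's nn.Module.__repr__ already renders
        # the transformers-style nested view of submodules, so a worker
        # could simply return it as a string.
        return repr(model)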