[UX] Add vLLM model inspection view (#29450)

Signed-off-by: mgoin <mgoin64@gmail.com>
Michael Goin
2026-01-09 12:12:35 -05:00
committed by GitHub
parent 08d954f036
commit d5ec6c056f
6 changed files with 180 additions and 1 deletion


@@ -348,6 +348,9 @@ class LLM:
        self.input_processor = self.llm_engine.input_processor
        self.io_processor = self.llm_engine.io_processor

        # Cache for __repr__ to avoid repeated collective_rpc calls
        self._cached_repr: str | None = None

    def get_tokenizer(self) -> TokenizerLike:
        return self.llm_engine.get_tokenizer()
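The new `_cached_repr` attribute backs the `__repr__` added below, which fetches the model's module tree from the workers via `collective_rpc("get_model_inspection")`. For context, a minimal sketch of what the worker-side hook could look like; the real method lives in the other files changed by this commit (not shown here), and the `model_runner.model` attribute path is an assumption:

    def get_model_inspection(self) -> str:
        # repr() on a torch.nn.Module already yields the hierarchical,
        # transformers-style module view, so the worker returns it as a string.
        return repr(self.model_runner.model)  # attribute path assumed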
@@ -1786,3 +1789,16 @@ class LLM:
        # This is necessary because some requests may finish earlier than
        # their preceding requests.
        return sorted(outputs, key=lambda x: int(x.request_id))

    def __repr__(self) -> str:
        """Return a transformers-style hierarchical view of the model."""
        # Cache the result to avoid repeated collective_rpc calls
        if self._cached_repr is None:
            results = self.llm_engine.collective_rpc("get_model_inspection")
            # In distributed settings we get one result per worker;
            # they should all be identical, so take the first.
            if results:
                self._cached_repr = results[0]
            else:
                self._cached_repr = f"LLM(model={self.model_config.model!r})"
        return self._cached_repr
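Illustrative usage of the cached inspection view (model name is arbitrary, printed output abbreviated):

    from vllm import LLM

    llm = LLM(model="facebook/opt-125m")
    print(llm)  # first call issues one collective_rpc("get_model_inspection")
    print(llm)  # later calls reuse self._cached_repr, with no further RPC

Caching the string is reasonable here because the loaded module tree is fixed once the engine is initialized, so the result cannot go stale over the lifetime of an `LLM` instance.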