[UX] Add vLLM model inspection view (#29450)
Signed-off-by: mgoin <mgoin64@gmail.com>
@@ -348,6 +348,9 @@ class LLM:
         self.input_processor = self.llm_engine.input_processor
         self.io_processor = self.llm_engine.io_processor
 
+        # Cache for __repr__ to avoid repeated collective_rpc calls
+        self._cached_repr: str | None = None
+
     def get_tokenizer(self) -> TokenizerLike:
         return self.llm_engine.get_tokenizer()
 
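The cached attribute above backs the __repr__ added in the next hunk; end users trigger the inspection view simply by printing the LLM object. A minimal usage sketch (the model name is illustrative, not taken from this diff):

    from vllm import LLM

    llm = LLM(model="facebook/opt-125m")  # illustrative model name
    print(llm)  # first call issues collective_rpc("get_model_inspection")
    print(llm)  # later calls reuse the cached string, so no extra RPC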
@@ -1786,3 +1789,16 @@ class LLM:
         # This is necessary because some requests may be finished earlier than
         # its previous requests.
         return sorted(outputs, key=lambda x: int(x.request_id))
+
+    def __repr__(self) -> str:
+        """Return a transformers-style hierarchical view of the model."""
+        # Cache the result to avoid repeated collective_rpc calls
+        if self._cached_repr is None:
+            results = self.llm_engine.collective_rpc("get_model_inspection")
+            # In distributed settings, we get results from all workers
+            # Just return the first one (they should all be the same)
+            if results:
+                self._cached_repr = results[0]
+            else:
+                self._cached_repr = f"LLM(model={self.model_config.model!r})"
+        return self._cached_repr
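The hunks above show only the driver side; the worker-side RPC target named in collective_rpc is not part of this excerpt. As a hedged sketch of what such a method could return, assuming the workers hold a torch module (the function body is an assumption, only the name matches the RPC string in the diff):

    import torch.nn as nn

    def get_model_inspection(model: nn.Module) -> str:
        # Hypothetical sketch: torch's nn.Module.__repr__ already renders
        # the transformers-style nested view of submodules, so a worker
        # could simply return it as a string.
        return repr(model)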