[Frontend][OpenAI] Support for returning max_model_len on /v1/models response (#4643)

This commit is contained in:
Avinash Raj
2024-06-02 13:36:13 +05:30
committed by GitHub
parent ed59a7ed23
commit f790ad3c50
2 changed files with 2 additions and 0 deletions

View File

@@ -62,6 +62,7 @@ class OpenAIServing:
"""Show available models. Right now we only have one model."""
model_cards = [
ModelCard(id=served_model_name,
max_model_len=self.max_model_len,
root=self.served_model_names[0],
permission=[ModelPermission()])
for served_model_name in self.served_model_names