[Frontend][OpenAI] Support for returning max_model_len on /v1/models response (#4643)
This commit is contained in:
@@ -62,6 +62,7 @@ class OpenAIServing:
|
||||
"""Show available models. Right now we only have one model."""
|
||||
model_cards = [
|
||||
ModelCard(id=served_model_name,
|
||||
max_model_len=self.max_model_len,
|
||||
root=self.served_model_names[0],
|
||||
permission=[ModelPermission()])
|
||||
for served_model_name in self.served_model_names
|
||||
|
||||
Reference in New Issue
Block a user