[Misc][OpenAI] deprecate max_tokens in favor of new max_completion_tokens field for chat completion endpoint (#9837)
This commit is contained in:
committed by
GitHub
parent
64384bbcdf
commit
abbfb6134d
@@ -109,7 +109,7 @@ SkyPilot can scale up the service to multiple service replicas with built-in aut
|
||||
messages:
|
||||
- role: user
|
||||
content: Hello! What is your name?
|
||||
max_tokens: 1
|
||||
max_completion_tokens: 1
|
||||
|
||||
.. raw:: html
|
||||
|
||||
@@ -129,7 +129,7 @@ SkyPilot can scale up the service to multiple service replicas with built-in aut
|
||||
messages:
|
||||
- role: user
|
||||
content: Hello! What is your name?
|
||||
max_tokens: 1
|
||||
max_completion_tokens: 1
|
||||
|
||||
resources:
|
||||
accelerators: {L4, A10g, A10, L40, A40, A100, A100-80GB} # We can use cheaper accelerators for 8B model.
|
||||
@@ -255,7 +255,7 @@ This will scale the service up to when the QPS exceeds 2 for each replica.
|
||||
messages:
|
||||
- role: user
|
||||
content: Hello! What is your name?
|
||||
max_tokens: 1
|
||||
max_completion_tokens: 1
|
||||
|
||||
resources:
|
||||
accelerators: {L4, A10g, A10, L40, A40, A100, A100-80GB} # We can use cheaper accelerators for 8B model.
|
||||
|
||||
Reference in New Issue
Block a user