[doc] Fold long code blocks to improve readability (#19926)
Signed-off-by: reidliu41 <reid201711@gmail.com> Co-authored-by: reidliu41 <reid201711@gmail.com>
This commit is contained in:
@@ -147,20 +147,22 @@ curl http://localhost:8000/v1/completions \
|
||||
|
||||
Since this server is compatible with OpenAI API, you can use it as a drop-in replacement for any applications using OpenAI API. For example, another way to query the server is via the `openai` Python package:
|
||||
|
||||
```python
|
||||
from openai import OpenAI
|
||||
??? Code
|
||||
|
||||
# Modify OpenAI's API key and API base to use vLLM's API server.
|
||||
openai_api_key = "EMPTY"
|
||||
openai_api_base = "http://localhost:8000/v1"
|
||||
client = OpenAI(
|
||||
api_key=openai_api_key,
|
||||
base_url=openai_api_base,
|
||||
)
|
||||
completion = client.completions.create(model="Qwen/Qwen2.5-1.5B-Instruct",
|
||||
prompt="San Francisco is a")
|
||||
print("Completion result:", completion)
|
||||
```
|
||||
```python
|
||||
from openai import OpenAI
|
||||
|
||||
# Modify OpenAI's API key and API base to use vLLM's API server.
|
||||
openai_api_key = "EMPTY"
|
||||
openai_api_base = "http://localhost:8000/v1"
|
||||
client = OpenAI(
|
||||
api_key=openai_api_key,
|
||||
base_url=openai_api_base,
|
||||
)
|
||||
completion = client.completions.create(model="Qwen/Qwen2.5-1.5B-Instruct",
|
||||
prompt="San Francisco is a")
|
||||
print("Completion result:", completion)
|
||||
```
|
||||
|
||||
A more detailed client example can be found here: <gh-file:examples/online_serving/openai_completion_client.py>
|
||||
|
||||
@@ -184,26 +186,28 @@ curl http://localhost:8000/v1/chat/completions \
|
||||
|
||||
Alternatively, you can use the `openai` Python package:
|
||||
|
||||
```python
|
||||
from openai import OpenAI
|
||||
# Set OpenAI's API key and API base to use vLLM's API server.
|
||||
openai_api_key = "EMPTY"
|
||||
openai_api_base = "http://localhost:8000/v1"
|
||||
??? Code
|
||||
|
||||
client = OpenAI(
|
||||
api_key=openai_api_key,
|
||||
base_url=openai_api_base,
|
||||
)
|
||||
```python
|
||||
from openai import OpenAI
|
||||
# Set OpenAI's API key and API base to use vLLM's API server.
|
||||
openai_api_key = "EMPTY"
|
||||
openai_api_base = "http://localhost:8000/v1"
|
||||
|
||||
chat_response = client.chat.completions.create(
|
||||
model="Qwen/Qwen2.5-1.5B-Instruct",
|
||||
messages=[
|
||||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
{"role": "user", "content": "Tell me a joke."},
|
||||
]
|
||||
)
|
||||
print("Chat response:", chat_response)
|
||||
```
|
||||
client = OpenAI(
|
||||
api_key=openai_api_key,
|
||||
base_url=openai_api_base,
|
||||
)
|
||||
|
||||
chat_response = client.chat.completions.create(
|
||||
model="Qwen/Qwen2.5-1.5B-Instruct",
|
||||
messages=[
|
||||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
{"role": "user", "content": "Tell me a joke."},
|
||||
]
|
||||
)
|
||||
print("Chat response:", chat_response)
|
||||
```
|
||||
|
||||
## On Attention Backends
|
||||
|
||||
|
||||
Reference in New Issue
Block a user