[Doc] ruff format some Python examples (#26767)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -63,7 +63,7 @@ If successful, you should be returned a CURL command that you can call inference
|
||||
|
||||
??? console "Command"
|
||||
|
||||
```python
|
||||
```bash
|
||||
curl -X POST https://api.cortex.cerebrium.ai/v4/p-xxxxxx/vllm/run \
|
||||
-H 'Content-Type: application/json' \
|
||||
-H 'Authorization: <JWT TOKEN>' \
|
||||
@@ -81,7 +81,7 @@ You should get a response like:
|
||||
|
||||
??? console "Response"
|
||||
|
||||
```python
|
||||
```json
|
||||
{
|
||||
"run_id": "52911756-3066-9ae8-bcc9-d9129d1bd262",
|
||||
"result": {
|
||||
|
||||
@@ -83,7 +83,7 @@ After the provisioning, you can interact with the model by using the OpenAI SDK:
|
||||
|
||||
client = OpenAI(
|
||||
base_url="https://gateway.<gateway domain>",
|
||||
api_key="<YOUR-DSTACK-SERVER-ACCESS-TOKEN>"
|
||||
api_key="<YOUR-DSTACK-SERVER-ACCESS-TOKEN>",
|
||||
)
|
||||
|
||||
completion = client.chat.completions.create(
|
||||
@@ -93,7 +93,7 @@ After the provisioning, you can interact with the model by using the OpenAI SDK:
|
||||
"role": "user",
|
||||
"content": "Compose a poem that explains the concept of recursion in programming.",
|
||||
}
|
||||
]
|
||||
],
|
||||
)
|
||||
|
||||
print(completion.choices[0].message.content)
|
||||
|
||||
@@ -34,7 +34,7 @@ pip install vllm haystack-ai
|
||||
api_key=Secret.from_token("VLLM-PLACEHOLDER-API-KEY"),
|
||||
model="mistralai/Mistral-7B-Instruct-v0.1",
|
||||
api_base_url="http://{your-vLLM-host-ip}:{your-vLLM-host-port}/v1",
|
||||
generation_kwargs = {"max_tokens": 512}
|
||||
generation_kwargs={"max_tokens": 512},
|
||||
)
|
||||
|
||||
response = generator.run(
|
||||
|
||||
@@ -32,28 +32,28 @@ This is the easiest way to get started with vLLM on Hugging Face Inference Endpo
|
||||
import os
|
||||
|
||||
client = OpenAI(
|
||||
base_url = DEPLOYMENT_URL,
|
||||
api_key = os.environ["HF_TOKEN"] # https://huggingface.co/settings/tokens
|
||||
base_url=DEPLOYMENT_URL,
|
||||
api_key=os.environ["HF_TOKEN"], # https://huggingface.co/settings/tokens
|
||||
)
|
||||
|
||||
chat_completion = client.chat.completions.create(
|
||||
model = "HuggingFaceTB/SmolLM3-3B",
|
||||
messages = [
|
||||
model="HuggingFaceTB/SmolLM3-3B",
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Give me a brief explanation of gravity in simple terms."
|
||||
"text": "Give me a brief explanation of gravity in simple terms.",
|
||||
}
|
||||
]
|
||||
],
|
||||
}
|
||||
],
|
||||
stream = True
|
||||
stream=True,
|
||||
)
|
||||
|
||||
for message in chat_completion:
|
||||
print(message.choices[0].delta.content, end = "")
|
||||
print(message.choices[0].delta.content, end="")
|
||||
```
|
||||
|
||||
!!! note
|
||||
@@ -86,34 +86,34 @@ This method applies to models with the [`transformers` library tag](https://hugg
|
||||
import os
|
||||
|
||||
client = OpenAI(
|
||||
base_url = DEPLOYMENT_URL,
|
||||
api_key = os.environ["HF_TOKEN"] # https://huggingface.co/settings/tokens
|
||||
base_url=DEPLOYMENT_URL,
|
||||
api_key=os.environ["HF_TOKEN"], # https://huggingface.co/settings/tokens
|
||||
)
|
||||
|
||||
chat_completion = client.chat.completions.create(
|
||||
model = "ibm-granite/granite-docling-258M",
|
||||
messages = [
|
||||
model="ibm-granite/granite-docling-258M",
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": "https://huggingface.co/ibm-granite/granite-docling-258M/resolve/main/assets/new_arxiv.png"
|
||||
}
|
||||
"url": "https://huggingface.co/ibm-granite/granite-docling-258M/resolve/main/assets/new_arxiv.png",
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Convert this page to docling."
|
||||
}
|
||||
"text": "Convert this page to docling.",
|
||||
},
|
||||
]
|
||||
}
|
||||
],
|
||||
stream = True
|
||||
stream=True,
|
||||
)
|
||||
|
||||
for message in chat_completion:
|
||||
print(message.choices[0].delta.content, end = "")
|
||||
print(message.choices[0].delta.content, end="")
|
||||
```
|
||||
|
||||
!!! note
|
||||
|
||||
@@ -36,15 +36,16 @@ pip install vllm litellm
|
||||
```python
|
||||
import litellm
|
||||
|
||||
messages = [{ "content": "Hello, how are you?","role": "user"}]
|
||||
messages = [{"content": "Hello, how are you?", "role": "user"}]
|
||||
|
||||
# The "hosted_vllm/" prefix on the model name is required
|
||||
response = litellm.completion(
|
||||
model="hosted_vllm/qwen/Qwen1.5-0.5B-Chat", # pass the vllm model name
|
||||
messages=messages,
|
||||
api_base="http://{your-vllm-server-host}:{your-vllm-server-port}/v1",
|
||||
temperature=0.2,
|
||||
max_tokens=80)
|
||||
model="hosted_vllm/qwen/Qwen1.5-0.5B-Chat", # pass the vllm model name
|
||||
messages=messages,
|
||||
api_base="http://{your-vllm-server-host}:{your-vllm-server-port}/v1",
|
||||
temperature=0.2,
|
||||
max_tokens=80,
|
||||
)
|
||||
|
||||
print(response)
|
||||
```
|
||||
|
||||
@@ -40,7 +40,7 @@ pip install -U vllm \
|
||||
|
||||
1. Run the script
|
||||
|
||||
```python
|
||||
```bash
|
||||
python retrieval_augmented_generation_with_langchain.py
|
||||
```
|
||||
|
||||
@@ -78,6 +78,6 @@ pip install vllm \
|
||||
|
||||
1. Run the script:
|
||||
|
||||
```python
|
||||
```bash
|
||||
python retrieval_augmented_generation_with_llamaindex.py
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user