# docker-compose service running vLLM's OpenAI-compatible server for
# HuggingFaceTB/SmolLM3-3B, with a custom chat template, a patched hermes
# tool-call parser, and a LoRA adapter for better tool calling.
services:
  vllm:
    image: vllm/vllm-openai:v0.19.0
    pull_policy: always
    # NOTE(review): privileged + seccomp:unconfined below grant the container
    # broad host access; confirm both are actually required for your GPU setup.
    privileged: true
    environment:
      # Read the Hugging Face token from the host environment instead of
      # committing a plaintext secret; `:?` makes Compose fail fast with a
      # message if HF_TOKEN is unset. (The previous hard-coded token should be
      # revoked — it has been exposed in version control.)
      - HF_TOKEN=${HF_TOKEN:?HF_TOKEN must be set in the host environment}
    # Arguments to the vLLM OpenAI server entrypoint; first positional arg is
    # the model to serve.
    command:
      - HuggingFaceTB/SmolLM3-3B
      - --host=0.0.0.0
      - --port=80
      - --chat-template-content-format=string
      - --chat-template=/root/chat_template.jinja
      - --enable-auto-tool-choice
      - --tool-call-parser=hermes
      - --reasoning-parser=deepseek_r1
      - --enable-lora
      - --lora-modules=smollm-toolcall=/root/loras/better-tool-call
      # - --max-model-len=131072
      # - --hf-overrides={"rope_scaling":{"type":"yarn","factor":2.0,"original_max_position_embeddings":65536}}
    # Reserve all host NVIDIA GPUs for this service.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Host IPC namespace — commonly needed for PyTorch/NCCL shared memory.
    ipc: host
    security_opt:
      - seccomp:unconfined
    tty: true
    stdin_open: true
    volumes:
      # Persist the Hugging Face model cache on the host.
      - /srv:/root/.cache/huggingface
      - ./chat_template.jinja:/root/chat_template.jinja
      # Overrides the packaged hermes tool parser with a local patched copy;
      # path is pinned to the image's Python 3.12 site layout — revisit on
      # image upgrades.
      - ./smol_tool_parser.py:/usr/local/lib/python3.12/dist-packages/vllm/tool_parsers/hermes_tool_parser.py
      - ./loras:/root/loras
    network_mode: host