Files
smollm3-3b-vllm/docker-compose.yaml
2026-04-10 13:55:43 +00:00

36 lines
1.1 KiB
YAML

services:
  vllm:
    image: vllm/vllm-openai:v0.19.0
    pull_policy: always
    # NOTE(review): privileged + host IPC/network give the container broad host
    # access — presumably needed for GPU/shared-memory setup; confirm before
    # running on a shared host.
    privileged: true
    environment:
      # SECURITY: a real HF token was previously hard-coded here and is now
      # exposed in history — revoke it. Supply the token via the host
      # environment or a .env file; the :? form fails fast if it is unset.
      - HF_TOKEN=${HF_TOKEN:?set HF_TOKEN in the environment or a .env file}
    # Arguments passed to the vLLM OpenAI-compatible server entrypoint.
    command:
      - HuggingFaceTB/SmolLM3-3B
      - --host=0.0.0.0
      - --port=80
      - --chat-template-content-format=string
      - --chat-template=/root/chat_template.jinja
      - --enable-auto-tool-choice
      - --tool-call-parser=hermes
      - --reasoning-parser=deepseek_r1
      # - --max-model-len=131072
      # - --hf-overrides={"rope_scaling":{"type":"yarn","factor":2.0,"original_max_position_embeddings":65536}}
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this service.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    ipc: host
    security_opt:
      - seccomp:unconfined
    tty: true
    stdin_open: true
    volumes:
      # Persist the Hugging Face model cache on the host.
      - /srv:/root/.cache/huggingface
      - ./chat_template.jinja:/root/chat_template.jinja
      # Overlays a local copy of the Hermes tool parser onto the one shipped
      # inside the image (hot-patch without rebuilding).
      - ./hermes_tool_parser.py:/usr/local/lib/python3.12/dist-packages/vllm/tool_parsers/hermes_tool_parser.py
    network_mode: host