# Source: smollm3-3b-vllm/docker-compose.yaml
# Retrieved: 2026-04-10 14:05:50 +00:00 (39 lines, 1.2 KiB, YAML)
# vLLM OpenAI-compatible server for SmolLM3-3B with tool-calling and LoRA.
services:
  vllm:
    image: vllm/vllm-openai:v0.19.0
    pull_policy: always
    # NOTE(review): privileged grants the container full host capabilities;
    # the nvidia device reservation below may make this redundant — confirm.
    privileged: true
    environment:
      # SECURITY: the original file committed a literal hf_... token here.
      # Read it from the host environment / a .env file instead, and revoke
      # the leaked token on huggingface.co.
      - HF_TOKEN=${HF_TOKEN}
    command:
      - HuggingFaceTB/SmolLM3-3B
      - --host=0.0.0.0
      - --port=80
      - --chat-template-content-format=string
      - --chat-template=/root/chat_template.jinja
      - --enable-auto-tool-choice
      - --tool-call-parser=hermes
      - --reasoning-parser=deepseek_r1
      - --enable-lora
      - --lora-modules=smollm-toolcall=/root/loras/better-tool-call
      # Optional long-context settings (YaRN rope scaling), disabled for now:
      # - --max-model-len=131072
      # - --hf-overrides={"rope_scaling":{"type":"yarn","factor":2.0,"original_max_position_embeddings":65536}}
    deploy:
      resources:
        reservations:
          devices:
            # Expose every host NVIDIA GPU to the container.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Shared host IPC + relaxed seccomp are commonly needed for NCCL /
    # CUDA shared-memory transports.
    ipc: host
    security_opt:
      - seccomp:unconfined
    tty: true
    stdin_open: true
    volumes:
      # Persistent HF model cache on the host.
      - /srv:/root/.cache/huggingface
      - ./chat_template.jinja:/root/chat_template.jinja
      # Overrides vLLM's bundled hermes tool-call parser with a local
      # patched copy (path is pinned to the image's Python 3.12 layout).
      - ./smol_tool_parser.py:/usr/local/lib/python3.12/dist-packages/vllm/tool_parsers/hermes_tool_parser.py
      - ./loras:/root/loras
    network_mode: host