# smollm3-3b-vllm/docker-compose.yaml
services:
  # vLLM OpenAI-compatible server for SmolLM3-3B with a LoRA adapter and a
  # patched hermes tool-call parser mounted over the bundled one.
  vllm:
    image: vllm/vllm-openai:v0.19.0
    pull_policy: always
    # NOTE(review): privileged + seccomp:unconfined is a very broad grant on
    # top of host networking/IPC — confirm all of these are actually required.
    privileged: true
    environment:
      # SECURITY: a real Hugging Face token was previously hard-coded here and
      # is now leaked in VCS history — rotate it. Supply the replacement via
      # the shell environment or an untracked .env file; Compose interpolates
      # ${VAR:?msg} and fails fast with `msg` when the variable is unset.
      - HF_TOKEN=${HF_TOKEN:?set HF_TOKEN in the environment or a .env file}
    # The image's entrypoint is the vLLM OpenAI server; these are its args.
    command:
      - HuggingFaceTB/SmolLM3-3B
      - --host=0.0.0.0
      - --port=80
      - --chat-template-content-format=string
      - --chat-template=/root/chat_template.jinja
      - --enable-auto-tool-choice
      - --tool-call-parser=hermes
      - --reasoning-parser=deepseek_r1
      - --enable-lora
      - --lora-modules=smollm-toolcall=/root/loras/better-tool-call
      # Uncomment to extend the context window via YaRN rope scaling:
      # - --max-model-len=131072
      # - --hf-overrides={"rope_scaling":{"type":"yarn","factor":2.0,"original_max_position_embeddings":65536}}
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this container.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Host IPC is commonly needed for multi-process GPU shared memory.
    ipc: host
    security_opt:
      - seccomp:unconfined
    tty: true
    stdin_open: true
    volumes:
      # Persist the HF model cache on the host so pulls survive restarts.
      - /srv:/root/.cache/huggingface
      - ./chat_template.jinja:/root/chat_template.jinja
      # Shadow the image's bundled hermes tool parser with a patched copy.
      # NOTE(review): path is pinned to python3.12 inside this image tag and
      # will silently stop applying if a future image changes Python versions.
      - ./smol_tool_parser.py:/usr/local/lib/python3.12/dist-packages/vllm/tool_parsers/hermes_tool_parser.py
      - ./loras:/root/loras
    # Host networking: the server listens directly on host port 80.
    network_mode: host