# smollm3-3b-vllm/docker-compose.yaml
services:
  # vLLM OpenAI-compatible server for SmolLM3-3B with a LoRA adapter and a
  # patched hermes tool-call parser mounted over the bundled one.
  vllm:
    image: vllm/vllm-openai:v0.19.0
    pull_policy: always
    # NOTE(review): privileged + seccomp:unconfined is a very broad grant on
    # top of host networking/IPC — confirm all of these are actually required.
    privileged: true
    environment:
      # SECURITY: a real Hugging Face token was previously hard-coded here and
      # is now leaked in VCS history — rotate it. Supply the replacement via
      # the shell environment or an untracked .env file; Compose interpolates
      # ${VAR:?msg} and fails fast with `msg` when the variable is unset.
      - HF_TOKEN=${HF_TOKEN:?set HF_TOKEN in the environment or a .env file}
    # The image's entrypoint is the vLLM OpenAI server; these are its args.
    command:
      - HuggingFaceTB/SmolLM3-3B
      - --host=0.0.0.0
      - --port=80
      - --chat-template-content-format=string
      - --chat-template=/root/chat_template.jinja
      - --enable-auto-tool-choice
      - --tool-call-parser=hermes
      - --reasoning-parser=deepseek_r1
      - --enable-lora
      - --lora-modules=smollm-toolcall=/root/loras/better-tool-call
      # Uncomment to extend the context window via YaRN rope scaling:
      # - --max-model-len=131072
      # - --hf-overrides={"rope_scaling":{"type":"yarn","factor":2.0,"original_max_position_embeddings":65536}}
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this container.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Host IPC is commonly needed for multi-process GPU shared memory.
    ipc: host
    security_opt:
      - seccomp:unconfined
    tty: true
    stdin_open: true
    volumes:
      # Persist the HF model cache on the host so pulls survive restarts.
      - /srv:/root/.cache/huggingface
      - ./chat_template.jinja:/root/chat_template.jinja
      # Shadow the image's bundled hermes tool parser with a patched copy.
      # NOTE(review): path is pinned to python3.12 inside this image tag and
      # will silently stop applying if a future image changes Python versions.
      - ./smol_tool_parser.py:/usr/local/lib/python3.12/dist-packages/vllm/tool_parsers/hermes_tool_parser.py
      - ./loras:/root/loras
    # Host networking: the server listens directly on host port 80.
    network_mode: host