diff --git a/docker/docker-bake.hcl b/docker/docker-bake.hcl
new file mode 100644
index 000000000..daf0d62a6
--- /dev/null
+++ b/docker/docker-bake.hcl
@@ -0,0 +1,76 @@
+# docker-bake.hcl - vLLM Docker build configuration
+#
+# This file lives in vLLM repo at docker/docker-bake.hcl
+#
+# Usage:
+#   cd docker && docker buildx bake       # Build default target (openai)
+#   cd docker && docker buildx bake test  # Build test target
+#   docker buildx bake --print            # Show resolved config
+#
+# Reference: https://docs.docker.com/build/bake/reference/
+
+# Build configuration
+
+variable "MAX_JOBS" {
+  default = 16
+}
+
+variable "NVCC_THREADS" {
+  default = 8
+}
+
+variable "TORCH_CUDA_ARCH_LIST" {
+  default = "8.0 8.9 9.0 10.0"
+}
+
+variable "COMMIT" {
+  default = ""
+}
+
+# Groups
+
+group "default" {
+  targets = ["openai"]
+}
+
+# Base targets
+
+target "_common" {
+  dockerfile = "docker/Dockerfile" # resolved relative to the context, i.e. <repo root>/docker/Dockerfile
+  context = ".." # repo root: relative paths resolve against this Bake file's directory (docker/), and the Dockerfile COPYs top-level repo files
+  args = {
+    max_jobs = MAX_JOBS
+    nvcc_threads = NVCC_THREADS
+    torch_cuda_arch_list = TORCH_CUDA_ARCH_LIST
+  }
+}
+
+target "_labels" {
+  labels = {
+    "org.opencontainers.image.source" = "https://github.com/vllm-project/vllm"
+    "org.opencontainers.image.vendor" = "vLLM"
+    "org.opencontainers.image.title" = "vLLM"
+    "org.opencontainers.image.description" = "vLLM: A high-throughput and memory-efficient inference and serving engine for LLMs"
+    "org.opencontainers.image.licenses" = "Apache-2.0"
+    "org.opencontainers.image.revision" = COMMIT
+  }
+  annotations = [ # NOTE(review): index-level annotations require an OCI/registry exporter; "type=docker" output may drop them — confirm
+    "index,manifest:org.opencontainers.image.revision=${COMMIT}",
+  ]
+}
+
+# Build targets
+
+target "test" {
+  inherits = ["_common", "_labels"]
+  target = "test"
+  tags = ["vllm:test"]
+  output = ["type=docker"] # load result into the local Docker image store
+}
+
+target "openai" {
+  inherits = ["_common", "_labels"]
+  target = "vllm-openai"
+  tags = ["vllm:openai"]
+  output = ["type=docker"] # load result into the local Docker image store
+}