diff --git a/lmcache/Dockerfile b/lmcache/Dockerfile index 273e066..425225c 100644 --- a/lmcache/Dockerfile +++ b/lmcache/Dockerfile @@ -46,11 +46,11 @@ FROM base AS build-base # Install build tools and dependencies RUN uv pip install -U build cmake ninja pybind11 setuptools==79.0.1 wheel -# Clone and build LMCache wheel -RUN git clone https://github.com/rajesh-s/LMCache.git -b v0.3.0 && \ +RUN echo "Hello" +# Clone the LMCache repo and build the wheel +RUN git clone https://github.com/LMCache/LMCache.git -b v0.3.3 && \ cd LMCache && \ uv pip install setuptools_scm && \ python -m build --wheel --no-isolation && \ cp dist/*.whl /workspace/ - CMD ["/bin/bash"] \ No newline at end of file diff --git a/lmcache/README b/lmcache/README new file mode 100644 index 0000000..b04e713 --- /dev/null +++ b/lmcache/README @@ -0,0 +1,5 @@ +sudo docker build -t lmcache . +sudo docker run -it --rm lmcache /bin/bash + +sudo docker ps +sudo docker cp e1a106fdc1d6:/workspace/lmcache-0.3.3-cp312-cp312-linux_aarch64.whl . 
\ No newline at end of file diff --git a/vllm/Dockerfile b/vllm/Dockerfile index 107238a..c3cd5e6 100644 --- a/vllm/Dockerfile +++ b/vllm/Dockerfile @@ -51,8 +51,7 @@ RUN apt-get update && apt install -y wget RUN uv pip install numpy==2.0.0 # Install pytorch nightly -RUN uv pip install torch==2.7.0+cu128 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu128 --torch-backend=cu128 -# RUN wget https://download.pytorch.org/whl/nightly/cu128/torch-2.7.0.dev20250310%2Bcu128-cp312-cp312-linux_aarch64.whl -O torch-2.7.0.dev20250310+cu128-cp312-cp312-linux_aarch64.whl +RUN uv pip install torch==2.7.1+cu128 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu128 --torch-backend=cu128 # Install from the wheel # RUN uv pip install ./torch-2.7.0.dev20250310+cu128-cp312-cp312-linux_aarch64.whl @@ -100,8 +99,14 @@ RUN cd xformers && \ # git submodule update --init --recursive -j 8 && \ # uv build --wheel --no-build-isolation -o /wheels +RUN git clone https://github.com/flashinfer-ai/flashinfer.git --recursive && \ + cd flashinfer && git checkout v0.2.8rc1 && \ + uv pip install ninja && \ + uv pip install --no-build-isolation --verbose . 
+ + FROM build-base AS build-vllm -ARG VLLM_REF=v0.9.1 +ARG VLLM_REF=v0.10.0 RUN git clone https://github.com/vllm-project/vllm.git RUN cd vllm && \ git checkout ${VLLM_REF} && \ @@ -109,7 +114,7 @@ RUN cd vllm && \ git submodule update --init --recursive -j 8 && \ python use_existing_torch.py && \ uv pip install -r requirements/build.txt && \ - MAX_JOBS=32 uv build --wheel --no-build-isolation -o /wheels + MAX_JOBS=16 uv build --wheel --no-build-isolation -o /wheels FROM base AS vllm-openai @@ -139,8 +144,8 @@ RUN uv pip install -U build cmake ninja pybind11 setuptools==79.0.1 wheel # Clone and build LMCache wheel without Infinistore that is broken on aarch64 # Copy the wheel from host to container -COPY lmcache-0.3.1.dev1-cp312-cp312-linux_aarch64.whl /tmp/ -RUN uv pip install /tmp/lmcache-0.3.1.dev1-cp312-cp312-linux_aarch64.whl --no-deps +COPY lmcache-0.3.3-cp312-cp312-linux_aarch64.whl /tmp/ +RUN uv pip install /tmp/lmcache-0.3.3-cp312-cp312-linux_aarch64.whl --no-deps # Enable hf-transfer ENV HF_HUB_ENABLE_HF_TRANSFER=1 @@ -157,10 +162,7 @@ RUN apt install -y --no-install-recommends tmux cmake # Install required build tool RUN uv pip install ninja -RUN git clone https://github.com/flashinfer-ai/flashinfer.git --recursive && \ - cd flashinfer && \ - uv pip install ninja && \ - uv pip install --no-build-isolation --verbose . + # API server entrypoint # ENTRYPOINT ["vllm", "serve"] CMD ["/bin/bash"] diff --git a/vllm/README.md b/vllm/README.md index 527f26c..8187ab9 100644 --- a/vllm/README.md +++ b/vllm/README.md @@ -6,6 +6,9 @@ Hosted [here](https://hub.docker.com/repository/docker/rajesh550/gh200-vllm) docker login docker buildx build --platform linux/arm64 --memory=600g -t rajesh550/gh200-vllm:0.9.0.1 . +# Alternative +sudo docker build --memory=300g --platform linux/arm64 -t rajesh550/gh200-vllm:0.10.1 . + docker build --memory=300g -t rajesh550/gh200-vllm:0.9.0.1 . 
docker push rajesh550/gh200-vllm:0.9.0.1 ``` diff --git a/vllm/lmcache-0.3.3-cp312-cp312-linux_aarch64.whl b/vllm/lmcache-0.3.3-cp312-cp312-linux_aarch64.whl new file mode 100644 index 0000000..6726ac1 Binary files /dev/null and b/vllm/lmcache-0.3.3-cp312-cp312-linux_aarch64.whl differ