Updated for v0.10.0
This commit is contained in:
@@ -46,11 +46,11 @@ FROM base AS build-base
|
|||||||
# Install build tools and dependencies
|
# Install build tools and dependencies
|
||||||
RUN uv pip install -U build cmake ninja pybind11 setuptools==79.0.1 wheel
|
RUN uv pip install -U build cmake ninja pybind11 setuptools==79.0.1 wheel
|
||||||
|
|
||||||
# Clone and build LMCache wheel
|
RUN echo "Hello"
|
||||||
RUN git clone https://github.com/rajesh-s/LMCache.git -b v0.3.0 && \
|
# Clone the repo, apply the patch, and build
|
||||||
|
RUN git clone https://github.com/LMCache/LMCache.git -b v0.3.3 && \
|
||||||
cd LMCache && \
|
cd LMCache && \
|
||||||
uv pip install setuptools_scm && \
|
uv pip install setuptools_scm && \
|
||||||
python -m build --wheel --no-isolation && \
|
python -m build --wheel --no-isolation && \
|
||||||
cp dist/*.whl /workspace/
|
cp dist/*.whl /workspace/
|
||||||
|
|
||||||
CMD ["/bin/bash"]
|
CMD ["/bin/bash"]
|
||||||
5
lmcache/README
Normal file
5
lmcache/README
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
sudo docker build -t lmcache .
|
||||||
|
sudo docker run -it --rm lmcache /bin/bash
|
||||||
|
|
||||||
|
sudo docker ps
|
||||||
|
sudo docker cp e1a106fdc1d6:/workspace/lmcache-0.3.3-cp312-cp312-linux_aarch64.whl .
|
||||||
@@ -51,8 +51,7 @@ RUN apt-get update && apt install -y wget
|
|||||||
|
|
||||||
RUN uv pip install numpy==2.0.0
|
RUN uv pip install numpy==2.0.0
|
||||||
# Install pytorch nightly
|
# Install pytorch nightly
|
||||||
RUN uv pip install torch==2.7.0+cu128 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu128 --torch-backend=cu128
|
RUN uv pip install torch==2.7.1+cu128 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu128 --torch-backend=cu128
|
||||||
# RUN wget https://download.pytorch.org/whl/nightly/cu128/torch-2.7.0.dev20250310%2Bcu128-cp312-cp312-linux_aarch64.whl -O torch-2.7.0.dev20250310+cu128-cp312-cp312-linux_aarch64.whl
|
|
||||||
|
|
||||||
# Install from the wheel
|
# Install from the wheel
|
||||||
# RUN uv pip install ./torch-2.7.0.dev20250310+cu128-cp312-cp312-linux_aarch64.whl
|
# RUN uv pip install ./torch-2.7.0.dev20250310+cu128-cp312-cp312-linux_aarch64.whl
|
||||||
@@ -100,8 +99,14 @@ RUN cd xformers && \
|
|||||||
# git submodule update --init --recursive -j 8 && \
|
# git submodule update --init --recursive -j 8 && \
|
||||||
# uv build --wheel --no-build-isolation -o /wheels
|
# uv build --wheel --no-build-isolation -o /wheels
|
||||||
|
|
||||||
|
RUN git clone https://github.com/flashinfer-ai/flashinfer.git --recursive && \
|
||||||
|
cd flashinfer && git checkout v0.2.8rc1 && \
|
||||||
|
uv pip install ninja && \
|
||||||
|
uv pip install --no-build-isolation --verbose .
|
||||||
|
|
||||||
|
|
||||||
FROM build-base AS build-vllm
|
FROM build-base AS build-vllm
|
||||||
ARG VLLM_REF=v0.9.1
|
ARG VLLM_REF=v0.10.0
|
||||||
RUN git clone https://github.com/vllm-project/vllm.git
|
RUN git clone https://github.com/vllm-project/vllm.git
|
||||||
RUN cd vllm && \
|
RUN cd vllm && \
|
||||||
git checkout ${VLLM_REF} && \
|
git checkout ${VLLM_REF} && \
|
||||||
@@ -109,7 +114,7 @@ RUN cd vllm && \
|
|||||||
git submodule update --init --recursive -j 8 && \
|
git submodule update --init --recursive -j 8 && \
|
||||||
python use_existing_torch.py && \
|
python use_existing_torch.py && \
|
||||||
uv pip install -r requirements/build.txt && \
|
uv pip install -r requirements/build.txt && \
|
||||||
MAX_JOBS=32 uv build --wheel --no-build-isolation -o /wheels
|
MAX_JOBS=16 uv build --wheel --no-build-isolation -o /wheels
|
||||||
|
|
||||||
|
|
||||||
FROM base AS vllm-openai
|
FROM base AS vllm-openai
|
||||||
@@ -139,8 +144,8 @@ RUN uv pip install -U build cmake ninja pybind11 setuptools==79.0.1 wheel
|
|||||||
|
|
||||||
# Clone and build LMCache wheel without Infinistore that is broken on aarch64
|
# Clone and build LMCache wheel without Infinistore that is broken on aarch64
|
||||||
# Copy the wheel from host to container
|
# Copy the wheel from host to container
|
||||||
COPY lmcache-0.3.1.dev1-cp312-cp312-linux_aarch64.whl /tmp/
|
COPY lmcache-0.3.3-cp312-cp312-linux_aarch64.whl /tmp/
|
||||||
RUN uv pip install /tmp/lmcache-0.3.1.dev1-cp312-cp312-linux_aarch64.whl --no-deps
|
RUN uv pip install /tmp/lmcache-0.3.3-cp312-cp312-linux_aarch64.whl --no-deps
|
||||||
|
|
||||||
# Enable hf-transfer
|
# Enable hf-transfer
|
||||||
ENV HF_HUB_ENABLE_HF_TRANSFER=1
|
ENV HF_HUB_ENABLE_HF_TRANSFER=1
|
||||||
@@ -157,10 +162,7 @@ RUN apt install -y --no-install-recommends tmux cmake
|
|||||||
# Install required build tool
|
# Install required build tool
|
||||||
RUN uv pip install ninja
|
RUN uv pip install ninja
|
||||||
|
|
||||||
RUN git clone https://github.com/flashinfer-ai/flashinfer.git --recursive && \
|
|
||||||
cd flashinfer && \
|
|
||||||
uv pip install ninja && \
|
|
||||||
uv pip install --no-build-isolation --verbose .
|
|
||||||
# API server entrypoint
|
# API server entrypoint
|
||||||
# ENTRYPOINT ["vllm", "serve"]
|
# ENTRYPOINT ["vllm", "serve"]
|
||||||
CMD ["/bin/bash"]
|
CMD ["/bin/bash"]
|
||||||
|
|||||||
@@ -6,6 +6,9 @@ Hosted [here](https://hub.docker.com/repository/docker/rajesh550/gh200-vllm)
|
|||||||
docker login
|
docker login
|
||||||
docker buildx build --platform linux/arm64 --memory=600g -t rajesh550/gh200-vllm:0.9.0.1 .
|
docker buildx build --platform linux/arm64 --memory=600g -t rajesh550/gh200-vllm:0.9.0.1 .
|
||||||
|
|
||||||
|
# Alternative
|
||||||
|
sudo docker build --memory=300g --platform linux/arm64 -t rajesh550/gh200-vllm:0.10.1 .
|
||||||
|
|
||||||
docker build --memory=300g -t rajesh550/gh200-vllm:0.9.0.1 .
|
docker build --memory=300g -t rajesh550/gh200-vllm:0.9.0.1 .
|
||||||
docker push rajesh550/gh200-vllm:0.9.0.1
|
docker push rajesh550/gh200-vllm:0.9.0.1
|
||||||
```
|
```
|
||||||
|
|||||||
BIN
vllm/lmcache-0.3.3-cp312-cp312-linux_aarch64.whl
Normal file
BIN
vllm/lmcache-0.3.3-cp312-cp312-linux_aarch64.whl
Normal file
Binary file not shown.
Reference in New Issue
Block a user