[Kernel][RFC] Refactor the punica kernel based on Triton (#5036)

This commit is contained in:
Jee Jee Li
2024-08-01 08:12:24 +08:00
committed by GitHub
parent 7eb0cb4a14
commit 7ecee34321
47 changed files with 3177 additions and 4366 deletions

View File

@@ -13,8 +13,6 @@ $python_executable -m pip install -r requirements-cuda.txt
# Build-time environment for the release wheel.
#   MAX_JOBS                     - parallel job count, held at 1 to avoid OOM
#   VLLM_INSTALL_PUNICA_KERNELS  - set to 1 so punica is built for the release (for LoRA)
#   TORCH_CUDA_ARCH_LIST         - architectures the release wheels are built for
MAX_JOBS=1
VLLM_INSTALL_PUNICA_KERNELS=1
TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
# Export everything in one statement so the build step's child processes see it
export MAX_JOBS VLLM_INSTALL_PUNICA_KERNELS TORCH_CUDA_ARCH_LIST
# Build