[CI] Renovation of nightly wheel build & generation (take 2) (#29838)

Signed-off-by: Shengqi Chen <harry-chen@outlook.com>
This commit is contained in:
Shengqi Chen
2025-12-02 14:17:10 +08:00
committed by GitHub
parent 653591d5e7
commit 4b612664fd
9 changed files with 606 additions and 148 deletions

View File

@@ -74,7 +74,7 @@ if TYPE_CHECKING:
VLLM_MEDIA_CONNECTOR: str = "http"
VLLM_MM_INPUT_CACHE_GIB: int = 4
VLLM_TARGET_DEVICE: str = "cuda"
VLLM_MAIN_CUDA_VERSION: str = "12.8"
VLLM_MAIN_CUDA_VERSION: str = "12.9"
MAX_JOBS: str | None = None
NVCC_THREADS: str | None = None
VLLM_USE_PRECOMPILED: bool = False
@@ -445,10 +445,9 @@ environment_variables: dict[str, Callable[[], Any]] = {
# Target device of vLLM, supporting [cuda (by default),
# rocm, cpu]
"VLLM_TARGET_DEVICE": lambda: os.getenv("VLLM_TARGET_DEVICE", "cuda").lower(),
# Main CUDA version of vLLM, supporting [12.6, 12.8, 12.9],
# 12.8 is the default. This follows PyTorch but can be overridden.
# Main CUDA version of vLLM. This follows PyTorch but can be overridden;
# an unset or empty value falls back to the built-in default below.
"VLLM_MAIN_CUDA_VERSION": lambda: os.getenv("VLLM_MAIN_CUDA_VERSION", "").lower()
or "12.8",
or "12.9",
# Maximum number of compilation jobs to run in parallel.
# By default this is the number of CPUs
"MAX_JOBS": lambda: os.getenv("MAX_JOBS", None),