[CI] Renovation of nightly wheel build & generation (take 2) (#29838)

Signed-off-by: Shengqi Chen <harry-chen@outlook.com>
This commit is contained in:
Shengqi Chen
2025-12-02 14:17:10 +08:00
committed by GitHub
parent 653591d5e7
commit 4b612664fd
9 changed files with 606 additions and 148 deletions

148
setup.py
View File

@@ -311,7 +311,7 @@ class precompiled_build_ext(build_ext):
"""Disables extension building when using precompiled binaries."""
def run(self) -> None:
    """Check the CUDA precondition and otherwise do nothing.

    No extension build is started from here; the method only verifies that
    the current build targets CUDA and then returns.

    Raises:
        AssertionError: If ``_is_cuda()`` is falsy.
    """
    cuda_only_msg = "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
    assert _is_cuda(), cuda_only_msg
    return None
def build_extensions(self) -> None:
    """Report that extension compilation is skipped; build nothing."""
    notice = "Skipping build_ext: using precompiled extensions."
    print(notice)
@@ -322,14 +322,121 @@ class precompiled_wheel_utils:
"""Extracts libraries and other files from an existing wheel."""
@staticmethod
def extract_precompiled_and_patch_package(wheel_url_or_path: str) -> dict:
def fetch_metadata_for_variant(
    commit: str, variant: str | None
) -> tuple[list[dict], str]:
    """
    Download and decode ``metadata.json`` for one precompiled-wheel variant.

    Args:
        commit: Commit identifier used as the first path segment of the
            wheel repository URL.
        variant: Name of the variant sub-directory, or ``None`` to address
            the repository for the commit without a variant sub-directory.

    Returns:
        A ``(wheels, repo_url)`` pair: the decoded JSON document and the
        repository URL that ``metadata.json`` was fetched from.

    Raises:
        Whatever ``urlopen`` raises (it raises ``HTTPError`` on an unexpected
        status code), plus any decoding / JSON parsing error.
    """
    variant_dir = "" if variant is None else f"{variant}/"
    repo_url = f"https://wheels.vllm.ai/{commit}/{variant_dir}vllm/"
    meta_url = f"{repo_url}metadata.json"
    print(f"Trying to fetch nightly build metadata from {meta_url}")
    from urllib.request import urlopen

    # Read the whole body first, then parse it once the response is closed.
    with urlopen(meta_url) as resp:
        payload = resp.read()
    wheels = json.loads(payload.decode("utf-8"))
    return wheels, repo_url
@staticmethod
def determine_wheel_url() -> tuple[str, str | None]:
"""
Try to determine the precompiled wheel URL or path to use.
The order of preference is:
1. user-specified wheel location (can be either local or remote, via
VLLM_PRECOMPILED_WHEEL_LOCATION)
2. user-specified variant from nightly repo (current main commit via
VLLM_PRECOMPILED_WHEEL_VARIANT)
3. the variant corresponding to VLLM_MAIN_CUDA_VERSION from nightly repo
4. the default variant from nightly repo (current main commit)
"""
wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
if wheel_location is not None:
print(f"Using user-specified precompiled wheel location: {wheel_location}")
return wheel_location, None
else:
import platform
arch = platform.machine()
# try to fetch the wheel metadata from the nightly wheel repo
main_variant = "cu" + envs.VLLM_MAIN_CUDA_VERSION.replace(".", "")
variant = os.getenv("VLLM_PRECOMPILED_WHEEL_VARIANT", main_variant)
commit = os.getenv(
"VLLM_PRECOMPILED_WHEEL_COMMIT",
precompiled_wheel_utils.get_base_commit_in_main_branch(),
)
print(f"Using precompiled wheel commit {commit} with variant {variant}")
try_default = False
wheels, repo_url, download_filename = None, None, None
try:
wheels, repo_url = precompiled_wheel_utils.fetch_metadata_for_variant(
commit, variant
)
except Exception as e:
logger.warning(
"Failed to fetch precompiled wheel metadata for variant %s: %s",
variant,
e,
)
try_default = True # try outside handler to keep the stacktrace simple
if try_default:
print("Trying the default variant from remote")
wheels, repo_url = precompiled_wheel_utils.fetch_metadata_for_variant(
commit, None
)
# if this also fails, then we have nothing more to try / cache
assert wheels is not None and repo_url is not None, (
"Failed to fetch precompiled wheel metadata"
)
# The metadata.json has the following format:
# see .buildkite/scripts/generate-nightly-index.py for details
"""[{
"package_name": "vllm",
"version": "0.11.2.dev278+gdbc3d9991",
"build_tag": null,
"python_tag": "cp38",
"abi_tag": "abi3",
"platform_tag": "manylinux1_x86_64",
"variant": null,
"filename": "vllm-0.11.2.dev278+gdbc3d9991-cp38-abi3-manylinux1_x86_64.whl",
"path": "../vllm-0.11.2.dev278%2Bgdbc3d9991-cp38-abi3-manylinux1_x86_64.whl"
},
...]"""
from urllib.parse import urljoin
for wheel in wheels:
# TODO: maybe check more compatibility later? (python_tag, abi_tag, etc)
if wheel.get("package_name") == "vllm" and arch in wheel.get(
"platform_tag", ""
):
print(f"Found precompiled wheel metadata: {wheel}")
if "path" not in wheel:
raise ValueError(f"Wheel metadata missing path: {wheel}")
wheel_url = urljoin(repo_url, wheel["path"])
download_filename = wheel.get("filename")
print(f"Using precompiled wheel URL: {wheel_url}")
break
else:
raise ValueError(
f"No precompiled vllm wheel found for architecture {arch} "
f"from repo {repo_url}. All available wheels: {wheels}"
)
return wheel_url, download_filename
@staticmethod
def extract_precompiled_and_patch_package(
wheel_url_or_path: str, download_filename: str | None
) -> dict:
import tempfile
import zipfile
temp_dir = None
try:
if not os.path.isfile(wheel_url_or_path):
wheel_filename = wheel_url_or_path.split("/")[-1]
# use provided filename first, then derive from URL
wheel_filename = download_filename or wheel_url_or_path.split("/")[-1]
temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
wheel_path = os.path.join(temp_dir, wheel_filename)
print(f"Downloading wheel from {wheel_url_or_path} to {wheel_path}")
@@ -648,38 +755,13 @@ package_data = {
]
}
# If using precompiled, extract and patch package_data (in advance of setup)
# NOTE(review): this span looks like a rendered diff hunk. The inline URL
# resolution below and the later determine_wheel_url() call both assign
# wheel_url and both call extract_precompiled_and_patch_package (with one and
# two arguments respectively) - presumably the removed and added sides of the
# same change; confirm against the actual file.
if envs.VLLM_USE_PRECOMPILED:
assert _is_cuda(), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
# A user-supplied location is used as-is as the wheel URL.
wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
if wheel_location is not None:
wheel_url = wheel_location
else:
import platform
arch = platform.machine()
# Map the machine architecture to the platform tag used in the wheel filename.
if arch == "x86_64":
wheel_tag = "manylinux1_x86_64"
elif arch == "aarch64":
wheel_tag = "manylinux2014_aarch64"
else:
raise ValueError(f"Unsupported architecture: {arch}")
# Prefer the wheel published under the base commit; the nightly URL is the fallback.
base_commit = precompiled_wheel_utils.get_base_commit_in_main_branch()
wheel_url = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-{wheel_tag}.whl"
nightly_wheel_url = (
f"https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-{wheel_tag}.whl"
)
from urllib.request import urlopen
# Probe the per-commit URL; a non-200 status or any exception switches to nightly.
try:
with urlopen(wheel_url) as resp:
if resp.status != 200:
wheel_url = nightly_wheel_url
except Exception as e:
print(f"[warn] Falling back to nightly wheel: {e}")
wheel_url = nightly_wheel_url
patch = precompiled_wheel_utils.extract_precompiled_and_patch_package(wheel_url)
# Resolve the wheel location (and optional download filename) via the helper,
# then extract the wheel.
wheel_url, download_filename = precompiled_wheel_utils.determine_wheel_url()
patch = precompiled_wheel_utils.extract_precompiled_and_patch_package(
wheel_url, download_filename
)
# Merge the per-package file lists returned by the extraction into package_data.
for pkg, files in patch.items():
package_data.setdefault(pkg, []).extend(files)