[CI] Renovation of nightly wheel build & generation (take 2) (#29838)
Signed-off-by: Shengqi Chen <harry-chen@outlook.com>
This commit is contained in:
148
setup.py
148
setup.py
@@ -311,7 +311,7 @@ class precompiled_build_ext(build_ext):
|
||||
"""Disables extension building when using precompiled binaries."""
|
||||
|
||||
def run(self) -> None:
    """Assert the `_is_cuda()` precondition and return without doing anything else."""
    # NOTE(review): this listing comes from a diff whose hunk header counts
    # seven lines per side, so the assert and the return below may be the old
    # and new form of a single changed line -- confirm against the commit.
    assert _is_cuda(), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
    return None
|
||||
|
||||
def build_extensions(self) -> None:
|
||||
print("Skipping build_ext: using precompiled extensions.")
|
||||
@@ -322,14 +322,121 @@ class precompiled_wheel_utils:
|
||||
"""Extracts libraries and other files from an existing wheel."""
|
||||
|
||||
@staticmethod
|
||||
def extract_precompiled_and_patch_package(wheel_url_or_path: str) -> dict:
|
||||
def fetch_metadata_for_variant(
    commit: str, variant: str | None
) -> tuple[list[dict], str]:
    """
    Download and decode ``metadata.json`` for one variant of the precompiled wheel.

    Args:
        commit: Path segment placed right after the ``wheels.vllm.ai`` host.
        variant: Optional sub-directory inserted after ``commit``; ``None``
            leaves that segment out of the URL.

    Returns:
        A ``(wheels, repo_url)`` pair: the decoded JSON document and the URL
        of the directory it was fetched from (with a trailing slash).

    Raises:
        Any exception raised by ``urlopen`` or by decoding/parsing the
        response body is propagated unchanged.
    """
    from urllib.request import urlopen

    if variant is None:
        variant_dir = ""
    else:
        variant_dir = f"{variant}/"
    repo_url = f"https://wheels.vllm.ai/{commit}/{variant_dir}vllm/"
    meta_url = repo_url + "metadata.json"
    print(f"Trying to fetch nightly build metadata from {meta_url}")

    # urlopen raises HTTPError on unexpected status code
    with urlopen(meta_url) as resp:
        raw_body = resp.read()

    wheels = json.loads(raw_body.decode("utf-8"))
    return wheels, repo_url
|
||||
|
||||
@staticmethod
|
||||
def determine_wheel_url() -> tuple[str, str | None]:
|
||||
"""
|
||||
Try to determine the precompiled wheel URL or path to use.
|
||||
The order of preference is:
|
||||
1. user-specified wheel location (can be either local or remote, via
|
||||
VLLM_PRECOMPILED_WHEEL_LOCATION)
|
||||
2. user-specified variant from nightly repo (current main commit via
|
||||
VLLM_PRECOMPILED_WHEEL_VARIANT)
|
||||
3. the variant corresponding to VLLM_MAIN_CUDA_VERSION from nightly repo
|
||||
4. the default variant from nightly repo (current main commit)
|
||||
"""
|
||||
wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
|
||||
if wheel_location is not None:
|
||||
print(f"Using user-specified precompiled wheel location: {wheel_location}")
|
||||
return wheel_location, None
|
||||
else:
|
||||
import platform
|
||||
|
||||
arch = platform.machine()
|
||||
# try to fetch the wheel metadata from the nightly wheel repo
|
||||
main_variant = "cu" + envs.VLLM_MAIN_CUDA_VERSION.replace(".", "")
|
||||
variant = os.getenv("VLLM_PRECOMPILED_WHEEL_VARIANT", main_variant)
|
||||
commit = os.getenv(
|
||||
"VLLM_PRECOMPILED_WHEEL_COMMIT",
|
||||
precompiled_wheel_utils.get_base_commit_in_main_branch(),
|
||||
)
|
||||
print(f"Using precompiled wheel commit {commit} with variant {variant}")
|
||||
try_default = False
|
||||
wheels, repo_url, download_filename = None, None, None
|
||||
try:
|
||||
wheels, repo_url = precompiled_wheel_utils.fetch_metadata_for_variant(
|
||||
commit, variant
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to fetch precompiled wheel metadata for variant %s: %s",
|
||||
variant,
|
||||
e,
|
||||
)
|
||||
try_default = True # try outside handler to keep the stacktrace simple
|
||||
if try_default:
|
||||
print("Trying the default variant from remote")
|
||||
wheels, repo_url = precompiled_wheel_utils.fetch_metadata_for_variant(
|
||||
commit, None
|
||||
)
|
||||
# if this also fails, then we have nothing more to try / cache
|
||||
assert wheels is not None and repo_url is not None, (
|
||||
"Failed to fetch precompiled wheel metadata"
|
||||
)
|
||||
# The metadata.json has the following format:
|
||||
# see .buildkite/scripts/generate-nightly-index.py for details
|
||||
"""[{
|
||||
"package_name": "vllm",
|
||||
"version": "0.11.2.dev278+gdbc3d9991",
|
||||
"build_tag": null,
|
||||
"python_tag": "cp38",
|
||||
"abi_tag": "abi3",
|
||||
"platform_tag": "manylinux1_x86_64",
|
||||
"variant": null,
|
||||
"filename": "vllm-0.11.2.dev278+gdbc3d9991-cp38-abi3-manylinux1_x86_64.whl",
|
||||
"path": "../vllm-0.11.2.dev278%2Bgdbc3d9991-cp38-abi3-manylinux1_x86_64.whl"
|
||||
},
|
||||
...]"""
|
||||
from urllib.parse import urljoin
|
||||
|
||||
for wheel in wheels:
|
||||
# TODO: maybe check more compatibility later? (python_tag, abi_tag, etc)
|
||||
if wheel.get("package_name") == "vllm" and arch in wheel.get(
|
||||
"platform_tag", ""
|
||||
):
|
||||
print(f"Found precompiled wheel metadata: {wheel}")
|
||||
if "path" not in wheel:
|
||||
raise ValueError(f"Wheel metadata missing path: {wheel}")
|
||||
wheel_url = urljoin(repo_url, wheel["path"])
|
||||
download_filename = wheel.get("filename")
|
||||
print(f"Using precompiled wheel URL: {wheel_url}")
|
||||
break
|
||||
else:
|
||||
raise ValueError(
|
||||
f"No precompiled vllm wheel found for architecture {arch} "
|
||||
f"from repo {repo_url}. All available wheels: {wheels}"
|
||||
)
|
||||
|
||||
return wheel_url, download_filename
|
||||
|
||||
@staticmethod
|
||||
def extract_precompiled_and_patch_package(
|
||||
wheel_url_or_path: str, download_filename: str | None
|
||||
) -> dict:
|
||||
import tempfile
|
||||
import zipfile
|
||||
|
||||
temp_dir = None
|
||||
try:
|
||||
if not os.path.isfile(wheel_url_or_path):
|
||||
wheel_filename = wheel_url_or_path.split("/")[-1]
|
||||
# use provided filename first, then derive from URL
|
||||
wheel_filename = download_filename or wheel_url_or_path.split("/")[-1]
|
||||
temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
|
||||
wheel_path = os.path.join(temp_dir, wheel_filename)
|
||||
print(f"Downloading wheel from {wheel_url_or_path} to {wheel_path}")
|
||||
@@ -648,38 +755,13 @@ package_data = {
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
# If using precompiled, extract and patch package_data (in advance of setup)
if envs.VLLM_USE_PRECOMPILED:
    assert _is_cuda(), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
    # Wheel resolution (user-specified location, variant lookup, fallback) is
    # delegated to determine_wheel_url(); no URL is assembled here, and the
    # extraction helper is called once with both of its required arguments.
    wheel_url, download_filename = precompiled_wheel_utils.determine_wheel_url()
    patch = precompiled_wheel_utils.extract_precompiled_and_patch_package(
        wheel_url, download_filename
    )
    # Merge the files reported by the extraction into package_data, creating
    # the package entry when it does not exist yet.
    for pkg, files in patch.items():
        package_data.setdefault(pkg, []).extend(files)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user