[ci] Use env var to control whether to use S3 bucket in CI (#13634)

This commit is contained in:
Kevin H. Luu
2025-02-22 19:19:45 -08:00
committed by GitHub
parent 322d2a27d6
commit 2c5e637b57
30 changed files with 222 additions and 231 deletions

View File

@@ -6,7 +6,6 @@ from contextlib import nullcontext
from vllm_test_utils import BlameResult, blame
from vllm import LLM, SamplingParams
-from vllm.config import LoadFormat
from vllm.distributed import cleanup_dist_env_and_memory
@@ -44,8 +43,7 @@ def run_normal():
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
# Create an LLM without guided decoding as a baseline.
-llm = LLM(model="s3://vllm-ci-model-weights/distilgpt2",
-load_format=LoadFormat.RUNAI_STREAMER,
+llm = LLM(model="distilbert/distilgpt2",
enforce_eager=True,
gpu_memory_utilization=0.3)
outputs = llm.generate(prompts, sampling_params)
@@ -61,8 +59,7 @@ def run_normal():
def run_lmfe(sample_regex):
# Create an LLM with guided decoding enabled.
-llm = LLM(model="s3://vllm-ci-model-weights/distilgpt2",
-load_format=LoadFormat.RUNAI_STREAMER,
+llm = LLM(model="distilbert/distilgpt2",
enforce_eager=True,
guided_decoding_backend="lm-format-enforcer",
gpu_memory_utilization=0.3)