[ci] Use env var to control whether to use S3 bucket in CI (#13634)

2025-02-22 19:19:45 -08:00
parent 322d2a27d6
commit 2c5e637b57
30 changed files with 222 additions and 231 deletions
--- a/tests/entrypoints/llm/test_lazy_outlines.py
+++ b/tests/entrypoints/llm/test_lazy_outlines.py
@@ -6,7 +6,6 @@ from contextlib import nullcontext
 from vllm_test_utils import BlameResult, blame

 from vllm import LLM, SamplingParams
-from vllm.config import LoadFormat
 from vllm.distributed import cleanup_dist_env_and_memory


@@ -44,8 +43,7 @@ def run_normal():
    sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

    # Create an LLM without guided decoding as a baseline.
-    llm = LLM(model="s3://vllm-ci-model-weights/distilgpt2",
-              load_format=LoadFormat.RUNAI_STREAMER,
+    llm = LLM(model="distilbert/distilgpt2",
              enforce_eager=True,
              gpu_memory_utilization=0.3)
    outputs = llm.generate(prompts, sampling_params)
@@ -61,8 +59,7 @@ def run_normal():

 def run_lmfe(sample_regex):
    # Create an LLM with guided decoding enabled.
-    llm = LLM(model="s3://vllm-ci-model-weights/distilgpt2",
-              load_format=LoadFormat.RUNAI_STREAMER,
+    llm = LLM(model="distilbert/distilgpt2",
              enforce_eager=True,
              guided_decoding_backend="lm-format-enforcer",
              gpu_memory_utilization=0.3)