[CI]: reduce HTTP calls inside entrypoints openai tests (#23646)

Signed-off-by: AzizCode92 <azizbenothman76@gmail.com> Signed-off-by: Aziz <azizbenothman76@gmail.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-09-02 12:49:32 +02:00
parent 2f0bab3f26
commit ce30dca5c4
8 changed files with 30 additions and 74 deletions
--- a/tests/entrypoints/openai/test_lora_adapters.py
+++ b/tests/entrypoints/openai/test_lora_adapters.py
@@ -9,8 +9,6 @@ from contextlib import suppress
 import openai  # use the official client for correctness check
 import pytest
 import pytest_asyncio
-# downloading lora to test lora requests
-from huggingface_hub import snapshot_download

 from ...utils import RemoteOpenAIServer

@@ -18,7 +16,6 @@ from ...utils import RemoteOpenAIServer
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
 # technically this needs Mistral-7B-v0.1 as base, but we're not testing
 # generation quality here
-LORA_NAME = "typeof/zephyr-7b-beta-lora"

 BADREQUEST_CASES = [
    (
@@ -48,11 +45,6 @@ BADREQUEST_CASES = [
 ]


-@pytest.fixture(scope="module")
-def zephyr_lora_files():
-    return snapshot_download(repo_id=LORA_NAME)
-
-
@pytest.fixture(scope="module")
 def monkeypatch_module():
    from _pytest.monkeypatch import MonkeyPatch