[LoRA] Cleanup LoRA unused code (#29611)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com> Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -8,7 +8,7 @@ import pytest_asyncio
|
||||
from ...utils import RemoteOpenAIServer
|
||||
|
||||
# any model with a chat template should work here
|
||||
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
|
||||
MODEL_NAME = "Qwen/Qwen3-0.6B"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
@@ -110,8 +110,9 @@ async def test_single_completion(client: openai.AsyncOpenAI):
|
||||
choice = completion.choices[0]
|
||||
assert len(choice.text) >= 5
|
||||
assert choice.finish_reason == "length"
|
||||
# When using Qwen3-0.6B, prompt tokens=[9707, 11, 847, 829, 374]
|
||||
assert completion.usage == openai.types.CompletionUsage(
|
||||
completion_tokens=5, prompt_tokens=6, total_tokens=11
|
||||
completion_tokens=5, prompt_tokens=5, total_tokens=10
|
||||
)
|
||||
|
||||
# test using token IDs
|
||||
|
||||
Reference in New Issue
Block a user