Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -76,7 +76,9 @@ def test_kv_sharing_fast_prefill(
|
||||
# managing buffers for cudagraph
|
||||
cudagraph_copy_inputs=True,
|
||||
level=CompilationLevel.PIECEWISE
|
||||
- if not enforce_eager else CompilationLevel.NO_COMPILATION)
+ if not enforce_eager
+ else CompilationLevel.NO_COMPILATION,
+ )
|
||||
|
||||
with monkeypatch.context() as m:
|
||||
m.setenv("VLLM_USE_V1", "1")
|
||||
@@ -94,21 +96,21 @@ def test_kv_sharing_fast_prefill(
|
||||
|
||||
cleanup(llm, compilation_config)
|
||||
|
||||
- llm = LLM(model="google/gemma-3n-E2B-it",
- enforce_eager=enforce_eager,
- compilation_config=compilation_config,
- seed=SEED,
- kv_sharing_fast_prefill=True)
+ llm = LLM(
+ model="google/gemma-3n-E2B-it",
+ enforce_eager=enforce_eager,
+ compilation_config=compilation_config,
+ seed=SEED,
+ kv_sharing_fast_prefill=True,
+ )
|
||||
optimized_responses = llm.generate(test_prompts, sampling_params)
|
||||
|
||||
cleanup(llm, compilation_config)
|
||||
|
||||
misses = 0
|
||||
|
||||
- for ref_response, optimized_response in zip(ref_responses,
- optimized_responses):
- if ref_response.outputs[0].text != optimized_response.outputs[
- 0].text:
+ for ref_response, optimized_response in zip(ref_responses, optimized_responses):
+ if ref_response.outputs[0].text != optimized_response.outputs[0].text:
|
||||
misses += 1
|
||||
|
||||
assert misses == 0
|
||||
|
||||
Reference in New Issue
Block a user