[Core][5/N] Fully working chunked prefill e2e (#3884)

2024-04-11 09:56:48 +09:00
parent 63e7176f26
commit 67b4221a61
26 changed files with 927 additions and 315 deletions
--- a/tests/distributed/test_basic_distributed_correctness.py
+++ b/tests/distributed/test_basic_distributed_correctness.py
@@ -33,11 +33,16 @@ def test_models(
    dtype: str,
    max_tokens: int,
 ) -> None:
+
    hf_model = hf_runner(model, dtype=dtype)
    hf_outputs = hf_model.generate_greedy(example_prompts, max_tokens)
    del hf_model

-    vllm_model = vllm_runner(model, dtype=dtype, tensor_parallel_size=2)
+    vllm_model = vllm_runner(
+        model,
+        dtype=dtype,
+        tensor_parallel_size=2,
+    )
    vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
    del vllm_model