[Core][5/N] Fully working chunked prefill e2e (#3884)

This commit is contained in:
SangBin Cho
2024-04-11 09:56:48 +09:00
committed by GitHub
parent 63e7176f26
commit 67b4221a61
26 changed files with 927 additions and 315 deletions

View File

@@ -141,7 +141,7 @@ def server(zephyr_lora_files):
 "--max-cpu-loras",
 "2",
 "--max-num-seqs",
-"128"
+"128",
 ])
 ray.get(server_runner.ready.remote())
 yield server_runner