Add output streaming support to multi-step + async while ensuring RequestOutput obj reuse (#8335)

This commit is contained in:
Alexander Matveev
2024-09-23 18:38:04 -04:00
committed by GitHub
parent 5f7bb58427
commit 1a2aef3e59
7 changed files with 142 additions and 42 deletions

View File

@@ -19,7 +19,11 @@ FILTER = "exact_match,strict-match"
# NOTE(review): presumably the relative tolerance applied when comparing the
# measured score against EXPECTED_VALUE — confirm against the test body.
RTOL = 0.03
# NOTE(review): presumably the reference score the test expects — confirm.
EXPECTED_VALUE = 0.58
# Base command-line flags; presumably passed on every run regardless of the
# parametrized extras below — verify against the test body.
DEFAULT_ARGS = ["--max-model-len", "4096", "--disable-log-requests"]
# NOTE(review): this assignment is overwritten by the one directly below
# before it is read; it looks like the pre-change line of the diff hunk.
MORE_ARGS_LIST = [["--enable-chunked-prefill"], ["--num-scheduler-steps", "8"]]
# Extra flag sets: each entry is supplied to the test as `more_args` through
# pytest.mark.parametrize.
MORE_ARGS_LIST = [
["--enable-chunked-prefill"], # Chunked prefill
["--num-scheduler-steps", "8"], # Multi-step (MS) scheduling with 8 scheduler steps
["--num-scheduler-steps", "8", "--multi-step-stream-outputs"] # MS with streamed outputs
]
@pytest.mark.parametrize("more_args", MORE_ARGS_LIST)