Add output streaming support to multi-step + async while ensuring RequestOutput object reuse (#8335)
This commit is contained in:
committed by GitHub
parent 5f7bb58427
commit 1a2aef3e59
@@ -19,7 +19,11 @@ FILTER = "exact_match,strict-match"
 RTOL = 0.03
 EXPECTED_VALUE = 0.58
 DEFAULT_ARGS = ["--max-model-len", "4096", "--disable-log-requests"]
-MORE_ARGS_LIST = [["--enable-chunked-prefill"], ["--num-scheduler-steps", "8"]]
+MORE_ARGS_LIST = [
+    ["--enable-chunked-prefill"],  # Chunked
+    ["--num-scheduler-steps", "8"],  # MS
+    ["--num-scheduler-steps", "8", "--multi-step-stream-outputs"]  # MS+Stream
+]
 
 
 @pytest.mark.parametrize("more_args", MORE_ARGS_LIST)
Reference in New Issue
Block a user