[Bugfix][Mamba] Fix Multistep on Mamba-like models (#10705)
Signed-off-by: mzusman <mor.zusmann@gmail.com>
This commit is contained in:
@@ -283,3 +283,39 @@ def test_state_cleanup(
|
||||
except ValueError:
|
||||
pytest.fail("Mamba inner state wasn't cleaned up between states, "
|
||||
"could be related to finished_requests_ids")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["float"])
def test_multistep(
    vllm_runner,
    model: str,
    dtype: str,
    example_prompts,
) -> None:
    """Smoke-test greedy generation with multi-step scheduling enabled.

    The runner is created with ``num_scheduler_steps=8`` and
    ``max_num_seqs=2``, then asked to greedily generate for ten copies of
    the first example prompt. Nothing is asserted on the outputs; the test
    passes as long as generation completes without raising.

    NOTE(review): ``dtype`` is parametrized but never forwarded to
    ``vllm_runner`` in this test -- confirm whether that is intended.
    """
    with vllm_runner(
            model,
            num_scheduler_steps=8,
            max_num_seqs=2,
    ) as vllm_model:
        # Ten prompts against max_num_seqs=2, so there are more requests
        # than the configured sequence limit.
        repeated_prompts = [example_prompts[0]] * 10
        # Second argument is 1 (presumably the max number of new tokens).
        vllm_model.generate_greedy(repeated_prompts, 1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["float"])
@pytest.mark.parametrize("max_tokens", [64])
def test_multistep_correctness(vllm_runner, model: str, dtype: str,
                               max_tokens: int, example_prompts) -> None:
    """Compare greedy outputs of multi-step and single-step scheduling.

    The same prompts are generated twice with ``max_num_seqs=2``: first
    with ``num_scheduler_steps=8``, then with ``num_scheduler_steps=1``.
    Both results are handed to ``check_outputs_equal`` for comparison.

    NOTE(review): ``dtype`` is parametrized but never forwarded to
    ``vllm_runner`` in this test -- confirm whether that is intended.
    """

    def _greedy_outputs(scheduler_steps: int):
        # A fresh runner per configuration; the context manager is exited
        # before the next runner is created.
        with vllm_runner(
                model,
                num_scheduler_steps=scheduler_steps,
                max_num_seqs=2,
        ) as vllm_model:
            return vllm_model.generate_greedy(example_prompts, max_tokens)

    # Order matters for parity with the original: multi-step runs first.
    multistep_results = _greedy_outputs(8)
    single_step_results = _greedy_outputs(1)

    check_outputs_equal(
        outputs_0_lst=multistep_results,
        outputs_1_lst=single_step_results,
        name_0="vllm_outputs_multistep",
        name_1="vllm_outputs_single_step",
    )
|
||||
|
||||
Reference in New Issue
Block a user