[Encoder Decoder] Add flash_attn kernel support for encoder-decoder models (#9559)

This commit is contained in:
sroy745
2024-11-01 23:22:49 -07:00
committed by GitHub
parent d522034c85
commit a78dd3303e
11 changed files with 715 additions and 316 deletions

View File

@@ -85,7 +85,7 @@ def run_test(
@pytest.mark.parametrize("model", MODELS)
-@pytest.mark.parametrize("dtype", ["float"])
+@pytest.mark.parametrize("dtype", ["float", "bfloat16"])
@pytest.mark.parametrize("max_tokens", [64])
@pytest.mark.parametrize("num_logprobs", [5])
def test_models(hf_runner, vllm_runner, model, dtype, max_tokens,