[Attention][V0 Deprecation] Deprecate accept output buffer (#39125)

Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
This commit is contained in:
Lucas Wilkinson
2026-04-07 17:14:58 -04:00
committed by GitHub
parent 08bfedc152
commit 70406eb1dc
22 changed files with 94 additions and 227 deletions

View File

@@ -216,12 +216,14 @@ def test_splitting_ops_dynamic():
compilation_config=CompilationConfig(
mode=CompilationMode.VLLM_COMPILE,
use_inductor_graph_partition=True,
splitting_ops=["vllm::unified_attention"],
splitting_ops=["vllm::unified_attention_with_output"],
)
)
# With use_inductor_graph_partition enabled, splitting_ops is used as-is
# as the partition rules.
assert config.compilation_config.splitting_ops == ["vllm::unified_attention"]
assert config.compilation_config.splitting_ops == [
"vllm::unified_attention_with_output"
]
# When attn_fusion pass enabled.
config = VllmConfig(
@@ -281,7 +283,7 @@ def test_moe_splitting_ops_deepep_ht_inductor_partition():
mode=CompilationMode.VLLM_COMPILE,
use_inductor_graph_partition=True,
splitting_ops=[
"vllm::unified_attention",
"vllm::unified_attention_with_output",
"vllm::moe_forward",
"vllm::moe_forward_shared",
],
@@ -289,7 +291,7 @@ def test_moe_splitting_ops_deepep_ht_inductor_partition():
)
splitting_ops = config.compilation_config.splitting_ops
assert splitting_ops == [
"vllm::unified_attention",
"vllm::unified_attention_with_output",
"vllm::moe_forward",
"vllm::moe_forward_shared",
]