[Attention][V0 Deprecation] Deprecate accept output buffer (#39125)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
This commit is contained in:
@@ -216,12 +216,14 @@ def test_splitting_ops_dynamic():
|
||||
compilation_config=CompilationConfig(
|
||||
mode=CompilationMode.VLLM_COMPILE,
|
||||
use_inductor_graph_partition=True,
|
||||
- splitting_ops=["vllm::unified_attention"],
|
||||
+ splitting_ops=["vllm::unified_attention_with_output"],
|
||||
)
|
||||
)
|
||||
# with inductor partition we use splitting_ops directly for
|
||||
# partition rules
|
||||
- assert config.compilation_config.splitting_ops == ["vllm::unified_attention"]
|
||||
+ assert config.compilation_config.splitting_ops == [
|
||||
+ "vllm::unified_attention_with_output"
|
||||
+ ]
|
||||
|
||||
# When attn_fusion pass enabled.
|
||||
config = VllmConfig(
|
||||
@@ -281,7 +283,7 @@ def test_moe_splitting_ops_deepep_ht_inductor_partition():
|
||||
mode=CompilationMode.VLLM_COMPILE,
|
||||
use_inductor_graph_partition=True,
|
||||
splitting_ops=[
|
||||
- "vllm::unified_attention",
|
||||
+ "vllm::unified_attention_with_output",
|
||||
"vllm::moe_forward",
|
||||
"vllm::moe_forward_shared",
|
||||
],
|
||||
@@ -289,7 +291,7 @@ def test_moe_splitting_ops_deepep_ht_inductor_partition():
|
||||
)
|
||||
splitting_ops = config.compilation_config.splitting_ops
|
||||
assert splitting_ops == [
|
||||
- "vllm::unified_attention",
|
||||
+ "vllm::unified_attention_with_output",
|
||||
"vllm::moe_forward",
|
||||
"vllm::moe_forward_shared",
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user