[Bugfix] Fix GPT-OSS AR+NORM fusion (#28841)
Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com>
This commit is contained in:
@@ -111,6 +111,17 @@ if current_platform.is_cuda():
|
||||
async_tp=96, # MLP is MoE, half the fusions of dense
|
||||
),
|
||||
),
|
||||
ModelBackendTestCase(
|
||||
model_name="openai/gpt-oss-20b",
|
||||
model_kwargs=dict(max_model_len=1024, kv_cache_dtype="fp8"),
|
||||
backend=AttentionBackendEnum.FLASHINFER,
|
||||
matches=Matches(
|
||||
attention_fusion=0,
|
||||
allreduce_fusion=49,
|
||||
sequence_parallel=49,
|
||||
async_tp=48,
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
elif current_platform.is_rocm():
|
||||
|
||||
Reference in New Issue
Block a user