Signed-off-by: Huamin Li <3ericli@gmail.com>
This commit is contained in:
@@ -111,17 +111,6 @@ if current_platform.is_cuda():
|
||||
async_tp=96, # MLP is MoE, half the fusions of dense
|
||||
),
|
||||
),
|
||||
ModelBackendTestCase(
|
||||
model_name="openai/gpt-oss-20b",
|
||||
model_kwargs=dict(max_model_len=1024, kv_cache_dtype="fp8"),
|
||||
backend=AttentionBackendEnum.FLASHINFER,
|
||||
matches=Matches(
|
||||
attention_fusion=0,
|
||||
allreduce_fusion=49,
|
||||
sequence_parallel=49,
|
||||
async_tp=48,
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
elif current_platform.is_rocm():
|
||||
|
||||
Reference in New Issue
Block a user