[Model] Add LongCat-Flash (#23991)

Signed-off-by: yangxurui <yangxurui@meituan.com>
Co-authored-by: yangxurui <yangxurui@meituan.com>
This commit is contained in:
XuruiYang
2025-09-25 12:53:40 +08:00
committed by GitHub
parent 90b139cfff
commit 845adb3ec6
31 changed files with 1357 additions and 66 deletions

View File

@@ -138,7 +138,7 @@ def test_flashinfer_per_tensor_moe_fp8_no_graph(
td = TestData.make_moe_tensors_8bit(m, k, n, e, reorder=True)
score = torch.randn((m, e), device="cuda", dtype=torch.bfloat16)
topk_weights, topk_ids = FusedMoE.select_experts(
topk_weights, topk_ids, _ = FusedMoE.select_experts(
hidden_states=td.hidden_states,
router_logits=score,
use_grouped_topk=False,
@@ -206,7 +206,7 @@ def test_flashinfer_cutlass_moe_fp8_no_graph(
td = TestData.make_moe_tensors_8bit(m, k, n, e, reorder=False)
score = torch.randn((m, e), device="cuda", dtype=torch.bfloat16)
topk_weights, topk_ids = FusedMoE.select_experts(
topk_weights, topk_ids, _ = FusedMoE.select_experts(
hidden_states=td.hidden_states,
router_logits=score,
use_grouped_topk=False,