From 7c3f88b2a895f58d653e9284dbb7b1da85c85e73 Mon Sep 17 00:00:00 2001
From: Thomas Parnell <tpa@zurich.ibm.com>
Date: Mon, 30 Mar 2026 14:32:26 +0200
Subject: [PATCH] [Bugfix] Remove false-positive format mismatch warnings in
 FLA ops (#38255)

Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
---
 vllm/model_executor/layers/fla/ops/chunk.py  | 8 --------
 vllm/model_executor/layers/fla/ops/cumsum.py | 9 ---------
 2 files changed, 17 deletions(-)

diff --git a/vllm/model_executor/layers/fla/ops/chunk.py b/vllm/model_executor/layers/fla/ops/chunk.py
index 926188595..73cba7f90 100644
--- a/vllm/model_executor/layers/fla/ops/chunk.py
+++ b/vllm/model_executor/layers/fla/ops/chunk.py
@@ -7,7 +7,6 @@
 # the following copyright notice:
 # Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
 # ruff: noqa: E501
-import warnings
 
 import torch
 
@@ -184,13 +183,6 @@ def chunk_gated_delta_rule(
         "ChunkGatedDeltaRuleFunction does not support float32. Please use bfloat16."
     )
     assert len(beta.shape) == 3, "beta must be of shape [B, T, H]."
-    if q.shape[1] < q.shape[2]:
-        warnings.warn(
-            f"Input tensor shape suggests potential format mismatch: seq_len ({q.shape[1]}) < num_heads ({q.shape[2]}). "
-            "This may indicate the inputs were passed in head-first format [B, H, T, ...] "
-            "Please verify your input tensor format matches the expected shape [B, T, H, ...].",
-            stacklevel=2,
-        )
     if cu_seqlens is not None:
         if q.shape[0] != 1:
             raise ValueError(
diff --git a/vllm/model_executor/layers/fla/ops/cumsum.py b/vllm/model_executor/layers/fla/ops/cumsum.py
index 99b417947..13238020c 100644
--- a/vllm/model_executor/layers/fla/ops/cumsum.py
+++ b/vllm/model_executor/layers/fla/ops/cumsum.py
@@ -7,7 +7,6 @@
 # the following copyright notice:
 # Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
 # ruff: noqa: E501
-import warnings
 
 import torch
 
@@ -252,14 +251,6 @@ def chunk_local_cumsum(
     output_dtype: torch.dtype | None = torch.float,
     **kwargs,
 ) -> torch.Tensor:
-    if not head_first and g.shape[1] < g.shape[2]:
-        warnings.warn(
-            f"Input tensor shape suggests potential format mismatch: seq_len ({g.shape[1]}) < num_heads ({g.shape[2]}). "
-            "This may indicate the inputs were passed in head-first format [B, H, T, ...] "
-            "when head_first=False was specified. "
-            "Please verify your input tensor format matches the expected shape [B, T, H, ...].",
-            stacklevel=2,
-        )
     if cu_seqlens is not None:
         assert g.shape[0] == 1, (
             "Only batch size 1 is supported when cu_seqlens are provided"