[Bugfix gpt-oss] Fix float32 convert for flashinfer sink support (#23016)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
@@ -642,9 +642,6 @@ class FlashInferImpl(AttentionImpl):
|
||||
f"heads in the layer. Expected {num_heads}, but got "
|
||||
f"{sinks.shape[0]}."
|
||||
)
|
||||
# Cast sinks to float32 if needed (FlashInfer requirement)
|
||||
if sinks.dtype != torch.float32:
|
||||
sinks = sinks.to(torch.float32)
|
||||
self.sinks = sinks
|
||||
|
||||
def forward(
|
||||
|
||||
Reference in New Issue
Block a user