Force TRTLLM attention for gpt-oss on SM100 (#22678)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin
2025-08-13 00:22:16 -04:00
committed by GitHub
parent b1361c7273
commit c6b928798e
4 changed files with 20 additions and 9 deletions

View File

@@ -285,6 +285,7 @@ class PerLayerParameters:
window_left: int
logits_soft_cap: Optional[float]
sm_scale: float
+has_sinks: bool = False
def get_per_layer_parameters(
@@ -307,9 +308,11 @@ def get_per_layer_parameters(
window_left = window_size[0] if window_size is not None else -1
logits_soft_cap = getattr(impl, "logits_soft_cap", None)
sm_scale = impl.scale
+has_sinks = getattr(impl, "sinks", None) is not None
per_layer_params[key] = PerLayerParameters(window_left,
-logits_soft_cap, sm_scale)
+logits_soft_cap, sm_scale,
+has_sinks)
return per_layer_params