[Bugfix] Fix scale not being passed to the encoder Triton attention implementation (#32149)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
Isotr0py
2026-01-12 19:13:41 +08:00
committed by GitHub
parent a5f89ae296
commit 9dbe1fe960
4 changed files with 13 additions and 27 deletions

View File

@@ -4,10 +4,7 @@
from argparse import Namespace
from vllm import LLM, EngineArgs
from vllm.config import AttentionConfig
from vllm.platforms import current_platform
from vllm.utils.argparse_utils import FlexibleArgumentParser
from vllm.v1.attention.backends.registry import AttentionBackendEnum
def parse_args():
@@ -23,11 +20,6 @@ def parse_args():
def main(args: Namespace):
if current_platform.is_rocm():
args.attention_config = AttentionConfig(
backend=AttentionBackendEnum.FLEX_ATTENTION
)
# Sample prompts.
prompts = [
"Hello, my name is",