diff --git a/vllm/lora/ops/triton_ops/utils.py b/vllm/lora/ops/triton_ops/utils.py index 39c175f30..c7ac5914b 100644 --- a/vllm/lora/ops/triton_ops/utils.py +++ b/vllm/lora/ops/triton_ops/utils.py @@ -251,7 +251,7 @@ def get_lora_op_configs( else: default = { "block_m": 64, - "block_n": max(64, next_power_of_2(128 // num_slices)), + "block_n": 64 if num_slices > 1 else 128, "block_k": 16, "num_warps": 4, "num_ctas": 1,