Signed-off-by: arpitkh101 <arpit5khandelwal@gmail.com>
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
(cherry picked from commit d7284a2604)
This commit is contained in:
committed by
Kevin H. Luu
parent
a1d627e40f
commit
4fd9d6a85c
@@ -373,7 +373,7 @@ def test_attention_quant_pattern(
|
||||
|
||||
# Run model with attn fusion enabled
|
||||
vllm_config.compilation_config.pass_config = PassConfig(
|
||||
- enable_attn_fusion=True, enable_noop=True
|
||||
+ fuse_attn_quant=True, eliminate_noops=True
|
||||
)
|
||||
with (
|
||||
set_current_vllm_config(vllm_config),
|
||||
|
||||
Reference in New Issue
Block a user