diff --git a/vllm/model_executor/layers/quantization/quark/schemes/quark_ocp_mx.py b/vllm/model_executor/layers/quantization/quark/schemes/quark_ocp_mx.py index 9c0ce9723..c5f50122e 100644 --- a/vllm/model_executor/layers/quantization/quark/schemes/quark_ocp_mx.py +++ b/vllm/model_executor/layers/quantization/quark/schemes/quark_ocp_mx.py @@ -118,7 +118,12 @@ try: ) gemm_a4w4( - x_q, weight, x_s, weight_scale.view(x_s.dtype), y, bpreshuffle=True + x_q, + weight.view(x_q.dtype), + x_s, + weight_scale.view(x_s.dtype), + y, + bpreshuffle=True, ) return y[:M] else: