Remove dead bitsandbytes CxB code from 8-bit inference path (#34633)

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 04:49:14 -05:00
parent 1d65283e95
commit d44a5b6c47
1 changed file with 0 additions and 10 deletions
--- a/vllm/model_executor/layers/quantization/bitsandbytes.py
+++ b/vllm/model_executor/layers/quantization/bitsandbytes.py
@@ -336,16 +336,6 @@ class BitsAndBytesLinearMethod(LinearMethodBase):

            current_index += output_size

-            # only update the matmul_states if it is not profile_run
-            if (
-                generation > 0
-                and not self.quant_config.llm_int8_has_fp16_weight
-                and matmul_states[i].CB is not None
-                and matmul_states[i].CxB is not None
-            ):
-                del matmul_states[i].CB
-                qweight[offsets[i] : offsets[i + 1]] = matmul_states[i].CxB
-
        out = out.to(original_type)

        if reshape_after_matmul: