Remove dead bitsandbytes CxB code from 8-bit inference path (#34633)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -336,16 +336,6 @@ class BitsAndBytesLinearMethod(LinearMethodBase):
|
||||
|
||||
current_index += output_size
|
||||
|
||||
# only update the matmul_states if it is not profile_run
|
||||
if (
|
||||
generation > 0
|
||||
and not self.quant_config.llm_int8_has_fp16_weight
|
||||
and matmul_states[i].CB is not None
|
||||
and matmul_states[i].CxB is not None
|
||||
):
|
||||
del matmul_states[i].CB
|
||||
qweight[offsets[i] : offsets[i + 1]] = matmul_states[i].CxB
|
||||
|
||||
out = out.to(original_type)
|
||||
|
||||
if reshape_after_matmul:
|
||||
|
||||
Reference in New Issue
Block a user