[CORE] [QUANT] Support for GPTQModel's dynamic quantization per module override/control (#7086)
This commit is contained in:
committed by
GitHub
parent
2c2b560f48
commit
36a08630e8
@@ -108,9 +108,9 @@ class LogitsProcessor(nn.Module):
|
||||
embedding_bias: Optional[torch.Tensor],
|
||||
) -> Optional[torch.Tensor]:
|
||||
# Get the logits for the next tokens.
|
||||
- logits = lm_head.linear_method.apply(lm_head,
|
||||
- hidden_states,
|
||||
- bias=embedding_bias)
|
||||
+ logits = lm_head.quant_method.apply(lm_head,
|
||||
+ hidden_states,
|
||||
+ bias=embedding_bias)
|
||||
|
||||
# Gather logits for TP
|
||||
logits = self._gather_logits(logits)
|
||||
|
||||
Reference in New Issue
Block a user