[Model] [Quantization] Support deepseek_v3 w8a8 fp8 block-wise quantization (#11523)
Signed-off-by: mgoin <michael@neuralmagic.com>
Signed-off-by: simon-mo <simon.mo@hey.com>
Signed-off-by: simon-mo <xmo@berkeley.edu>
Co-authored-by: simon-mo <simon.mo@hey.com>
Co-authored-by: simon-mo <xmo@berkeley.edu>
Co-authored-by: HandH1998 <1335248067@qq.com>
This commit is contained in:
@@ -161,7 +161,7 @@ class ModelConfig:
|
||||
override default pooling config for the pooling model.
|
||||
logits_processor_pattern: Optional regex pattern specifying valid
|
||||
logits processor qualified names that can be passed with the
|
||||
- `logits_processors` extra completion argument. Defaults to None,
|
||||
+ `logits_processors` extra completion argument. Defaults to None,
|
||||
which allows no processors.
|
||||
generation_config: Configuration parameter file for generation.
|
||||
"""
|
||||
@@ -364,7 +364,7 @@ class ModelConfig:
|
||||
def maybe_pull_model_tokenizer_for_s3(self, model: str,
|
||||
tokenizer: str) -> None:
|
||||
"""
|
||||
- Pull the model config or tokenizer to a temporary
|
||||
+ Pull the model config or tokenizer to a temporary
|
||||
directory in case of S3.
|
||||
|
||||
Args:
|
||||
@@ -866,14 +866,14 @@ class ModelConfig:
|
||||
|
||||
def get_diff_sampling_param(self) -> Dict[str, Any]:
|
||||
"""
|
||||
- This method returns a dictionary containing the parameters
|
||||
- that differ from the default sampling parameters, but only
|
||||
- if `generation_config` is set. If `generation_config` is not
|
||||
+ This method returns a dictionary containing the parameters
|
||||
+ that differ from the default sampling parameters, but only
|
||||
+ if `generation_config` is set. If `generation_config` is not
|
||||
set, an empty dictionary is returned.
|
||||
|
||||
Returns:
|
||||
- Dict[str, Any]: A dictionary with the differing sampling
|
||||
- parameters if `generation_config` is set, otherwise an
|
||||
+ Dict[str, Any]: A dictionary with the differing sampling
|
||||
+ parameters if `generation_config` is set, otherwise an
|
||||
empty dictionary.
|
||||
"""
|
||||
if self.generation_config is None:
|
||||
|
||||
Reference in New Issue
Block a user