[Minor] Add more detailed explanation on quantization argument (#2145)

2023-12-17 01:56:16 -08:00
parent 3a765bd5e1
commit 30fb0956df
2 changed files with 10 additions and 4 deletions
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -183,7 +183,12 @@ class EngineArgs:
                            type=str,
                            choices=['awq', 'gptq', 'squeezellm', None],
                            default=None,
-                            help='Method used to quantize the weights')
+                            help='Method used to quantize the weights. If '
+                            'None, we first check the `quantization_config` '
+                            'attribute in the model config file. If that is '
+                            'None, we assume the model weights are not '
+                            'quantized and use `dtype` to determine the data '
+                            'type of the weights.')
        parser.add_argument('--enforce-eager',
                            action='store_true',
                            help='Always use eager-mode PyTorch. If False, '