[Doc] add load_format items in docs (#14804)
Signed-off-by: wwl2755 <wangwenlong2755@gmail.com>
This commit is contained in:
@@ -1294,6 +1294,12 @@ class LoadConfig:
|
|||||||
"tensorizer" will use CoreWeave's tensorizer library for
|
"tensorizer" will use CoreWeave's tensorizer library for
|
||||||
fast weight loading.
|
fast weight loading.
|
||||||
"bitsandbytes" will load nf4 type weights.
|
"bitsandbytes" will load nf4 type weights.
|
||||||
|
"sharded_state" will load weights from pre-sharded checkpoint files,
|
||||||
|
supporting efficient loading of tensor-parallel models.
|
||||||
|
"gguf" will load weights from GGUF format files.
|
||||||
|
"mistral" will load weights from consolidated safetensors files used
|
||||||
|
by Mistral models.
|
||||||
|
"runai_streamer" will load Safetensors weights using the Run:ai Model Streamer.
|
||||||
model_loader_extra_config: The extra config for the model loader.
|
model_loader_extra_config: The extra config for the model loader.
|
||||||
ignore_patterns: The list of patterns to ignore when loading the model.
|
ignore_patterns: The list of patterns to ignore when loading the model.
|
||||||
Default to "original/**/*" to avoid repeated loading of llama's
|
Default to "original/**/*" to avoid repeated loading of llama's
|
||||||
|
|||||||
@@ -339,9 +339,15 @@ class EngineArgs:
|
|||||||
'CoreWeave. See the Tensorize vLLM Model script in the Examples '
|
'CoreWeave. See the Tensorize vLLM Model script in the Examples '
|
||||||
'section for more information.\n'
|
'section for more information.\n'
|
||||||
'* "runai_streamer" will load the Safetensors weights using Run:ai '
|
'* "runai_streamer" will load the Safetensors weights using Run:ai '
|
||||||
'Model Streamer \n'
|
'Model Streamer.\n'
|
||||||
'* "bitsandbytes" will load the weights using bitsandbytes '
|
'* "bitsandbytes" will load the weights using bitsandbytes '
|
||||||
'quantization.\n')
|
'quantization.\n'
|
||||||
|
'* "sharded_state" will load weights from pre-sharded checkpoint '
|
||||||
|
'files, supporting efficient loading of tensor-parallel models.\n'
|
||||||
|
'* "gguf" will load weights from GGUF format files (details '
|
||||||
|
'specified in https://github.com/ggml-org/ggml/blob/master/docs/gguf.md).\n'
|
||||||
|
'* "mistral" will load weights from consolidated safetensors files '
|
||||||
|
'used by Mistral models.\n')
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--config-format',
|
'--config-format',
|
||||||
default=EngineArgs.config_format,
|
default=EngineArgs.config_format,
|
||||||
|
|||||||
Reference in New Issue
Block a user