[Misc] Clean up the BitsAndBytes arguments (#15140)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
@@ -25,7 +25,7 @@ import torch
|
||||
# unsloth/tinyllama-bnb-4bit is a pre-quantized checkpoint.
|
||||
model_id = "unsloth/tinyllama-bnb-4bit"
|
||||
llm = LLM(model=model_id, dtype=torch.bfloat16, trust_remote_code=True, \
|
||||
-quantization="bitsandbytes", load_format="bitsandbytes")
|
||||
+quantization="bitsandbytes")
|
||||
```
|
||||
|
||||
## Inflight quantization: load as 4bit quantization
|
||||
@@ -35,7 +35,7 @@ from vllm import LLM
|
||||
import torch
|
||||
model_id = "huggyllama/llama-7b"
|
||||
llm = LLM(model=model_id, dtype=torch.bfloat16, trust_remote_code=True, \
|
||||
-quantization="bitsandbytes", load_format="bitsandbytes")
|
||||
+quantization="bitsandbytes")
|
||||
```
|
||||
|
||||
## OpenAI Compatible Server
|
||||
@@ -43,5 +43,5 @@ quantization="bitsandbytes", load_format="bitsandbytes")
|
||||
Append the following to your 4bit model arguments:
|
||||
|
||||
```console
|
||||
---quantization bitsandbytes --load-format bitsandbytes
|
||||
+--quantization bitsandbytes
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user