[Misc] unify variable for LLM instance (#20996)
Signed-off-by: Andy Xie <andy.xning@gmail.com>
This commit is contained in:
@@ -86,8 +86,9 @@ Load and run the model in `vllm`:
|
||||
|
||||
```python
|
||||
from vllm import LLM
|
||||
model = LLM("./Meta-Llama-3-8B-Instruct-FP8-Dynamic")
|
||||
result = model.generate("Hello my name is")
|
||||
|
||||
llm = LLM("./Meta-Llama-3-8B-Instruct-FP8-Dynamic")
|
||||
result = llm.generate("Hello my name is")
|
||||
print(result[0].outputs[0].text)
|
||||
```
|
||||
|
||||
@@ -125,9 +126,10 @@ In this mode, all Linear modules (except for the final `lm_head`) have their wei
|
||||
|
||||
```python
|
||||
from vllm import LLM
|
||||
model = LLM("facebook/opt-125m", quantization="fp8")
|
||||
|
||||
llm = LLM("facebook/opt-125m", quantization="fp8")
|
||||
# INFO 06-10 17:55:42 model_runner.py:157] Loading model weights took 0.1550 GB
|
||||
result = model.generate("Hello, my name is")
|
||||
result = llm.generate("Hello, my name is")
|
||||
print(result[0].outputs[0].text)
|
||||
```
|
||||
|
||||
|
||||
Reference in New Issue
Block a user