[Core] Support loading GGUF model (#5191)
Co-authored-by: Michael Goin <michael@neuralmagic.com>
This commit is contained in:
@@ -7,11 +7,12 @@ from typing import Tuple
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
from vllm.model_executor.layers.linear import UnquantizedLinearMethod
|
||||
from vllm.model_executor.layers.quantization.gptq import GPTQLinearMethod
|
||||
from vllm.model_executor.layers.quantization.gptq_marlin import (
|
||||
GPTQMarlinLinearMethod)
|
||||
from vllm.model_executor.layers.quantization.marlin import MarlinLinearMethod
|
||||
from vllm.model_executor.layers.vocab_parallel_embedding import (
|
||||
UnquantizedEmbeddingMethod)
|
||||
|
||||
PROMPT = "On the surface of Mars, we found"
|
||||
|
||||
@@ -37,7 +38,8 @@ def test_lm_head(
|
||||
lm_head_layer.linear_method,
|
||||
(GPTQLinearMethod, GPTQMarlinLinearMethod, MarlinLinearMethod))
|
||||
else:
|
||||
assert isinstance(lm_head_layer.linear_method, UnquantizedLinearMethod)
|
||||
assert isinstance(lm_head_layer.linear_method,
|
||||
UnquantizedEmbeddingMethod)
|
||||
|
||||
print(
|
||||
vllm_model.generate_greedy(prompts=["Hello my name is"],
|
||||
|
||||
Reference in New Issue
Block a user