[Misc] Directly use compressed-tensors for checkpoint definitions (#8909)

Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Michael Goin
2024-10-15 18:40:25 -04:00
committed by GitHub
parent 5d264f4ab8
commit 22f8a69549
10 changed files with 15 additions and 115 deletions

View File

@@ -6,13 +6,12 @@ from typing import Optional
import pytest
import torch
+from compressed_tensors.quantization import QuantizationType
from vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors import ( # noqa: E501
CompressedTensorsLinearMethod, CompressedTensorsW4A16Sparse24,
CompressedTensorsW8A8Fp8, CompressedTensorsW8A8Int8,
CompressedTensorsW8A16Fp8, CompressedTensorsWNA16)
-from vllm.model_executor.layers.quantization.compressed_tensors.utils import (
-QuantizationType)
@pytest.mark.parametrize(