[Misc] Directly use compressed-tensors for checkpoint definitions (#8909)

Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
2024-10-15 18:40:25 -04:00
parent 5d264f4ab8
commit 22f8a69549
10 changed files with 15 additions and 115 deletions
--- a/tests/quantization/test_compressed_tensors.py
+++ b/tests/quantization/test_compressed_tensors.py
@@ -6,13 +6,12 @@ from typing import Optional

 import pytest
 import torch
+from compressed_tensors.quantization import QuantizationType

 from vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors import (  # noqa: E501
    CompressedTensorsLinearMethod, CompressedTensorsW4A16Sparse24,
    CompressedTensorsW8A8Fp8, CompressedTensorsW8A8Int8,
    CompressedTensorsW8A16Fp8, CompressedTensorsWNA16)
-from vllm.model_executor.layers.quantization.compressed_tensors.utils import (
-    QuantizationType)


@pytest.mark.parametrize(