[Bugfix] Fix support for dimension-like integers and ScalarType (#9299)

Author: bnellnm
Date: 2024-10-17 15:08:34 -04:00 (committed by GitHub)
Parent: 0f41fbe5a3
Commit: eca2c5f7c0
22 changed files with 427 additions and 677 deletions

@@ -89,7 +89,7 @@ torch::Tensor gptq_marlin_24_gemm(torch::Tensor& a, torch::Tensor& b_q_weight,
                                    torch::Tensor& b_meta,
                                    torch::Tensor& b_scales,
                                    torch::Tensor& workspace,
-                                   vllm::ScalarTypeTorchPtr const& b_q_type,
+                                   vllm::ScalarTypeId const b_q_type_id,
                                    int64_t size_m, int64_t size_n,
                                    int64_t size_k) {
   TORCH_CHECK_NOT_IMPLEMENTED(
@@ -1029,13 +1029,14 @@ torch::Tensor gptq_marlin_24_gemm(torch::Tensor& a, torch::Tensor& b_q_weight,
                                    torch::Tensor& b_meta,
                                    torch::Tensor& b_scales,
                                    torch::Tensor& workspace,
-                                   vllm::ScalarTypeTorchPtr const& b_q_type,
+                                   vllm::ScalarTypeId const b_q_type_id,
                                    int64_t size_m, int64_t size_n,
                                    int64_t size_k) {
+  vllm::ScalarType const b_q_type = vllm::ScalarType::from_id(b_q_type_id);
   // Verify num_bits
-  TORCH_CHECK(*b_q_type == vllm::kU4B8 || *b_q_type == vllm::kU8B128,
-              "num_bits must be uint4b8 or uint8b128. Got = ", b_q_type->str());
-  int pack_factor = 32 / b_q_type->size_bits();
+  TORCH_CHECK(b_q_type == vllm::kU4B8 || b_q_type == vllm::kU8B128,
+              "num_bits must be uint4b8 or uint8b128. Got = ", b_q_type.str());
+  int pack_factor = 32 / b_q_type.size_bits();
 
   // Verify M
   TORCH_CHECK(size_m == a.size(0),
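
The substantive change in this hunk is a switch from pointer to value semantics: the op now receives a plain ScalarTypeId integer and rebuilds a ScalarType value with from_id, which is why the checks go from *b_q_type and -> to b_q_type and "." below. A minimal, self-contained sketch of that id round-trip, using a toy two-field ScalarType (the real class in vLLM's csrc/core/scalar_type.hpp carries more fields and a different encoding; only the encode/decode idea is the same):

#include <cassert>
#include <cstdint>

using ScalarTypeId = int64_t;  // stand-in for vllm::ScalarTypeId

// Toy value type: a bit width plus a bias, packed into a single integer id
// so it can cross an op boundary that only accepts schema-native types.
struct ScalarType {
  int32_t bits;
  int32_t bias;

  constexpr int32_t size_bits() const { return bits; }

  // Flatten to an id: bits in the high half, bias in the low half.
  constexpr ScalarTypeId id() const {
    return (static_cast<ScalarTypeId>(bits) << 32) |
           static_cast<uint32_t>(bias);
  }

  // Inverse of id(): reconstruct the value on the callee side.
  static constexpr ScalarType from_id(ScalarTypeId id) {
    return ScalarType{static_cast<int32_t>(id >> 32),
                      static_cast<int32_t>(static_cast<uint32_t>(id))};
  }

  constexpr bool operator==(ScalarType const& o) const {
    return bits == o.bits && bias == o.bias;
  }
};

constexpr ScalarType kU4B8{4, 8};      // 4-bit unsigned, bias 8
constexpr ScalarType kU8B128{8, 128};  // 8-bit unsigned, bias 128

int main() {
  ScalarTypeId b_q_type_id = kU4B8.id();                    // caller flattens the type
  ScalarType b_q_type = ScalarType::from_id(b_q_type_id);   // callee rebuilds a value
  assert(b_q_type == kU4B8);
  assert(32 / b_q_type.size_bits() == 8);  // pack_factor for 4-bit weights
}
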
@@ -1130,8 +1131,8 @@ torch::Tensor gptq_marlin_24_gemm(torch::Tensor& a, torch::Tensor& b_q_weight,
   marlin_24::marlin_cuda_2_4(
       a.data_ptr(), b_q_weight.data_ptr(), b_meta.data_ptr(), c.data_ptr(),
       b_scales.data_ptr(), size_n, size_m, size_k, workspace.data_ptr(),
-      b_q_type->size_bits(), groupsize, dev,
-      at::cuda::getCurrentCUDAStream(dev), thread_k, thread_m, sms, max_par);
+      b_q_type.size_bits(), groupsize, dev, at::cuda::getCurrentCUDAStream(dev),
+      thread_k, thread_m, sms, max_par);
 
   return c;
 }
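
With the id-based signature, every argument of the op is schema-native, which is also what allows the size arguments to be exposed as dimension-like integers per the commit title. A hedged sketch of what such a registration could look like, modeled on the ops.def/ops.impl style vLLM uses in csrc/torch_bindings.cpp; the exact schema in the real bindings may differ, and declaring the sizes as SymInt is an assumption tied to the commit title:

#include <torch/all.h>
#include <torch/library.h>

// Dummy stand-in body so the sketch compiles on its own; the real kernel is
// the one in the diff above (ScalarTypeId is an int64_t alias there).
torch::Tensor gptq_marlin_24_gemm(torch::Tensor& a, torch::Tensor& b_q_weight,
                                  torch::Tensor& b_meta,
                                  torch::Tensor& b_scales,
                                  torch::Tensor& workspace,
                                  int64_t b_q_type_id, int64_t size_m,
                                  int64_t size_n, int64_t size_k) {
  return torch::empty({size_m, size_n}, a.options());
}

TORCH_LIBRARY(sketch_ops, ops) {
  // `int` in the schema binds ScalarTypeId as an ordinary integer; `SymInt`
  // marks the sizes as dimension-like so they can be traced symbolically.
  ops.def(
      "gptq_marlin_24_gemm(Tensor a, Tensor b_q_weight, Tensor b_meta, "
      "Tensor b_scales, Tensor workspace, int b_q_type_id, "
      "SymInt size_m, SymInt size_n, SymInt size_k) -> Tensor");
  ops.impl("gptq_marlin_24_gemm", torch::kCUDA, &gptq_marlin_24_gemm);
}

Because the type descriptor crosses the boundary as an int and the sizes as SymInts, nothing in the signature depends on a custom torch class, which is the failure mode the original ScalarTypeTorchPtr argument ran into.
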