[Kernel] [Triton] [AMD] Adding Triton implementations awq_dequantize and awq_gemm to support AWQ (#7386)

This commit is contained in:
rasmith
2024-08-28 14:37:47 -05:00
committed by GitHub
parent b98cc28f91
commit e5697d161c
5 changed files with 493 additions and 1 deletions

View File

@@ -400,6 +400,10 @@ environment_variables: Dict[str, Callable[[], Any]] = {
"VLLM_TORCH_PROFILER_DIR":
lambda: (None if os.getenv("VLLM_TORCH_PROFILER_DIR", None) is None else os
.path.expanduser(os.getenv("VLLM_TORCH_PROFILER_DIR", "."))),
# If set to a non-zero integer (e.g. 1), vLLM will use the Triton
# implementations of AWQ. Defaults to 0 (disabled).
"VLLM_USE_TRITON_AWQ":
lambda: bool(int(os.getenv("VLLM_USE_TRITON_AWQ", "0"))),
}
# end-env-vars-definition