[Kernel] [Triton] [AMD] Adding Triton implementations awq_dequantize and awq_gemm to support AWQ (#7386)

2024-08-28 14:37:47 -05:00
parent b98cc28f91
commit e5697d161c
5 changed files with 493 additions and 1 deletions
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -400,6 +400,10 @@ environment_variables: Dict[str, Callable[[], Any]] = {
    "VLLM_TORCH_PROFILER_DIR":
    lambda: (None if os.getenv("VLLM_TORCH_PROFILER_DIR", None) is None else os
             .path.expanduser(os.getenv("VLLM_TORCH_PROFILER_DIR", "."))),
+
+    # If set to 1, vLLM will use Triton implementations of AWQ (default: 0).
+    "VLLM_USE_TRITON_AWQ":
+    lambda: bool(int(os.getenv("VLLM_USE_TRITON_AWQ", "0"))),
 }

 # end-env-vars-definition