[Kernel] [Triton] [AMD] Adding Triton implementations of awq_dequantize and awq_gemm to support AWQ (#7386)
This commit is contained in:
@@ -400,6 +400,10 @@ environment_variables: Dict[str, Callable[[], Any]] = {
|
||||
"VLLM_TORCH_PROFILER_DIR":
|
||||
lambda: (None if os.getenv("VLLM_TORCH_PROFILER_DIR", None) is None else os
|
||||
.path.expanduser(os.getenv("VLLM_TORCH_PROFILER_DIR", "."))),
|
||||
|
||||
# If set to a non-zero integer (e.g. 1), vLLM will use the Triton implementations of AWQ; unset or 0 disables them.
|
||||
"VLLM_USE_TRITON_AWQ":
|
||||
lambda: bool(int(os.getenv("VLLM_USE_TRITON_AWQ", "0"))),
|
||||
}
|
||||
|
||||
# end-env-vars-definition
|
||||
|
||||
Reference in New Issue
Block a user