[FEAT] [ROCm]: Add AITER RMS Norm (Layer Norm) Feature (#14959)
Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com>
This commit is contained in:
13
vllm/envs.py
13
vllm/envs.py
@@ -75,6 +75,8 @@ if TYPE_CHECKING:
|
||||
VLLM_SKIP_P2P_CHECK: bool = False
|
||||
VLLM_DISABLED_KERNELS: list[str] = []
|
||||
VLLM_USE_V1: bool = True
|
||||
VLLM_ROCM_USE_AITER: bool = False
|
||||
VLLM_ROCM_USE_AITER_RMSNORM: bool = True
|
||||
VLLM_ROCM_FP8_PADDING: bool = True
|
||||
VLLM_ENABLE_V1_MULTIPROCESSING: bool = True
|
||||
VLLM_LOG_BATCHSIZE_INTERVAL: float = -1
|
||||
@@ -528,6 +530,17 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
||||
"VLLM_USE_V1":
|
||||
lambda: bool(int(os.getenv("VLLM_USE_V1", "1"))),
|
||||
|
||||
# Disable aiter ops unless specifically enabled.
|
||||
# Acts as the parent switch that gates all of the other AITER operations.
|
||||
"VLLM_ROCM_USE_AITER":
|
||||
lambda: (os.getenv("VLLM_ROCM_USE_AITER", "False").lower() in
|
||||
("true", "1")),
|
||||
|
||||
# Use the AITER RMS norm op when AITER ops are enabled.
|
||||
"VLLM_ROCM_USE_AITER_RMSNORM":
|
||||
lambda: (os.getenv("VLLM_ROCM_USE_AITER_RMSNORM", "True").lower() in
|
||||
("true", "1")),
|
||||
|
||||
# Pad the fp8 weights to 256 bytes for ROCm
|
||||
"VLLM_ROCM_FP8_PADDING":
|
||||
lambda: bool(int(os.getenv("VLLM_ROCM_FP8_PADDING", "1"))),
|
||||
|
||||
Reference in New Issue
Block a user