[FEATURE] support custom vllm tuned config path for fused moe triton kernels (#22791)

Signed-off-by: Chi Zhang <zhangchi.usc1992@bytedance.com>
This commit is contained in:
Chi Zhang
2025-08-13 20:27:25 +08:00
committed by GitHub
parent 653124bd46
commit 98deac3879
2 changed files with 26 additions and 8 deletions

View File

@@ -158,6 +158,7 @@ if TYPE_CHECKING:
VLLM_USE_TRTLLM_ATTENTION: Optional[str] = None
VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8: bool = False
VLLM_USE_FLASHINFER_MOE_MXFP4_BF16: bool = False
VLLM_TUNED_CONFIG_FOLDER: Optional[str] = None
def get_default_cache_root():
@@ -1120,6 +1121,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
# never removed from memory until the server terminates.
"VLLM_ENABLE_RESPONSES_API_STORE":
lambda: bool(int(os.getenv("VLLM_ENABLE_RESPONSES_API_STORE", "0"))),
# Optional folder in which vLLM looks for tuned config files;
# unset (None) by default.
"VLLM_TUNED_CONFIG_FOLDER":
lambda: os.getenv("VLLM_TUNED_CONFIG_FOLDER", None),
}
# --8<-- [end:env-vars-definition]