[Misc] Support routing logic simulation (#21990)

Signed-off-by: Ming Yang <minos.future@gmail.com>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Ming Yang
2025-08-06 23:06:20 -07:00
committed by GitHub
parent 370661856b
commit 82216dc21f
4 changed files with 481 additions and 0 deletions

View File

@@ -989,6 +989,15 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_MAX_TOKENS_PER_EXPERT_FP4_MOE":
lambda: int(os.getenv("VLLM_MAX_TOKENS_PER_EXPERT_FP4_MOE", "163840")),
# MoE routing strategy selector.
# See `RoutingSimulator.get_available_strategies()` # for available
# strategies.
# Cutstom routing strategies can be registered by
# RoutingSimulator.register_strategy()
# Note: custom strategies may not produce correct model outputs
"VLLM_MOE_ROUTING_SIMULATION_STRATEGY":
lambda: os.environ.get("VLLM_MOE_ROUTING_SIMULATION_STRATEGY", "").lower(),
# Regex timeout for use by the vLLM tool parsing plugins.
"VLLM_TOOL_PARSE_REGEX_TIMEOUT_SECONDS":
lambda: int(os.getenv("VLLM_TOOL_PARSE_REGEX_TIMEOUT_SECONDS", "1")),