[Core] add and implement VLLM_LOGITS_PROCESSOR_THREADS (#12368)

Signed-off-by: Aviv Keshet <akeshet@scaledcognition.com>
This commit is contained in:
Aviv Keshet
2025-02-04 18:46:26 -08:00
committed by GitHub
parent 75e94309e8
commit b3a0d01e45
2 changed files with 44 additions and 11 deletions

View File

@@ -31,6 +31,7 @@ if TYPE_CHECKING:
VLLM_LOGGING_LEVEL: str = "INFO"
VLLM_LOGGING_PREFIX: str = ""
VLLM_LOGGING_CONFIG_PATH: Optional[str] = None
VLLM_LOGITS_PROCESSOR_THREADS: Optional[int] = None
VLLM_TRACE_FUNCTION: int = 0
VLLM_ATTENTION_BACKEND: Optional[str] = None
VLLM_USE_FLASHINFER_SAMPLER: Optional[bool] = None
@@ -282,6 +283,14 @@ environment_variables: Dict[str, Callable[[], Any]] = {
"VLLM_LOGGING_PREFIX":
lambda: os.getenv("VLLM_LOGGING_PREFIX", ""),
# If set, vLLM will call logits processors in a thread pool with this many
# threads. This is useful when using custom logits processors that either
# (a) launch additional CUDA kernels or (b) do significant CPU-bound work
# while not holding the Python GIL, or both. When the variable is unset,
# the value is None.
"VLLM_LOGITS_PROCESSOR_THREADS":
lambda: int(os.getenv("VLLM_LOGITS_PROCESSOR_THREADS", "0"))
if "VLLM_LOGITS_PROCESSOR_THREADS" in os.environ else None,
# Trace function calls
# If set to 1, vllm will trace function calls
# Useful for debugging