[Core] add and implement VLLM_LOGITS_PROCESSOR_THREADS (#12368)
Signed-off-by: Aviv Keshet <akeshet@scaledcognition.com>
This commit is contained in:
@@ -31,6 +31,7 @@ if TYPE_CHECKING:
|
||||
VLLM_LOGGING_LEVEL: str = "INFO"
|
||||
VLLM_LOGGING_PREFIX: str = ""
|
||||
VLLM_LOGGING_CONFIG_PATH: Optional[str] = None
|
||||
VLLM_LOGITS_PROCESSOR_THREADS: Optional[int] = None
|
||||
VLLM_TRACE_FUNCTION: int = 0
|
||||
VLLM_ATTENTION_BACKEND: Optional[str] = None
|
||||
VLLM_USE_FLASHINFER_SAMPLER: Optional[bool] = None
|
||||
@@ -282,6 +283,14 @@ environment_variables: Dict[str, Callable[[], Any]] = {
|
||||
"VLLM_LOGGING_PREFIX":
|
||||
lambda: os.getenv("VLLM_LOGGING_PREFIX", ""),
|
||||
|
||||
# If set, vllm will call logits processors in a thread pool with this many
|
||||
# threads. This is useful when using custom logits processors that either
|
||||
# (a) launch additional CUDA kernels or (b) do significant CPU-bound work
|
||||
# while not holding the Python GIL, or both.
|
||||
"VLLM_LOGITS_PROCESSOR_THREADS":
|
||||
lambda: int(os.getenv("VLLM_LOGITS_PROCESSOR_THREADS", "0"))
|
||||
if "VLLM_LOGITS_PROCESSOR_THREADS" in os.environ else None,
|
||||
|
||||
# Trace function calls
|
||||
# If set to 1, vllm will trace function calls
|
||||
# Useful for debugging
|
||||
|
||||
Reference in New Issue
Block a user