diff --git a/vllm/envs.py b/vllm/envs.py
index cfbf56ee1..8c6eef3e7 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -35,7 +35,7 @@ if TYPE_CHECKING:
     VLLM_USAGE_STATS_SERVER: str = "https://stats.vllm.ai"
     VLLM_NO_USAGE_STATS: bool = False
     VLLM_DO_NOT_TRACK: bool = False
-    VLLM_USAGE_SOURCE: str = ""
+    VLLM_USAGE_SOURCE: str = "production"
     VLLM_CONFIGURE_LOGGING: bool = True
     VLLM_LOGGING_LEVEL: str = "INFO"
     VLLM_LOGGING_PREFIX: str = ""
@@ -48,7 +48,7 @@ if TYPE_CHECKING:
     VLLM_USE_FLASHINFER_SAMPLER: bool | None = None
     VLLM_PP_LAYER_PARTITION: str | None = None
     VLLM_CPU_KVCACHE_SPACE: int | None = 0
-    VLLM_CPU_OMP_THREADS_BIND: str = ""
+    VLLM_CPU_OMP_THREADS_BIND: str = "auto"
     VLLM_CPU_NUM_OF_RESERVED_CPU: int | None = None
     VLLM_CPU_SGL_KERNEL: bool = False
     VLLM_XLA_CACHE_PATH: str = os.path.join(VLLM_CACHE_ROOT, "xla_cache")
@@ -89,7 +89,7 @@ if TYPE_CHECKING:
     VLLM_LORA_RESOLVER_CACHE_DIR: str | None = None
     VLLM_LORA_RESOLVER_HF_REPO_LIST: str | None = None
     VLLM_USE_AOT_COMPILE: bool = False
-    VLLM_USE_BYTECODE_HOOK: bool = False
+    VLLM_USE_BYTECODE_HOOK: bool = True
     VLLM_FORCE_AOT_LOAD: bool = False
     VLLM_USE_MEGA_AOT_ARTIFACT: bool = False
     VLLM_USE_TRITON_AWQ: bool = False