2025-05-24 07:16:26 +08:00
regex # Replaces the standard-library `re` module for higher-performance regex matching
2025-03-15 17:52:05 +08:00
cachetools
2023-05-07 16:30:43 -07:00
psutil
sentencepiece # Required for LLaMA tokenizer.
2025-03-29 12:11:51 +08:00
numpy
2024-09-27 23:45:50 -07:00
requests >= 2.26.0
2024-06-19 22:37:33 +08:00
tqdm
2024-12-11 19:55:30 -05:00
blake3
2024-03-28 22:16:12 -07:00
py-cpuinfo
2025-11-01 13:11:07 +08:00
transformers >= 4.56.0, < 5
2025-04-17 07:45:24 -07:00
tokenizers >= 0.21.1 # Required for fast incremental detokenization.
2026-01-27 20:15:53 -08:00
protobuf # Required by LlamaTokenizer, gRPC.
2025-02-25 22:03:33 +08:00
fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
2024-05-15 19:13:36 -04:00
aiohttp
2025-08-19 00:22:59 +02:00
openai >= 1.99.1 # For Responses API with reasoning content
2025-10-09 14:02:40 +01:00
pydantic >= 2.12.0
2024-02-28 13:38:26 +08:00
prometheus_client >= 0.18.0
2024-12-18 03:34:08 -03:00
pillow # Required for image processing
2024-04-29 01:59:33 +03:00
prometheus-fastapi-instrumentator >= 7.0.0
2024-05-17 14:58:52 -04:00
tiktoken >= 0.6.0 # Required for DBRX tokenizer
2025-08-25 05:31:22 +03:00
lm-format-enforcer == 0.11.3
2025-11-21 17:54:09 +05:30
llguidance >= 1.3.0, < 1.4.0; platform_machine == "x86_64" or platform_machine == "arm64" or platform_machine == "aarch64" or platform_machine == "s390x" or platform_machine == "ppc64le"
2025-09-10 02:59:46 +09:00
outlines_core == 0.2.11
2025-07-10 14:30:26 -05:00
# required for outlines backend disk cache
diskcache == 5.6.3
2025-02-22 05:17:44 -08:00
lark == 1.2.2
2025-12-28 02:08:29 -06:00
xgrammar == 0.1.29; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" or platform_machine == "s390x" or platform_machine == "ppc64le"
2024-08-09 10:39:41 +08:00
typing_extensions >= 4.10
2024-11-27 19:54:58 -08:00
filelock >= 3.16.1 # must include the fix from https://github.com/tox-dev/filelock/pull/317
2024-09-04 15:18:13 -05:00
partial-json-parser # used for parsing partial JSON outputs
2025-04-14 22:06:03 -05:00
pyzmq >= 25.0.0
2024-08-18 17:57:20 -07:00
msgspec
2025-11-18 13:56:29 -03:00
gguf >= 0.17.0
2025-12-26 13:48:24 +01:00
mistral_common[image] >= 1.8.8
2026-01-22 23:51:15 +08:00
opencv-python-headless >= 4.13.0 # required for video IO
2024-09-01 14:46:57 -07:00
pyyaml
2024-09-07 14:03:16 -06:00
six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version on Python 3.12 and newer
2025-11-08 14:30:18 -08:00
setuptools>=77.0.3,<81.0.0; python_version > '3.11' # Setuptools is used by triton; a modern version must be installed on Python 3.12+ so that setuptools does not try to import distutils, which was removed in Python 3.12
2024-09-12 00:31:19 +08:00
einops # Required for Qwen2-VL.
2025-12-17 00:01:04 -05:00
compressed-tensors == 0.13.0 # required for loading models stored in the compressed-tensors format
2025-10-14 10:12:09 +08:00
depyf==0.20.0 # required for profiling and debugging with compilation config
2024-12-19 18:13:06 -08:00
cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
2025-02-22 05:17:44 -08:00
watchfiles # required for http server to monitor the updates of TLS files
2025-05-26 22:38:04 +08:00
python-json-logger # Used by logging as per examples/others/logging_configuration.md
2025-03-14 17:25:28 -04:00
ninja # Required for xgrammar, rocm, tpu, xpu
2025-06-25 06:33:51 +03:00
pybase64 # fast base64 implementation
2025-07-14 05:45:31 +03:00
cbor2 # Required for cross-language serialization of hashable objects
2025-12-03 18:45:31 +01:00
ijson # Required for mistral streaming tool parser
2025-07-24 18:15:23 +08:00
setproctitle # Used to set process names for better debugging and monitoring
2025-08-06 00:10:14 -07:00
openai-harmony >= 0.0.3 # Required for gpt-oss
2026-01-14 02:21:39 -05:00
anthropic >= 0.71.0
2026-01-13 13:06:10 -08:00
model-hosting-container-standards >= 0.1.13, < 1.0.0
2025-12-16 20:52:14 -05:00
mcp
2026-01-27 20:15:53 -08:00
grpcio
grpcio-reflection