2023-06-05 20:03:14 -07:00
|
|
|
# Build backend and the packages that must be available to build the project.
[build-system]
# Should be mirrored in requirements/build.txt
requires = [
    "cmake>=3.26.1",
    "ninja",
    "packaging>=24.2",
    "setuptools>=77.0.3,<80.0.0",
    "setuptools-scm>=8.0",
    "torch == 2.7.0",
    "wheel",
    "jinja2",
]
build-backend = "setuptools.build_meta"
|
2023-11-20 11:58:01 -08:00
|
|
|
|
2025-02-18 17:02:49 +01:00
|
|
|
# Static package metadata for the `vllm` distribution.
[project]
name = "vllm"
authors = [{ name = "vLLM Team" }]
license = "Apache-2.0"
license-files = ["LICENSE"]
readme = "README.md"
description = "A high-throughput and memory-efficient inference and serving engine for LLMs"
classifiers = [
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Intended Audience :: Developers",
    "Intended Audience :: Information Technology",
    "Intended Audience :: Science/Research",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Scientific/Engineering :: Information Analysis",
]
# Keep in step with the "Programming Language :: Python" classifiers above.
requires-python = ">=3.9,<3.13"
# These fields are declared dynamic: their values are not listed in this table.
dynamic = ["version", "dependencies", "optional-dependencies"]
|
|
|
|
|
|
|
|
|
|
# Project links published with the package metadata.
[project.urls]
Homepage = "https://github.com/vllm-project/vllm"
Documentation = "https://docs.vllm.ai/en/latest/"
Slack = "https://slack.vllm.ai/"
|
2025-02-18 17:02:49 +01:00
|
|
|
|
|
|
|
|
# Console script: the `vllm` command maps to `main` in vllm.entrypoints.cli.main.
[project.scripts]
vllm = "vllm.entrypoints.cli.main:main"
|
|
|
|
|
|
2025-05-12 10:39:10 -07:00
|
|
|
# Entry points registered under the "vllm.general_plugins" group.
[project.entry-points."vllm.general_plugins"]
lora_filesystem_resolver = "vllm.plugins.lora_resolvers.filesystem_resolver:register_filesystem_resolver"
|
|
|
|
|
|
2024-10-14 20:34:47 +02:00
|
|
|
[tool.setuptools_scm]
# No extra settings needed; the presence of this table enables setuptools-scm.
|
2025-02-18 17:02:49 +01:00
|
|
|
|
|
|
|
|
# Package discovery: search from the repository root and include only
# packages whose names match "vllm*".
[tool.setuptools.packages.find]
where = ["."]
include = ["vllm*"]
|
2024-10-14 20:34:47 +02:00
|
|
|
|
2025-01-20 06:58:01 +00:00
|
|
|
# Glob patterns for paths that yapf should not format.
[tool.yapfignore]
ignore_patterns = [
    ".buildkite/**",
    "benchmarks/**",
    "build/**",
    "examples/**",
]
|
|
|
|
|
|
2024-03-10 19:49:14 -07:00
|
|
|
[tool.ruff]
# Allow lines to be up to 80 characters long.
line-length = 80
|
|
|
|
|
|
2024-09-23 18:44:26 +02:00
|
|
|
# Per-path rule suppressions; each key is a glob, each value the rules to skip.
[tool.ruff.lint.per-file-ignores]
"vllm/third_party/**" = ["ALL"]
"vllm/version.py" = ["F401"]
"vllm/_version.py" = ["ALL"]
# Python 3.8 typing: do not flag UP006/UP035 in V0 code
"vllm/attention/**/*.py" = ["UP006", "UP035"]
"vllm/core/**/*.py" = ["UP006", "UP035"]
"vllm/engine/**/*.py" = ["UP006", "UP035"]
"vllm/executor/**/*.py" = ["UP006", "UP035"]
"vllm/prompt_adapter/**/*.py" = ["UP006", "UP035"]
"vllm/spec_decode/**/*.py" = ["UP006", "UP035"]
"vllm/worker/**/*.py" = ["UP006", "UP035"]
# Python 3.8 typing: do not flag UP006/UP035 in utils, for ROCm
"vllm/utils.py" = ["UP006", "UP035"]
|
2024-09-23 18:44:26 +02:00
|
|
|
|
2023-11-20 11:58:01 -08:00
|
|
|
# Rule families ruff enforces (`select`) and individual rules it skips (`ignore`).
[tool.ruff.lint]
select = [
    # pycodestyle
    "E",
    # Pyflakes
    "F",
    # pyupgrade
    "UP",
    # flake8-bugbear
    "B",
    # flake8-simplify
    "SIM",
    # isort (currently disabled)
    # "I",
    # flake8-logging-format
    "G",
]
ignore = [
    # star imports
    "F405",
    "F403",
    # lambda expression assignment
    "E731",
    # Loop control variable not used within loop body
    "B007",
    # f-string format
    "UP032",
    # Can remove once 3.10+ is the minimum Python version
    "UP007",
]
|
2024-02-22 02:56:01 +00:00
|
|
|
|
|
|
|
|
# mypy settings: plugins, import handling, and the paths to check or exclude.
[tool.mypy]
plugins = ["pydantic.mypy"]
ignore_missing_imports = true
check_untyped_defs = true
follow_imports = "silent"

# After fixing type errors resulting from follow_imports: "skip" -> "silent",
# move the directory here and remove it from tools/mypy.sh
files = [
    "vllm/*.py",
    "vllm/adapter_commons",
    "vllm/assets",
    "vllm/entrypoints",
    "vllm/core",
    "vllm/inputs",
    "vllm/logging_utils",
    "vllm/multimodal",
    "vllm/platforms",
    "vllm/transformers_utils",
    "vllm/triton_utils",
    "vllm/usage",
]
# TODO(woosuk): Include the code from Megatron and HuggingFace.
exclude = [
    "vllm/model_executor/parallel_utils/|vllm/model_executor/models/",
    # Ignore triton kernels in ops.
    'vllm/attention/ops/.*\.py$',
]
|
2024-02-22 02:56:01 +00:00
|
|
|
|
|
|
|
|
[tool.codespell]
# Comma-separated words that codespell should not report.
ignore-words-list = "dout, te, indicies, subtile, ElementE"
# Comma-separated path patterns that codespell should not scan.
skip = "tests/models/fixtures/*,tests/prompts/*,benchmarks/sonnet.txt,tests/lora/data/*,build/*,vllm/third_party/*"
|
2024-03-25 23:59:47 +09:00
|
|
|
|
|
|
|
|
# isort settings: paths to skip and output options.
[tool.isort]
skip_glob = [
    ".buildkite/*",
    "benchmarks/*",
    "examples/*",
]
use_parentheses = true
skip_gitignore = true
|
2024-05-29 04:29:31 +08:00
|
|
|
|
|
|
|
|
# Custom pytest markers; entries are "name: description" (description optional).
[tool.pytest.ini_options]
markers = [
    "skip_global_cleanup",
    "core_model: enable this model test in each PR instead of only nightly",
    "cpu_model: enable this model test in CPU tests",
    "split: run this test as part of a split",
    "distributed: run this test only in distributed GPU tests",
    "skip_v1: do not run this test with v1",
    "optional: optional tests that are automatically skipped, include --optional to run them",
]
|
2025-01-12 03:17:13 -05:00
|
|
|
|
|
|
|
|
# pymarkdown rule settings; the trailing comment on each line names the rule.
[tool.pymarkdown]
plugins.md004.style = "sublist"  # ul-style
plugins.md007.indent = 4  # ul-indent
plugins.md007.start_indented = true  # ul-indent
plugins.md013.enabled = false  # line-length
plugins.md041.enabled = false  # first-line-h1
plugins.md033.enabled = false  # inline-html
plugins.md046.enabled = false  # code-block-style
plugins.md024.allow_different_nesting = true  # no-duplicate-headers
|
2025-05-15 01:00:43 -04:00
|
|
|
|
2025-05-28 11:59:11 -04:00
|
|
|
# Source settings for ty: the root directory and whether ignore files are honored.
[tool.ty.src]
root = "./vllm"
respect-ignore-files = true
|
|
|
|
|
|
|
|
|
|
# Python environment for ty; points at the `.venv` directory in the repository root.
[tool.ty.environment]
python = "./.venv"
|