diff --git a/.github/mergify.yml b/.github/mergify.yml index ccfd57162..e595060c3 100644 --- a/.github/mergify.yml +++ b/.github/mergify.yml @@ -58,7 +58,7 @@ pull_request_rules: - files~=^benchmarks/structured_schemas/ - files=benchmarks/benchmark_serving_structured_output.py - files=benchmarks/run_structured_output_benchmark.sh - - files=docs/source/features/structured_outputs.md + - files=docs/features/structured_outputs.md - files=examples/offline_inference/structured_outputs.py - files=examples/online_serving/openai_chat_completion_structured_outputs.py - files=examples/online_serving/openai_chat_completion_structured_outputs_with_reasoning.py @@ -135,9 +135,7 @@ pull_request_rules: - files~=^tests/entrypoints/openai/tool_parsers/ - files=tests/entrypoints/openai/test_chat_with_tool_reasoning.py - files~=^vllm/entrypoints/openai/tool_parsers/ - - files=docs/source/features/tool_calling.md - - files=docs/source/getting_started/examples/openai_chat_completion_client_with_tools.md - - files=docs/source/getting_started/examples/chat_with_tools.md + - files=docs/features/tool_calling.md - files~=^examples/tool_chat_* - files=examples/offline_inference/chat_with_tools.py - files=examples/online_serving/openai_chat_completion_client_with_tools_required.py diff --git a/docker/Dockerfile b/docker/Dockerfile index 9b232d1fe..24986a1b7 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,8 +2,8 @@ # to run the OpenAI compatible server. # Please update any changes made here to -# docs/source/contributing/dockerfile/dockerfile.md and -# docs/source/assets/contributing/dockerfile-stages-dependency.png +# docs/contributing/dockerfile/dockerfile.md and +# docs/assets/contributing/dockerfile-stages-dependency.png ARG CUDA_VERSION=12.8.1 #################### BASE BUILD IMAGE #################### diff --git a/docs/contributing/overview.md b/docs/contributing/overview.md index 48f0bab5e..2517436af 100644 --- a/docs/contributing/overview.md +++ b/docs/contributing/overview.md @@ -130,9 +130,8 @@ The PR needs to meet the following code quality standards: understand the code. - Include sufficient tests to ensure the project stays correct and robust. This includes both unit tests and integration tests. -- Please add documentation to `docs/source/` if the PR modifies the - user-facing behaviors of vLLM. It helps vLLM users understand and utilize the - new features or changes. +- Please add documentation to `docs/` if the PR modifies the user-facing behaviors of vLLM. + It helps vLLM users understand and utilize the new features or changes. ### Adding or Changing Kernels diff --git a/docs/source/assets/contributing/dockerfile-stages-dependency.png b/docs/source/assets/contributing/dockerfile-stages-dependency.png deleted file mode 100644 index 0838bfa37..000000000 Binary files a/docs/source/assets/contributing/dockerfile-stages-dependency.png and /dev/null differ diff --git a/tools/update-dockerfile-graph.sh b/tools/update-dockerfile-graph.sh index a1e22a69c..88189e8ab 100755 --- a/tools/update-dockerfile-graph.sh +++ b/tools/update-dockerfile-graph.sh @@ -24,7 +24,7 @@ if printf '%s\n' "${FILES[@]}" | grep -q "^docker/Dockerfile$"; then fi # Define the target file path - TARGET_GRAPH_FILE="docs/source/assets/contributing/dockerfile-stages-dependency.png" + TARGET_GRAPH_FILE="docs/assets/contributing/dockerfile-stages-dependency.png" # Ensure target directory exists mkdir -p "$(dirname "$TARGET_GRAPH_FILE")" diff --git a/vllm/config.py b/vllm/config.py index 5cd08db43..37cec8408 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -988,7 +988,7 @@ class ModelConfig: self.use_async_output_proc = False return - # Reminder: Please update docs/source/features/compatibility_matrix.md + # Reminder: Please update docs/features/compatibility_matrix.md # If the feature combo become valid from vllm.platforms import current_platform if not current_platform.is_async_output_supported(self.enforce_eager): @@ -1004,7 +1004,7 @@ class ModelConfig: if self.runner_type == "pooling": self.use_async_output_proc = False - # Reminder: Please update docs/source/features/compatibility_matrix.md + # Reminder: Please update docs/features/compatibility_matrix.md # If the feature combo become valid if speculative_config: self.use_async_output_proc = False diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index b561a1a77..988ba14db 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1084,7 +1084,7 @@ class EngineArgs: disable_log_stats=self.disable_log_stats, ) - # Reminder: Please update docs/source/features/compatibility_matrix.md + # Reminder: Please update docs/features/compatibility_matrix.md # If the feature combo become valid if self.num_scheduler_steps > 1: if speculative_config is not None: diff --git a/vllm/engine/output_processor/multi_step.py b/vllm/engine/output_processor/multi_step.py index 4cfb22c5a..323580fa7 100644 --- a/vllm/engine/output_processor/multi_step.py +++ b/vllm/engine/output_processor/multi_step.py @@ -67,7 +67,7 @@ class MultiStepOutputProcessor(SequenceGroupOutputProcessor): @staticmethod @functools.lru_cache def _log_prompt_logprob_unsupported_warning_once(): - # Reminder: Please update docs/source/features/compatibility_matrix.md + # Reminder: Please update docs/features/compatibility_matrix.md # If the feature combo become valid logger.warning( "Prompt logprob is not supported by multi step workers. " diff --git a/vllm/platforms/cpu.py b/vllm/platforms/cpu.py index 5c0c90972..c79c603c0 100644 --- a/vllm/platforms/cpu.py +++ b/vllm/platforms/cpu.py @@ -75,7 +75,7 @@ class CpuPlatform(Platform): import vllm.envs as envs from vllm.utils import GiB_bytes model_config = vllm_config.model_config - # Reminder: Please update docs/source/features/compatibility_matrix.md + # Reminder: Please update docs/features/compatibility_matrix.md # If the feature combo become valid if not model_config.enforce_eager: model_config.enforce_eager = True diff --git a/vllm/spec_decode/spec_decode_worker.py b/vllm/spec_decode/spec_decode_worker.py index 6ba5a5100..252c80957 100644 --- a/vllm/spec_decode/spec_decode_worker.py +++ b/vllm/spec_decode/spec_decode_worker.py @@ -114,7 +114,7 @@ def create_spec_worker(*args, **kwargs) -> "SpecDecodeWorker": return spec_decode_worker -# Reminder: Please update docs/source/features/compatibility_matrix.md +# Reminder: Please update docs/features/compatibility_matrix.md # If the feature combo become valid class SpecDecodeWorker(LoRANotSupportedWorkerBase): """Worker which implements speculative decoding. diff --git a/vllm/utils.py b/vllm/utils.py index 25694c121..511db7e67 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -85,7 +85,7 @@ MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS = 5120 # Exception strings for non-implemented encoder/decoder scenarios -# Reminder: Please update docs/source/features/compatibility_matrix.md +# Reminder: Please update docs/features/compatibility_matrix.md # If the feature combo become valid STR_NOT_IMPL_ENC_DEC_SWA = \ diff --git a/vllm/worker/multi_step_model_runner.py b/vllm/worker/multi_step_model_runner.py index 0825abbed..d9cf2055e 100644 --- a/vllm/worker/multi_step_model_runner.py +++ b/vllm/worker/multi_step_model_runner.py @@ -824,7 +824,7 @@ def _pythonize_sampler_output( for sgdx, (seq_group, sample_result) in enumerate(zip(seq_groups, samples_list)): - # Reminder: Please update docs/source/features/compatibility_matrix.md + # Reminder: Please update docs/features/compatibility_matrix.md # If the feature combo become valid # (Check for Guided Decoding) if seq_group.sampling_params.logits_processors: diff --git a/vllm/worker/utils.py b/vllm/worker/utils.py index d925f0883..e2854bcb3 100644 --- a/vllm/worker/utils.py +++ b/vllm/worker/utils.py @@ -14,7 +14,7 @@ def assert_enc_dec_mr_supported_scenario( a supported scenario. ''' - # Reminder: Please update docs/source/features/compatibility_matrix.md + # Reminder: Please update docs/features/compatibility_matrix.md # If the feature combo become valid if enc_dec_mr.cache_config.enable_prefix_caching: