diff --git a/.github/mergify.yml b/.github/mergify.yml
index 080767ca7..9c53342d1 100644
--- a/.github/mergify.yml
+++ b/.github/mergify.yml
@@ -259,8 +259,7 @@ pull_request_rules:
       - files=benchmarks/run_structured_output_benchmark.sh
       - files=docs/features/structured_outputs.md
       - files=examples/offline_inference/structured_outputs.py
-      - files=examples/online_serving/openai_chat_completion_structured_outputs.py
-      - files=examples/online_serving/openai_chat_completion_structured_outputs_with_reasoning.py
+      - files=examples/online_serving/structured_outputs/structured_outputs.py
       - files~=^tests/v1/structured_output/
       - files=tests/v1/entrypoints/llm/test_struct_output_generate.py
       - files~=^vllm/v1/structured_output/
diff --git a/docs/design/metrics.md b/docs/design/metrics.md
index 37cc61d46..a977ce9b9 100644
--- a/docs/design/metrics.md
+++ b/docs/design/metrics.md
@@ -656,7 +656,7 @@ vLLM has support for OpenTelemetry tracing:
 - Added by and reinstated by
 - Configured with `--oltp-traces-endpoint` and `--collect-detailed-traces`
 - [OpenTelemetry blog post](https://opentelemetry.io/blog/2024/llm-observability/)
-- [User-facing docs](../examples/online_serving/opentelemetry.md)
+- [User-facing docs](../../examples/online_serving/opentelemetry/README.md)
 - [Blog post](https://medium.com/@ronen.schaffer/follow-the-trail-supercharging-vllm-with-opentelemetry-distributed-tracing-aa655229b46f)
 - [IBM product docs](https://www.ibm.com/docs/en/instana-observability/current?topic=mgaa-monitoring-large-language-models-llms-vllm-public-preview)
 
diff --git a/tools/profiler/print_layerwise_table.py b/tools/profiler/print_layerwise_table.py
index d7a24a598..06a8c5853 100644
--- a/tools/profiler/print_layerwise_table.py
+++ b/tools/profiler/print_layerwise_table.py
@@ -33,7 +33,10 @@ if __name__ == "__main__":
         "--json-trace",
         type=str,
         required=True,
-        help="json trace file output by examples/offline_inference/profiling.py",
+        help=(
+            "JSON trace file generated by scripts that use "
+            "vllm.profiler.layerwise_profile"
+        ),
     )
     parser.add_argument(
         "--phase",
diff --git a/tools/profiler/visualize_layerwise_profile.py b/tools/profiler/visualize_layerwise_profile.py
index ed4bf0beb..83b8b3a75 100644
--- a/tools/profiler/visualize_layerwise_profile.py
+++ b/tools/profiler/visualize_layerwise_profile.py
@@ -564,8 +564,10 @@ if __name__ == "__main__":
         "--json-trace",
         type=str,
         required=True,
-        help="json trace file output by \
-            examples/offline_inference/profiling.py",
+        help=(
+            "JSON trace file generated by scripts that use "
+            "vllm.profiler.layerwise_profile"
+        ),
     )
     parser.add_argument(
         "--output-directory", type=str, required=False, help="Directory to output plots"
diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py
index ea0f118a0..2ec219d40 100644
--- a/vllm/model_executor/models/config.py
+++ b/vllm/model_executor/models/config.py
@@ -213,7 +213,7 @@ class NomicBertModelConfig(VerifyAndUpdateConfig):
             "Nomic context extension is disabled. "
             "Changing max_model_len from %s to %s. "
             "To enable context extension, see: "
-            "https://github.com/vllm-project/vllm/tree/main/examples/offline_inference/context_extension.html",
+            "https://github.com/vllm-project/vllm/tree/main/examples/offline_inference/context_extension.py",
             max_model_len_before,
             model_config.max_model_len,
         )