Automatically add links to API docs for matching strings in docs (#37434)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
167
docs/mkdocs/hooks/autoref_code.py
Normal file
167
docs/mkdocs/hooks/autoref_code.py
Normal file
@@ -0,0 +1,167 @@
|
|||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
|
"""
|
||||||
|
MkDocs hook to automatically convert inline code references to API doc links.
|
||||||
|
|
||||||
|
For example, `WeightTransferConfig` becomes
|
||||||
|
[`WeightTransferConfig`][vllm.config.WeightTransferConfig]
|
||||||
|
|
||||||
|
This works with the `autorefs` plugin to create clickable cross-references
|
||||||
|
to API documentation pages generated by `mkdocstrings`.
|
||||||
|
|
||||||
|
The hook builds an index of documented public Python names (top-level classes,
plus top-level functions whose names start with an uppercase letter) from the
vllm package at startup using AST parsing, then substitutes matching inline
code spans on each page. Names without docstrings are excluded because
mkdocstrings will not generate a page for them.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import ast
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import regex as re
|
||||||
|
from mkdocs.config.defaults import MkDocsConfig
|
||||||
|
from mkdocs.structure.files import Files
|
||||||
|
from mkdocs.structure.pages import Page
|
||||||
|
|
||||||
|
# Messages from this hook are emitted under the "mkdocs" logger name.
logger = logging.getLogger("mkdocs")

# Four directory levels above this hook file; with the hook located at
# docs/mkdocs/hooks/autoref_code.py this is the repository root.
ROOT_DIR = Path(__file__).parent.parent.parent.parent.resolve()
# Directory that is scanned for Python sources when building the index.
VLLM_DIR = ROOT_DIR / "vllm"

# Maps short name -> qualified name (e.g. "ModelConfig" -> "vllm.config.ModelConfig")
# Empty at import time; replaced with the real index by `on_startup`.
_name_index: dict[str, str] = {}
|
||||||
|
|
||||||
|
# Fenced code block pattern (``` or ~~~, with optional language specifier).
# The fence may be indented (e.g. inside a list item or an admonition) and the
# body may be empty. The closing fence must start with the same run of fence
# characters that opened the block.
_FENCED_BLOCK = re.compile(
    r"(?:^|\n)[ \t]*(?P<fence>`{3,}|~{3,})[^\n]*\n"  # opening fence line
    # Body lines. The group is lazily optional so that an empty block
    # ("```\n```") closes immediately instead of running on to the next fence.
    r"(?:.*?\n)??"
    r"[ \t]*(?P=fence)",  # closing fence
    re.DOTALL,
)
|
||||||
|
|
||||||
|
# Inline code that is NOT already part of a markdown link.
# Matches `Name` but not [`Name`] and not [`Name`][...] or [`Name`](...).
# Backticks directly adjacent to the span are rejected too, so code spans
# delimited by several backticks (``Name``, `A``B`) are left untouched.
_INLINE_CODE = re.compile(
    r"(?<![\[`])"  # not preceded by [ or another backtick
    r"`(?P<name>[A-Za-z_][A-Za-z0-9_]*)`"  # exactly one Python identifier
    r"(?![\]`])"  # not followed by ] or another backtick
)
|
||||||
|
|
||||||
|
|
||||||
|
def _has_docstring(node: ast.AST) -> bool:
    """Return True when *node* is a class or function definition with a docstring.

    Any other kind of AST node yields False.
    """
    documentable = (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)
    if isinstance(node, documentable):
        # clean=False skips indentation clean-up; only presence matters here.
        return ast.get_docstring(node, clean=False) is not None
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def _module_path(filepath: Path) -> str:
    """Return the dotted module path for a source file located under ROOT_DIR.

    The file suffix is dropped, and a trailing ``__init__`` component is
    removed so that a package's ``__init__.py`` maps to the package itself.
    """
    segments = filepath.relative_to(ROOT_DIR).with_suffix("").parts
    if segments[-1] == "__init__":
        segments = segments[:-1]
    return ".".join(segments)
|
||||||
|
|
||||||
|
|
||||||
|
def _index_file(filepath: Path) -> dict[str, str]:
    """Extract documented public names from a Python file using AST parsing.

    Only top-level definitions are inspected. A name is included when it is

    * a class whose name does not start with an underscore, or
    * a (sync or async) function whose name starts with an uppercase letter,

    and the definition has a docstring, since mkdocstrings won't generate a
    page for undocumented symbols.

    Args:
        filepath: Python source file located under ``ROOT_DIR``.

    Returns:
        Mapping of short name to dotted qualified name. The mapping is empty
        when the file cannot be read, decoded or parsed.
    """
    try:
        source = filepath.read_text(encoding="utf-8")
        tree = ast.parse(source, filename=str(filepath))
    except (OSError, SyntaxError, ValueError) as exc:
        # ValueError covers UnicodeDecodeError as well as the "null bytes"
        # error that ast.parse raises as a plain ValueError before Python 3.12.
        # Indexing is best effort, so one bad file must not abort the build.
        # Log at debug level because this is routine, not a docs problem.
        logger.debug("autoref_code: skipping %s: %s", filepath, exc)
        return {}

    module = _module_path(filepath)
    names: dict[str, str] = {}
    for node in ast.iter_child_nodes(tree):
        if isinstance(node, ast.ClassDef):
            is_candidate = not node.name.startswith("_")
        elif isinstance(node, ast.FunctionDef | ast.AsyncFunctionDef):
            # Only uppercase/CamelCase functions; an uppercase first letter
            # also rules out a leading underscore.
            is_candidate = node.name[0].isupper()
        else:
            continue
        if is_candidate and _has_docstring(node):
            names[node.name] = f"{module}.{node.name}"
    return names
|
||||||
|
|
||||||
|
|
||||||
|
def _build_index() -> dict[str, str]:
    """Walk the vllm package and build a name -> qualified path index.

    Every ``*.py`` file below ``VLLM_DIR`` is indexed with `_index_file`,
    except for

    * files with a path component (inside the package) that starts with an
      underscore, other than a package's ``__init__.py`` itself, and
    * files below the vendored ``third_party`` / ``vllm_flash_attn`` trees.

    Returns:
        Mapping of short name to dotted qualified name. If several modules
        define the same name, the shallowest module wins (more likely to be
        the public API); ties go to the first file in sorted path order.
    """
    index: dict[str, str] = {}
    # Module depth of the entry currently stored in `index`, per name.
    depth: dict[str, int] = {}

    for filepath in sorted(VLLM_DIR.rglob("*.py")):
        # Only the path *inside* the package is examined. Checking the
        # absolute path would skip every file whenever the checkout lives
        # below a directory such as `_work`.
        rel = filepath.relative_to(VLLM_DIR)

        # Skip internal/private modules. The file component still carries its
        # suffix, so the exemption has to be spelled "__init__.py".
        if any(part.startswith("_") and part != "__init__.py" for part in rel.parts):
            continue
        # Skip third-party vendored code
        if rel.parts[0] in ("third_party", "vllm_flash_attn"):
            continue

        # Depth of the dotted module path relative to the package:
        # `pkg/__init__.py` is the module `pkg`, one level shallower than
        # its file path suggests.
        module_depth = len(rel.parts)
        if filepath.name == "__init__.py":
            module_depth -= 1

        for name, qualified in _index_file(filepath).items():
            if name not in index or module_depth < depth[name]:
                index[name] = qualified
                depth[name] = module_depth

    return index
|
||||||
|
|
||||||
|
|
||||||
|
def on_startup(*, command: str, dirty: bool) -> None:
    """MkDocs startup event: build the module-level name index.

    Both keyword arguments are accepted to match the event signature and are
    not used.
    """
    global _name_index
    built = _build_index()
    _name_index = built
    logger.info("autoref_code: indexed %d names from vllm/", len(built))
|
||||||
|
|
||||||
|
|
||||||
|
def on_page_markdown(
    markdown: str, *, page: Page, config: MkDocsConfig, files: Files
) -> str:
    """Replace inline code references with autoref links.

    Inline code spans whose text is a key of the name index are rewritten
    from `` `Name` `` to ``[`Name`][qualified.path.Name]``. Text inside fenced
    code blocks is left untouched.

    Args:
        markdown: Markdown source of the page.
        page: Page being processed; only ``page.file.src_path`` is read.
        config: Unused; part of the event signature.
        files: Unused; part of the event signature.

    Returns:
        The (possibly rewritten) markdown. The input is returned unchanged
        when the index is empty or the page lives under ``api/``.
    """
    if not _name_index:
        return markdown

    # Skip API reference pages to avoid circular/redundant links.
    # `src_path` uses the OS-native separator, so normalise backslashes first;
    # otherwise the prefix test never matches on Windows.
    if page.file.src_path.replace("\\", "/").startswith("api/"):
        return markdown

    # Step 1: Mask fenced code blocks so we don't touch code inside them.
    # Each block is swapped for a placeholder wrapped in a private-use
    # character (U+E000) that the inline-code pattern cannot match.
    masks: list[str] = []

    def _mask_block(match: re.Match) -> str:
        masks.append(match.group(0))
        return f"\ue000CODEBLOCK{len(masks) - 1}\ue000"

    masked = _FENCED_BLOCK.sub(_mask_block, markdown)

    # Step 2: Replace inline code references.
    def _replace(match: re.Match) -> str:
        name = match.group("name")
        qualified = _name_index.get(name)
        if qualified is None:
            # Not an indexed name: keep the code span exactly as written.
            return match.group(0)
        logger.debug("autoref_code: linking `%s` to [%s]", name, qualified)
        return f"[`{name}`][{qualified}]"

    result = _INLINE_CODE.sub(_replace, masked)

    # Step 3: Restore masked code blocks.
    result = re.sub(
        r"\ue000CODEBLOCK(\d+)\ue000", lambda m: masks[int(m.group(1))], result
    )
    return result
|
||||||
@@ -54,6 +54,7 @@ hooks:
|
|||||||
- docs/mkdocs/hooks/generate_argparse.py
|
- docs/mkdocs/hooks/generate_argparse.py
|
||||||
- docs/mkdocs/hooks/generate_metrics.py
|
- docs/mkdocs/hooks/generate_metrics.py
|
||||||
- docs/mkdocs/hooks/url_schemes.py
|
- docs/mkdocs/hooks/url_schemes.py
|
||||||
|
- docs/mkdocs/hooks/autoref_code.py
|
||||||
|
|
||||||
plugins:
|
plugins:
|
||||||
- meta
|
- meta
|
||||||
|
|||||||
Reference in New Issue
Block a user