Automatically add links to API docs for matching strings in docs (#37434)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
167
docs/mkdocs/hooks/autoref_code.py
Normal file
167
docs/mkdocs/hooks/autoref_code.py
Normal file
@@ -0,0 +1,167 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
"""
|
||||
MkDocs hook to automatically convert inline code references to API doc links.
|
||||
|
||||
For example, `WeightTransferConfig` becomes
|
||||
[`WeightTransferConfig`][vllm.config.WeightTransferConfig]
|
||||
|
||||
This works with the `autorefs` plugin to create clickable cross-references
|
||||
to API documentation pages generated by `mkdocstrings`.
|
||||
|
||||
The hook builds an index of all documented public Python names (classes and
|
||||
functions with docstrings) from the vllm package at startup using AST parsing,
|
||||
then substitutes matching inline code spans on each page. Names without
|
||||
docstrings are excluded because mkdocstrings will not generate a page for them.
|
||||
"""
|
||||
|
||||
import ast
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import regex as re
|
||||
from mkdocs.config.defaults import MkDocsConfig
|
||||
from mkdocs.structure.files import Files
|
||||
from mkdocs.structure.pages import Page
|
||||
|
||||
logger = logging.getLogger("mkdocs")

# Repository root, found by walking four levels up from this file
# (docs/mkdocs/hooks/autoref_code.py).
ROOT_DIR = Path(__file__).parent.parent.parent.parent.resolve()
VLLM_DIR = ROOT_DIR / "vllm"

# Maps short name -> qualified name (e.g. "ModelConfig" -> "vllm.config.ModelConfig")
_name_index: dict[str, str] = {}

# Fenced code block pattern (``` or ~~~, with optional language specifier).
# The closing fence must repeat the opening fence; the leading newline (if any)
# is part of the match, so callers must restore the full match text.
_FENCED_BLOCK = re.compile(
    r"(?:^|\n)(?P<fence>`{3,}|~{3,})[^\n]*\n.*?(?:\n(?P=fence))", re.DOTALL
)

# Inline code that is NOT already part of a markdown link.
# Matches `Name` but not [`Name`] and not [`Name`][...] or [`Name`](...).
# The name must start with an uppercase letter so ordinary lowercase code
# spans (and the empty span) are never considered, and the span must not touch
# another backtick so the inside of ``Name`` is left alone.
_INLINE_CODE = re.compile(
    r"(?<![\[`])"  # not preceded by [ or another backtick
    r"`(?P<name>[A-Z][A-Za-z0-9_]*)`"  # `UpperCamelCase` or `UPPER_SNAKE`
    r"(?![\]`])"  # not followed by ] or another backtick
)
|
||||
|
||||
|
||||
def _has_docstring(node: ast.AST) -> bool:
    """Return True when *node* is a class or function that carries a docstring.

    Any other node type yields False; the type guard also keeps
    `ast.get_docstring` from being called on nodes it does not accept.
    """
    documentable = (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)
    if isinstance(node, documentable):
        # clean=False: only presence matters, so skip indentation cleanup.
        return ast.get_docstring(node, clean=False) is not None
    return False
|
||||
|
||||
|
||||
def _module_path(filepath: Path) -> str:
    """Translate a file path under ROOT_DIR into its dotted module name.

    The file suffix is dropped, and a trailing ``__init__`` component is
    removed so a package's ``__init__.py`` maps to the package itself.
    """
    segments = filepath.relative_to(ROOT_DIR).with_suffix("").parts
    if segments[-1] == "__init__":
        segments = segments[:-1]
    return ".".join(segments)
|
||||
|
||||
|
||||
def _index_file(filepath: Path) -> dict[str, str]:
    """Collect the documented public top-level names defined in one Python file.

    Returns a mapping of short name -> ``<module>.<name>``. A name is included
    only if it has a docstring (mkdocstrings won't generate a page for
    undocumented symbols), does not start with an underscore, and is either a
    class or a function whose name starts with an uppercase letter. Files that
    cannot be decoded as UTF-8 or parsed yield an empty mapping.
    """
    try:
        tree = ast.parse(
            filepath.read_text(encoding="utf-8"), filename=str(filepath)
        )
    except (SyntaxError, UnicodeDecodeError):
        return {}

    module = _module_path(filepath)
    found: dict[str, str] = {}

    # Only direct children of the module are inspected; nested definitions
    # (methods, inner classes) are never indexed.
    for node in ast.iter_child_nodes(tree):
        if isinstance(node, ast.ClassDef):
            eligible = True
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            # Functions qualify only when named like a class or constant.
            eligible = node.name[0].isupper()
        else:
            continue

        if eligible and not node.name.startswith("_") and _has_docstring(node):
            found[node.name] = f"{module}.{node.name}"

    return found
|
||||
|
||||
|
||||
def _build_index() -> dict[str, str]:
    """Walk the vllm package and build a short name -> qualified name index.

    Files are visited in sorted order. A file is skipped when any component of
    its path *inside* the package starts with an underscore (package
    ``__init__.py`` files excepted), or when it lives under a vendored
    third-party directory. When several modules define the same short name,
    the shallowest module wins; among equal depths the first one visited wins.

    Returns:
        Mapping of short name to fully qualified dotted name.
    """
    index: dict[str, str] = {}
    # Track conflicts: if multiple modules define the same name,
    # prefer shallower modules (more likely to be the public API).
    depth: dict[str, int] = {}

    for filepath in sorted(VLLM_DIR.rglob("*.py")):
        rel = filepath.relative_to(VLLM_DIR)

        # Skip internal/private modules. Only the path relative to the package
        # is examined, so an underscore-prefixed directory above the checkout
        # cannot hide every file. The path component of a package initializer
        # is "__init__.py" (with suffix), which is what must be exempted.
        if any(part.startswith("_") and part != "__init__.py" for part in rel.parts):
            continue
        # Skip third-party vendored code
        if rel.parts and rel.parts[0] in ("third_party", "vllm_flash_attn"):
            continue

        module_depth = len(filepath.relative_to(ROOT_DIR).parts)
        if filepath.name == "__init__.py":
            # Names in a package __init__ live at the package's own level.
            module_depth -= 1

        for name, qualified in _index_file(filepath).items():
            if name not in index or module_depth < depth[name]:
                index[name] = qualified
                depth[name] = module_depth

    return index
|
||||
|
||||
|
||||
def on_startup(*, command: str, dirty: bool) -> None:
    """Populate the module-level name index and log how many names it holds.

    `command` and `dirty` are accepted but not used here.
    """
    global _name_index
    fresh_index = _build_index()
    _name_index = fresh_index
    logger.info("autoref_code: indexed %d names from vllm/", len(fresh_index))
|
||||
|
||||
|
||||
def on_page_markdown(
    markdown: str, *, page: Page, config: MkDocsConfig, files: Files
) -> str:
    """Turn inline code spans that name indexed symbols into autoref links.

    The page is returned unchanged when the name index is empty or when the
    page's source path starts with ``api/`` (linking API reference pages would
    be circular or redundant). Otherwise fenced code blocks are temporarily
    swapped for placeholders so their contents are never rewritten, matching
    inline code spans become ``[`Name`][qualified.Name]``, and the fenced
    blocks are put back verbatim.
    """
    if not _name_index or page.file.src_path.startswith("api/"):
        return markdown

    # Stash fenced blocks; each placeholder carries the block's stash index.
    stashed_blocks: list[str] = []

    def _stash(found: re.Match) -> str:
        slot = len(stashed_blocks)
        stashed_blocks.append(found.group(0))
        return f"\ue000CODEBLOCK{slot}\ue000"

    def _link(found: re.Match) -> str:
        short = found.group("name")
        target = _name_index.get(short)
        if target is not None:
            logger.debug("autoref_code: linking `%s` to [%s]", short, target)
            return f"[`{short}`][{target}]"
        # Unknown name: leave the code span exactly as written.
        return found.group(0)

    def _unstash(found: re.Match) -> str:
        return stashed_blocks[int(found.group(1))]

    without_fences = _FENCED_BLOCK.sub(_stash, markdown)
    linked = _INLINE_CODE.sub(_link, without_fences)
    return re.sub(r"\ue000CODEBLOCK(\d+)\ue000", _unstash, linked)
|
||||
@@ -54,6 +54,7 @@ hooks:
|
||||
- docs/mkdocs/hooks/generate_argparse.py
|
||||
- docs/mkdocs/hooks/generate_metrics.py
|
||||
- docs/mkdocs/hooks/url_schemes.py
|
||||
- docs/mkdocs/hooks/autoref_code.py
|
||||
|
||||
plugins:
|
||||
- meta
|
||||
|
||||
Reference in New Issue
Block a user