# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
MkDocs hook to automatically convert inline code references to API doc links.

For example, `WeightTransferConfig` becomes
[`WeightTransferConfig`][vllm.config.WeightTransferConfig]

This works with the `autorefs` plugin to create clickable cross-references
to API documentation pages generated by `mkdocstrings`.

The hook builds an index of all documented public Python names (classes and
functions with docstrings) from the vllm package at startup using AST parsing,
then substitutes matching inline code spans on each page. Names without
docstrings are excluded because mkdocstrings will not generate a page for them.

The hook is enabled by listing `docs/mkdocs/hooks/autoref_code.py` under
`hooks:` in `mkdocs.yaml`.
"""

import ast
import logging
from pathlib import Path

import regex as re
from mkdocs.config.defaults import MkDocsConfig
from mkdocs.structure.files import Files
from mkdocs.structure.pages import Page

logger = logging.getLogger("mkdocs")

ROOT_DIR = Path(__file__).parent.parent.parent.parent.resolve()
VLLM_DIR = ROOT_DIR / "vllm"

# Maps short name -> qualified name (e.g. "ModelConfig" -> "vllm.config.ModelConfig")
_name_index: dict[str, str] = {}

# Top-level directories under vllm/ that hold vendored third-party code.
_VENDORED_DIRS = ("third_party", "vllm_flash_attn")

# Fenced code block pattern (``` or ~~~, with optional language specifier).
# - Leading indentation is allowed so fences nested in lists/admonitions are
#   masked too.
# - The body is lazily optional so an empty block closes on its own fence
#   instead of pairing with the opener of the next block.
_FENCED_BLOCK = re.compile(
    r"(?:^|\n)[ \t]*(?P<fence>`{3,}|~{3,})[^\n]*\n(?:.*?\n)??[ \t]*(?P=fence)",
    re.DOTALL,
)

# Inline code that is NOT already part of a markdown link.
# Matches `Name` but not [`Name`] and not [`Name`][...] or [`Name`](...).
# Backticks are excluded on both sides as well, so the inner part of a
# double-backtick span (``Name``) is left alone.
_INLINE_CODE = re.compile(
    r"(?<![\[`])"  # not preceded by [ or another backtick
    r"`(?P<name>[A-Z][A-Za-z0-9_]*)`"  # `UpperCamelCase` or `UPPER_SNAKE`
    r"(?![\]`])"  # not followed by ] or another backtick
)

# Placeholder that stands in for a masked fenced block; the captured number
# is the block's position in the per-page mask list.
_MASK_TOKEN = re.compile(r"\ue000CODEBLOCK(\d+)\ue000")


def _has_docstring(node: ast.AST) -> bool:
    """Check if a class or function node has a docstring.

    Returns False for any node that is not a class, function, or async
    function definition.
    """
    if not isinstance(node, ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef):
        return False
    return ast.get_docstring(node, clean=False) is not None


def _module_path(filepath: Path) -> str:
    """Convert a filesystem path to a dotted module path.

    The path is taken relative to ROOT_DIR, the `.py` suffix is dropped, and
    a trailing `__init__` component is removed so a package's `__init__.py`
    maps to the package itself (e.g. `vllm/config/__init__.py` ->
    `vllm.config`).

    Raises:
        ValueError: if `filepath` is not located under ROOT_DIR.
    """
    rel = filepath.relative_to(ROOT_DIR)
    parts = list(rel.with_suffix("").parts)
    if parts[-1] == "__init__":
        parts = parts[:-1]
    return ".".join(parts)


def _index_file(filepath: Path) -> dict[str, str]:
    """Extract documented public names from a Python file using AST parsing.

    Only classes and functions with docstrings are included, since
    mkdocstrings won't generate a page for undocumented symbols. Only
    top-level definitions are considered; functions are additionally
    required to start with an uppercase letter.

    Returns:
        Mapping of short name -> fully qualified dotted name. Empty when the
        file cannot be decoded or parsed.
    """
    names: dict[str, str] = {}
    try:
        source = filepath.read_text(encoding="utf-8")
        tree = ast.parse(source, filename=str(filepath))
    except (SyntaxError, UnicodeDecodeError):
        # Best effort: an unparsable file simply contributes no names.
        return names

    module = _module_path(filepath)

    for node in ast.iter_child_nodes(tree):
        if isinstance(node, ast.ClassDef):
            wanted = True
        elif isinstance(node, ast.FunctionDef | ast.AsyncFunctionDef):
            # Functions are only linked when they look like a type name
            # (uppercase/CamelCase), matching what _INLINE_CODE can match.
            wanted = node.name[0].isupper()
        else:
            continue

        if wanted and not node.name.startswith("_") and _has_docstring(node):
            names[node.name] = f"{module}.{node.name}"

    return names


def _build_index() -> dict[str, str]:
    """Walk the vllm package and build a name -> qualified path index.

    Private modules/packages (any path component below `vllm/` starting with
    an underscore, except `__init__`) and vendored third-party directories
    are skipped. When several modules define the same name, the one with the
    shallowest dotted module path wins; ties go to the first file in sorted
    path order.
    """
    index: dict[str, str] = {}
    # Track conflicts: if multiple modules define the same name,
    # prefer shallower modules (more likely to be the public API).
    depth: dict[str, int] = {}

    for filepath in sorted(VLLM_DIR.rglob("*.py")):
        rel = filepath.relative_to(VLLM_DIR)
        # Judge privacy on suffix-stripped components *below* vllm/ only:
        # - the absolute path may contain unrelated underscore-prefixed
        #   directories (e.g. a CI workspace named "_work");
        # - the last raw component is "__init__.py", which would never equal
        #   "__init__" and so would wrongly be treated as private.
        module_parts = rel.with_suffix("").parts
        if any(p.startswith("_") and p != "__init__" for p in module_parts):
            continue
        # Skip third-party vendored code
        if rel.parts[0] in _VENDORED_DIRS:
            continue

        # Depth of the dotted module path; a package's __init__ counts as the
        # package itself, so it is shallower than its sibling modules.
        module_depth = len(module_parts)
        if module_parts[-1] == "__init__":
            module_depth -= 1

        for name, qualified in _index_file(filepath).items():
            if name not in index or module_depth < depth[name]:
                index[name] = qualified
                depth[name] = module_depth

    return index


def on_startup(*, command: str, dirty: bool) -> None:
    """Build the name index once at startup.

    `command` and `dirty` are accepted because they are part of the hook
    signature; neither is used here.
    """
    global _name_index
    _name_index = _build_index()
    logger.info("autoref_code: indexed %d names from vllm/", len(_name_index))


def on_page_markdown(
    markdown: str, *, page: Page, config: MkDocsConfig, files: Files
) -> str:
    """Replace inline code references with autoref links.

    Args:
        markdown: Raw markdown source of the page.
        page: The page being processed; only its source path is inspected.
        config: Unused; part of the hook signature.
        files: Unused; part of the hook signature.

    Returns:
        The markdown with every inline code span whose text is an indexed
        name rewritten to [`Name`][qualified.path]. Fenced code blocks and
        pages under `api/` are returned untouched.
    """
    if not _name_index:
        return markdown

    # Skip API reference pages to avoid circular/redundant links.
    if page.file.src_path.startswith("api/"):
        return markdown

    # Step 1: Mask fenced code blocks so we don't touch code inside them.
    masks: list[str] = []

    def _mask_block(match: re.Match) -> str:
        masks.append(match.group(0))
        # Private-use code point delimiters keep the placeholder from
        # colliding with ordinary page text.
        return f"\ue000CODEBLOCK{len(masks) - 1}\ue000"

    masked = _FENCED_BLOCK.sub(_mask_block, markdown)

    # Step 2: Replace inline code references.
    def _replace(match: re.Match) -> str:
        name = match.group("name")
        qualified = _name_index.get(name)
        if qualified is None:
            # Not a documented name: leave the code span exactly as written.
            return match.group(0)
        logger.debug("autoref_code: linking `%s` to [%s]", name, qualified)
        return f"[`{name}`][{qualified}]"

    result = _INLINE_CODE.sub(_replace, masked)

    # Step 3: Restore masked code blocks. A callable replacement is used so
    # backslashes inside the original blocks are not treated as escapes.
    return _MASK_TOKEN.sub(lambda m: masks[int(m.group(1))], result)