# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
MkDocs hook to automatically convert inline code references to API doc links.

For example, `WeightTransferConfig` becomes
[`WeightTransferConfig`][vllm.config.WeightTransferConfig]

This works with the `autorefs` plugin to create clickable cross-references
to API documentation pages generated by `mkdocstrings`.

The hook builds an index of all documented public Python names (classes and
functions with docstrings) from the vllm package at startup using AST parsing,
then substitutes matching inline code spans on each page. Names without
docstrings are excluded because mkdocstrings will not generate a page for them.
"""

import ast
import logging
from pathlib import Path

import regex as re
from mkdocs.config.defaults import MkDocsConfig
from mkdocs.structure.files import Files
from mkdocs.structure.pages import Page

logger = logging.getLogger("mkdocs")

# Four directory levels above this file; module paths are computed relative to
# ROOT_DIR, and only files under VLLM_DIR are indexed.
ROOT_DIR = Path(__file__).parent.parent.parent.parent.resolve()
VLLM_DIR = ROOT_DIR / "vllm"

# Maps short name -> qualified name (e.g. "ModelConfig" -> "vllm.config.ModelConfig")
_name_index: dict[str, str] = {}

# Fenced code block pattern (``` or ~~~, with optional language specifier).
# The opening fence is captured as `fence` and back-referenced so that a block
# only ends at a line starting with the same fence string it was opened with.
_FENCED_BLOCK = re.compile(
    r"(?:^|\n)(?P<fence>`{3,}|~{3,})[^\n]*\n.*?(?:\n(?P=fence))", re.DOTALL
)

# Inline code that is NOT already part of a markdown link.
# Matches `Name` but not [`Name`] and not [`Name`][...] or [`Name`](...).
_INLINE_CODE = re.compile( r"(?[A-Za-z0-9_]*)`" # `UpperCamelCase` or `UPPER_SNAKE` r"(?!\])" # not followed by ] ) def _has_docstring(node: ast.AST) -> bool: """Check if a class or function node has a docstring.""" if not isinstance(node, ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef): return False return ast.get_docstring(node, clean=False) is not None def _module_path(filepath: Path) -> str: """Convert a filesystem path to a dotted module path.""" rel = filepath.relative_to(ROOT_DIR) parts = list(rel.with_suffix("").parts) if parts[-1] == "__init__": parts = parts[:-1] return ".".join(parts) def _index_file(filepath: Path) -> dict[str, str]: """Extract documented public names from a Python file using AST parsing. Only classes and functions with docstrings are included, since mkdocstrings won't generate a page for undocumented symbols. """ names: dict[str, str] = {} try: source = filepath.read_text(encoding="utf-8") tree = ast.parse(source, filename=str(filepath)) except (SyntaxError, UnicodeDecodeError): return names module = _module_path(filepath) for node in ast.iter_child_nodes(tree): if ( # Class definitions (with docstring) isinstance(node, ast.ClassDef) and not node.name.startswith("_") and _has_docstring(node) ) or ( # Function definitions (with docstring, only uppercase/CamelCase) isinstance(node, ast.FunctionDef | ast.AsyncFunctionDef) and not node.name.startswith("_") and node.name[0].isupper() and _has_docstring(node) ): names[node.name] = f"{module}.{node.name}" return names def _build_index() -> dict[str, str]: """Walk the vllm package and build a name -> qualified path index.""" index: dict[str, str] = {} # Track conflicts: if multiple modules define the same name, # prefer shallower modules (more likely to be the public API). 
depth: dict[str, int] = {} for filepath in sorted(VLLM_DIR.rglob("*.py")): # Skip internal/private modules if any(part.startswith("_") and part != "__init__" for part in filepath.parts): continue # Skip third-party vendored code rel = filepath.relative_to(VLLM_DIR) if rel.parts and rel.parts[0] in ("third_party", "vllm_flash_attn"): continue module_depth = len(filepath.relative_to(ROOT_DIR).parts) file_names = _index_file(filepath) for name, qualified in file_names.items(): if name not in index or module_depth < depth[name]: index[name] = qualified depth[name] = module_depth return index def on_startup(*, command: str, dirty: bool) -> None: """Build the name index once at startup.""" global _name_index _name_index = _build_index() logger.info("autoref_code: indexed %d names from vllm/", len(_name_index)) def on_page_markdown( markdown: str, *, page: Page, config: MkDocsConfig, files: Files ) -> str: """Replace inline code references with autoref links.""" if not _name_index: return markdown # Skip API reference pages to avoid circular/redundant links. if page.file.src_path.startswith("api/"): return markdown # Step 1: Mask fenced code blocks so we don't touch code inside them. masks: list[str] = [] def _mask_block(match: re.Match) -> str: masks.append(match.group(0)) return f"\ue000CODEBLOCK{len(masks) - 1}\ue000" masked = _FENCED_BLOCK.sub(_mask_block, markdown) # Step 2: Replace inline code references. def _replace(match: re.Match) -> str: name = match.group("name") qualified = _name_index.get(name) if qualified is None: return match.group(0) logger.debug("autoref_code: linking `%s` to [%s]", name, qualified) return f"[`{name}`][{qualified}]" result = _INLINE_CODE.sub(_replace, masked) # Step 3: Restore masked code blocks. result = re.sub( r"\ue000CODEBLOCK(\d+)\ue000", lambda m: masks[int(m.group(1))], result ) return result