[Docs] Convert rST to MyST (Markdown) (#11145)

Signed-off-by: Rafael Vasquez <rafvasq21@gmail.com>
2024-12-23 17:35:38 -05:00
parent 94d545a1a1
commit 32aa2059ad
167 changed files with 7863 additions and 8131 deletions
--- a/vllm/attention/backends/rocm_flash_attn.py
+++ b/vllm/attention/backends/rocm_flash_attn.py
@@ -430,7 +430,7 @@ class ROCmFlashAttentionImpl(AttentionImpl):
        Returns:
            shape = [num_tokens, num_heads * head_size]
        """
-        # Reminder: Please update docs/source/usage/compatibility_matrix.rst
+        # Reminder: Please update docs/source/usage/compatibility_matrix.md
        # If the feature combo become valid
        if attn_type != AttentionType.DECODER:
            raise NotImplementedError("Encoder self-attention and "
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -638,7 +638,7 @@ class ModelConfig:
            self.use_async_output_proc = False
            return

-        # Reminder: Please update docs/source/usage/compatibility_matrix.rst
+        # Reminder: Please update docs/source/usage/compatibility_matrix.md
        # If the feature combo become valid
        if not current_platform.is_async_output_supported(self.enforce_eager):
            logger.warning(
@@ -658,7 +658,7 @@ class ModelConfig:
        if self.runner_type == "pooling":
            self.use_async_output_proc = False

-        # Reminder: Please update docs/source/usage/compatibility_matrix.rst
+        # Reminder: Please update docs/source/usage/compatibility_matrix.md
        # If the feature combo become valid
        if speculative_config:
            logger.warning("Async output processing is not supported with"
@@ -2058,7 +2058,7 @@ class LoRAConfig:
                           model_config.quantization)

    def verify_with_scheduler_config(self, scheduler_config: SchedulerConfig):
-        # Reminder: Please update docs/source/usage/compatibility_matrix.rst
+        # Reminder: Please update docs/source/usage/compatibility_matrix.md
        # If the feature combo become valid
        if scheduler_config.chunked_prefill_enabled:
            logger.warning("LoRA with chunked prefill is still experimental "
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1148,7 +1148,7 @@ class EngineArgs:
            disable_logprobs=self.disable_logprobs_during_spec_decoding,
        )

-        # Reminder: Please update docs/source/usage/compatibility_matrix.rst
+        # Reminder: Please update docs/source/usage/compatibility_matrix.md
        # If the feature combo become valid
        if self.num_scheduler_steps > 1:
            if speculative_config is not None:
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -133,7 +133,7 @@ class LLMEngine:
    and the :class:`AsyncLLMEngine` class wraps this class for online serving.

    The config arguments are derived from :class:`~vllm.EngineArgs`. (See
-    :ref:`engine_args`)
+    :ref:`engine-args`)

    Args:
        model_config: The configuration related to the LLM model.
--- a/vllm/engine/output_processor/multi_step.py
+++ b/vllm/engine/output_processor/multi_step.py
@@ -65,7 +65,7 @@ class MultiStepOutputProcessor(SequenceGroupOutputProcessor):
    @staticmethod
    @functools.lru_cache
    def _log_prompt_logprob_unsupported_warning_once():
-        # Reminder: Please update docs/source/usage/compatibility_matrix.rst
+        # Reminder: Please update docs/source/usage/compatibility_matrix.md
        # If the feature combo become valid
        logger.warning(
            "Prompt logprob is not supported by multi step workers. "
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -115,7 +115,7 @@ class LLM:
            integer, it is used as the level of compilation optimization. If it
            is a dictionary, it can specify the full compilation configuration.
        **kwargs: Arguments for :class:`~vllm.EngineArgs`. (See
-            :ref:`engine_args`)
+            :ref:`engine-args`)

    Note:
        This class is intended to be used for offline inference. For online
--- a/vllm/executor/cpu_executor.py
+++ b/vllm/executor/cpu_executor.py
@@ -22,7 +22,7 @@ class CPUExecutor(ExecutorBase):

    def _init_executor(self) -> None:
        assert self.device_config.device_type == "cpu"
-        # Reminder: Please update docs/source/usage/compatibility_matrix.rst
+        # Reminder: Please update docs/source/usage/compatibility_matrix.md
        # If the feature combo become valid
        assert self.lora_config is None, "cpu backend doesn't support LoRA"

--- a/vllm/inputs/__init__.py
+++ b/vllm/inputs/__init__.py
@@ -13,7 +13,7 @@ The global :class:`~InputRegistry` which is used by :class:`~vllm.LLMEngine`
 to dispatch data processing according to the target model.

 See also:
-    :ref:`input_processing_pipeline`
+    :ref:`input-processing-pipeline`
 """

 __all__ = [
--- a/vllm/inputs/registry.py
+++ b/vllm/inputs/registry.py
@@ -314,7 +314,7 @@ class InputRegistry:
        The model is identified by ``model_config``.

        See also:
-            :ref:`enabling_multimodal_inputs`
+            :ref:`enabling-multimodal-inputs`

        Note:
            This should be called after
@@ -391,7 +391,7 @@ class InputRegistry:
        happens before :meth:`~vllm.multimodal.MultiModalRegistry.map_input`.

        See also:
-            :ref:`input_processing_pipeline`
+            :ref:`input-processing-pipeline`
        """

        def wrapper(model_cls: N) -> N:
@@ -435,7 +435,7 @@ class InputRegistry:
        The model is identified by ``model_config``.

        See also:
-            :ref:`input_processing_pipeline`
+            :ref:`input-processing-pipeline`
        """
        # Avoid circular import
        from vllm.model_executor.model_loader import get_model_architecture
--- a/vllm/multimodal/__init__.py
+++ b/vllm/multimodal/__init__.py
@@ -11,7 +11,7 @@ The global :class:`~MultiModalRegistry` is used by model runners to
 dispatch data processing according to its modality and the target model.

 See also:
-    :ref:`input_processing_pipeline`
+    :ref:`input-processing-pipeline`
 """

 __all__ = [
--- a/vllm/multimodal/base.py
+++ b/vllm/multimodal/base.py
@@ -50,7 +50,7 @@ class MultiModalPlugin(ABC):
    (i.e., the modality of the data).

    See also:
-        :ref:`adding_multimodal_plugin`
+        :ref:`adding-multimodal-plugin`
    """

    def __init__(self) -> None:
@@ -94,8 +94,8 @@ class MultiModalPlugin(ABC):
        If `None` is provided, then the default input mapper is used instead.

        See also:
-            - :ref:`input_processing_pipeline`
-            - :ref:`enabling_multimodal_inputs`
+            - :ref:`input-processing-pipeline`
+            - :ref:`enabling-multimodal-inputs`
        """

        def wrapper(model_cls: N) -> N:
@@ -130,8 +130,8 @@ class MultiModalPlugin(ABC):
            TypeError: If the data type is not supported.

        See also:
-            - :ref:`input_processing_pipeline`
-            - :ref:`enabling_multimodal_inputs`
+            - :ref:`input-processing-pipeline`
+            - :ref:`enabling-multimodal-inputs`
        """

        # Avoid circular import
@@ -190,7 +190,7 @@ class MultiModalPlugin(ABC):
        If `None` is provided, then the default calculation is used instead.

        See also:
-            :ref:`enabling_multimodal_inputs`
+            :ref:`enabling-multimodal-inputs`
        """

        def wrapper(model_cls: N) -> N:
@@ -222,7 +222,7 @@ class MultiModalPlugin(ABC):
        The model is identified by ``model_config``.

        See also:
-            :ref:`enabling_multimodal_inputs`
+            :ref:`enabling-multimodal-inputs`
        """
        # Avoid circular import
        from vllm.model_executor.model_loader import get_model_architecture
--- a/vllm/multimodal/inputs.py
+++ b/vllm/multimodal/inputs.py
@@ -75,7 +75,7 @@ Note:
    This dictionary also accepts modality keys defined outside
    :class:`MultiModalDataBuiltins` as long as a customized plugin
    is registered through the :class:`~vllm.multimodal.MULTIMODAL_REGISTRY`.
-    Read more on that :ref:`here <adding_multimodal_plugin>`.
+    Read more on that :ref:`here <adding-multimodal-plugin>`.
 """


--- a/vllm/multimodal/registry.py
+++ b/vllm/multimodal/registry.py
@@ -76,7 +76,7 @@ class MultiModalRegistry:
        Register a multi-modal plugin so it can be recognized by vLLM.

        See also:
-            :ref:`adding_multimodal_plugin`
+            :ref:`adding-multimodal-plugin`
        """
        data_type_key = plugin.get_data_key()

@@ -311,8 +311,8 @@ class MultiModalRegistry:
        invoked to transform the data into a dictionary of model inputs.

        See also:
-            - :ref:`input_processing_pipeline`
-            - :ref:`enabling_multimodal_inputs`
+            - :ref:`input-processing-pipeline`
+            - :ref:`enabling-multimodal-inputs`
        """

        def wrapper(model_cls: N) -> N:
--- a/vllm/platforms/cpu.py
+++ b/vllm/platforms/cpu.py
@@ -50,7 +50,7 @@ class CpuPlatform(Platform):
        import vllm.envs as envs
        from vllm.utils import GiB_bytes
        model_config = vllm_config.model_config
-        # Reminder: Please update docs/source/usage/compatibility_matrix.rst
+        # Reminder: Please update docs/source/usage/compatibility_matrix.md
        # If the feature combo become valid
        if not model_config.enforce_eager:
            logger.warning(
--- a/vllm/scripts.py
+++ b/vllm/scripts.py
@@ -165,7 +165,7 @@ def main():
        required=False,
        help="Read CLI options from a config file."
        "Must be a YAML with the following options:"
-        "https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#command-line-arguments-for-the-server"
+        "https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#cli-reference"
    )
    serve_parser = make_arg_parser(serve_parser)
    serve_parser.set_defaults(dispatch_function=serve)
--- a/vllm/spec_decode/spec_decode_worker.py
+++ b/vllm/spec_decode/spec_decode_worker.py
@@ -108,7 +108,7 @@ def create_spec_worker(*args, **kwargs) -> "SpecDecodeWorker":
    return spec_decode_worker


-# Reminder: Please update docs/source/usage/compatibility_matrix.rst
+# Reminder: Please update docs/source/usage/compatibility_matrix.md
 # If the feature combo become valid
 class SpecDecodeWorker(LoraNotSupportedWorkerBase):
    """Worker which implements speculative decoding.
--- a/vllm/utils.py
+++ b/vllm/utils.py
@@ -51,7 +51,7 @@ logger = init_logger(__name__)

 # Exception strings for non-implemented encoder/decoder scenarios

-# Reminder: Please update docs/source/usage/compatibility_matrix.rst
+# Reminder: Please update docs/source/usage/compatibility_matrix.md
 # If the feature combo become valid

 STR_NOT_IMPL_ENC_DEC_SWA = \
--- a/vllm/worker/multi_step_model_runner.py
+++ b/vllm/worker/multi_step_model_runner.py
@@ -821,7 +821,7 @@ def _pythonize_sampler_output(

    for sgdx, (seq_group,
               sample_result) in enumerate(zip(seq_groups, samples_list)):
-        # Reminder: Please update docs/source/usage/compatibility_matrix.rst
+        # Reminder: Please update docs/source/usage/compatibility_matrix.md
        # If the feature combo become valid
        # (Check for Guided Decoding)
        if seq_group.sampling_params.logits_processors:
--- a/vllm/worker/utils.py
+++ b/vllm/worker/utils.py
@@ -13,7 +13,7 @@ def assert_enc_dec_mr_supported_scenario(
    a supported scenario.
    '''

-    # Reminder: Please update docs/source/usage/compatibility_matrix.rst
+    # Reminder: Please update docs/source/usage/compatibility_matrix.md
    # If the feature combo become valid

    if enc_dec_mr.cache_config.enable_prefix_caching: