[Docs] Convert rST to MyST (Markdown) (#11145)

Signed-off-by: Rafael Vasquez <rafvasq21@gmail.com>
This commit is contained in:
Rafael Vasquez
2024-12-23 17:35:38 -05:00
committed by GitHub
parent 94d545a1a1
commit 32aa2059ad
167 changed files with 7863 additions and 8131 deletions

View File

@@ -430,7 +430,7 @@ class ROCmFlashAttentionImpl(AttentionImpl):
Returns:
shape = [num_tokens, num_heads * head_size]
"""
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
# Reminder: Please update docs/source/usage/compatibility_matrix.md
# If the feature combo becomes valid
if attn_type != AttentionType.DECODER:
raise NotImplementedError("Encoder self-attention and "

View File

@@ -638,7 +638,7 @@ class ModelConfig:
self.use_async_output_proc = False
return
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
# Reminder: Please update docs/source/usage/compatibility_matrix.md
# If the feature combo becomes valid
if not current_platform.is_async_output_supported(self.enforce_eager):
logger.warning(
@@ -658,7 +658,7 @@ class ModelConfig:
if self.runner_type == "pooling":
self.use_async_output_proc = False
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
# Reminder: Please update docs/source/usage/compatibility_matrix.md
# If the feature combo becomes valid
if speculative_config:
logger.warning("Async output processing is not supported with"
@@ -2058,7 +2058,7 @@ class LoRAConfig:
model_config.quantization)
def verify_with_scheduler_config(self, scheduler_config: SchedulerConfig):
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
# Reminder: Please update docs/source/usage/compatibility_matrix.md
# If the feature combo becomes valid
if scheduler_config.chunked_prefill_enabled:
logger.warning("LoRA with chunked prefill is still experimental "

View File

@@ -1148,7 +1148,7 @@ class EngineArgs:
disable_logprobs=self.disable_logprobs_during_spec_decoding,
)
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
# Reminder: Please update docs/source/usage/compatibility_matrix.md
# If the feature combo becomes valid
if self.num_scheduler_steps > 1:
if speculative_config is not None:

View File

@@ -133,7 +133,7 @@ class LLMEngine:
and the :class:`AsyncLLMEngine` class wraps this class for online serving.
The config arguments are derived from :class:`~vllm.EngineArgs`. (See
:ref:`engine_args`)
:ref:`engine-args`)
Args:
model_config: The configuration related to the LLM model.

View File

@@ -65,7 +65,7 @@ class MultiStepOutputProcessor(SequenceGroupOutputProcessor):
@staticmethod
@functools.lru_cache
def _log_prompt_logprob_unsupported_warning_once():
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
# Reminder: Please update docs/source/usage/compatibility_matrix.md
# If the feature combo becomes valid
logger.warning(
"Prompt logprob is not supported by multi step workers. "

View File

@@ -115,7 +115,7 @@ class LLM:
integer, it is used as the level of compilation optimization. If it
is a dictionary, it can specify the full compilation configuration.
**kwargs: Arguments for :class:`~vllm.EngineArgs`. (See
:ref:`engine_args`)
:ref:`engine-args`)
Note:
This class is intended to be used for offline inference. For online

View File

@@ -22,7 +22,7 @@ class CPUExecutor(ExecutorBase):
def _init_executor(self) -> None:
assert self.device_config.device_type == "cpu"
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
# Reminder: Please update docs/source/usage/compatibility_matrix.md
# If the feature combo becomes valid
assert self.lora_config is None, "cpu backend doesn't support LoRA"

View File

@@ -13,7 +13,7 @@ The global :class:`~InputRegistry` which is used by :class:`~vllm.LLMEngine`
to dispatch data processing according to the target model.
See also:
:ref:`input_processing_pipeline`
:ref:`input-processing-pipeline`
"""
__all__ = [

View File

@@ -314,7 +314,7 @@ class InputRegistry:
The model is identified by ``model_config``.
See also:
:ref:`enabling_multimodal_inputs`
:ref:`enabling-multimodal-inputs`
Note:
This should be called after
@@ -391,7 +391,7 @@ class InputRegistry:
happens before :meth:`~vllm.multimodal.MultiModalRegistry.map_input`.
See also:
:ref:`input_processing_pipeline`
:ref:`input-processing-pipeline`
"""
def wrapper(model_cls: N) -> N:
@@ -435,7 +435,7 @@ class InputRegistry:
The model is identified by ``model_config``.
See also:
:ref:`input_processing_pipeline`
:ref:`input-processing-pipeline`
"""
# Avoid circular import
from vllm.model_executor.model_loader import get_model_architecture

View File

@@ -11,7 +11,7 @@ The global :class:`~MultiModalRegistry` is used by model runners to
dispatch data processing according to its modality and the target model.
See also:
:ref:`input_processing_pipeline`
:ref:`input-processing-pipeline`
"""
__all__ = [

View File

@@ -50,7 +50,7 @@ class MultiModalPlugin(ABC):
(i.e., the modality of the data).
See also:
:ref:`adding_multimodal_plugin`
:ref:`adding-multimodal-plugin`
"""
def __init__(self) -> None:
@@ -94,8 +94,8 @@ class MultiModalPlugin(ABC):
If `None` is provided, then the default input mapper is used instead.
See also:
- :ref:`input_processing_pipeline`
- :ref:`enabling_multimodal_inputs`
- :ref:`input-processing-pipeline`
- :ref:`enabling-multimodal-inputs`
"""
def wrapper(model_cls: N) -> N:
@@ -130,8 +130,8 @@ class MultiModalPlugin(ABC):
TypeError: If the data type is not supported.
See also:
- :ref:`input_processing_pipeline`
- :ref:`enabling_multimodal_inputs`
- :ref:`input-processing-pipeline`
- :ref:`enabling-multimodal-inputs`
"""
# Avoid circular import
@@ -190,7 +190,7 @@ class MultiModalPlugin(ABC):
If `None` is provided, then the default calculation is used instead.
See also:
:ref:`enabling_multimodal_inputs`
:ref:`enabling-multimodal-inputs`
"""
def wrapper(model_cls: N) -> N:
@@ -222,7 +222,7 @@ class MultiModalPlugin(ABC):
The model is identified by ``model_config``.
See also:
:ref:`enabling_multimodal_inputs`
:ref:`enabling-multimodal-inputs`
"""
# Avoid circular import
from vllm.model_executor.model_loader import get_model_architecture

View File

@@ -75,7 +75,7 @@ Note:
This dictionary also accepts modality keys defined outside
:class:`MultiModalDataBuiltins` as long as a customized plugin
is registered through the :class:`~vllm.multimodal.MULTIMODAL_REGISTRY`.
Read more on that :ref:`here <adding_multimodal_plugin>`.
Read more on that :ref:`here <adding-multimodal-plugin>`.
"""

View File

@@ -76,7 +76,7 @@ class MultiModalRegistry:
Register a multi-modal plugin so it can be recognized by vLLM.
See also:
:ref:`adding_multimodal_plugin`
:ref:`adding-multimodal-plugin`
"""
data_type_key = plugin.get_data_key()
@@ -311,8 +311,8 @@ class MultiModalRegistry:
invoked to transform the data into a dictionary of model inputs.
See also:
- :ref:`input_processing_pipeline`
- :ref:`enabling_multimodal_inputs`
- :ref:`input-processing-pipeline`
- :ref:`enabling-multimodal-inputs`
"""
def wrapper(model_cls: N) -> N:

View File

@@ -50,7 +50,7 @@ class CpuPlatform(Platform):
import vllm.envs as envs
from vllm.utils import GiB_bytes
model_config = vllm_config.model_config
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
# Reminder: Please update docs/source/usage/compatibility_matrix.md
# If the feature combo becomes valid
if not model_config.enforce_eager:
logger.warning(

View File

@@ -165,7 +165,7 @@ def main():
required=False,
help="Read CLI options from a config file."
"Must be a YAML with the following options:"
"https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#command-line-arguments-for-the-server"
"https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#cli-reference"
)
serve_parser = make_arg_parser(serve_parser)
serve_parser.set_defaults(dispatch_function=serve)

View File

@@ -108,7 +108,7 @@ def create_spec_worker(*args, **kwargs) -> "SpecDecodeWorker":
return spec_decode_worker
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
# Reminder: Please update docs/source/usage/compatibility_matrix.md
# If the feature combo becomes valid
class SpecDecodeWorker(LoraNotSupportedWorkerBase):
"""Worker which implements speculative decoding.

View File

@@ -51,7 +51,7 @@ logger = init_logger(__name__)
# Exception strings for non-implemented encoder/decoder scenarios
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
# Reminder: Please update docs/source/usage/compatibility_matrix.md
# If the feature combo becomes valid
STR_NOT_IMPL_ENC_DEC_SWA = \

View File

@@ -821,7 +821,7 @@ def _pythonize_sampler_output(
for sgdx, (seq_group,
sample_result) in enumerate(zip(seq_groups, samples_list)):
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
# Reminder: Please update docs/source/usage/compatibility_matrix.md
# If the feature combo becomes valid
# (Check for Guided Decoding)
if seq_group.sampling_params.logits_processors:

View File

@@ -13,7 +13,7 @@ def assert_enc_dec_mr_supported_scenario(
a supported scenario.
'''
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
# Reminder: Please update docs/source/usage/compatibility_matrix.md
# If the feature combo becomes valid
if enc_dec_mr.cache_config.enable_prefix_caching: