[Docs] Convert rST to MyST (Markdown) (#11145)
Signed-off-by: Rafael Vasquez <rafvasq21@gmail.com>
This commit is contained in:
@@ -430,7 +430,7 @@ class ROCmFlashAttentionImpl(AttentionImpl):
|
||||
Returns:
|
||||
shape = [num_tokens, num_heads * head_size]
|
||||
"""
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.md
|
||||
# If the feature combo becomes valid
|
||||
if attn_type != AttentionType.DECODER:
|
||||
raise NotImplementedError("Encoder self-attention and "
|
||||
|
||||
@@ -638,7 +638,7 @@ class ModelConfig:
|
||||
self.use_async_output_proc = False
|
||||
return
|
||||
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.md
|
||||
# If the feature combo becomes valid
|
||||
if not current_platform.is_async_output_supported(self.enforce_eager):
|
||||
logger.warning(
|
||||
@@ -658,7 +658,7 @@ class ModelConfig:
|
||||
if self.runner_type == "pooling":
|
||||
self.use_async_output_proc = False
|
||||
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.md
|
||||
# If the feature combo becomes valid
|
||||
if speculative_config:
|
||||
logger.warning("Async output processing is not supported with"
|
||||
@@ -2058,7 +2058,7 @@ class LoRAConfig:
|
||||
model_config.quantization)
|
||||
|
||||
def verify_with_scheduler_config(self, scheduler_config: SchedulerConfig):
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.md
|
||||
# If the feature combo becomes valid
|
||||
if scheduler_config.chunked_prefill_enabled:
|
||||
logger.warning("LoRA with chunked prefill is still experimental "
|
||||
|
||||
@@ -1148,7 +1148,7 @@ class EngineArgs:
|
||||
disable_logprobs=self.disable_logprobs_during_spec_decoding,
|
||||
)
|
||||
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.md
|
||||
# If the feature combo becomes valid
|
||||
if self.num_scheduler_steps > 1:
|
||||
if speculative_config is not None:
|
||||
|
||||
@@ -133,7 +133,7 @@ class LLMEngine:
|
||||
and the :class:`AsyncLLMEngine` class wraps this class for online serving.
|
||||
|
||||
The config arguments are derived from :class:`~vllm.EngineArgs`. (See
|
||||
:ref:`engine_args`)
|
||||
:ref:`engine-args`)
|
||||
|
||||
Args:
|
||||
model_config: The configuration related to the LLM model.
|
||||
|
||||
@@ -65,7 +65,7 @@ class MultiStepOutputProcessor(SequenceGroupOutputProcessor):
|
||||
@staticmethod
|
||||
@functools.lru_cache
|
||||
def _log_prompt_logprob_unsupported_warning_once():
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.md
|
||||
# If the feature combo becomes valid
|
||||
logger.warning(
|
||||
"Prompt logprob is not supported by multi step workers. "
|
||||
|
||||
@@ -115,7 +115,7 @@ class LLM:
|
||||
integer, it is used as the level of compilation optimization. If it
|
||||
is a dictionary, it can specify the full compilation configuration.
|
||||
**kwargs: Arguments for :class:`~vllm.EngineArgs`. (See
|
||||
:ref:`engine_args`)
|
||||
:ref:`engine-args`)
|
||||
|
||||
Note:
|
||||
This class is intended to be used for offline inference. For online
|
||||
|
||||
@@ -22,7 +22,7 @@ class CPUExecutor(ExecutorBase):
|
||||
|
||||
def _init_executor(self) -> None:
|
||||
assert self.device_config.device_type == "cpu"
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.md
|
||||
# If the feature combo becomes valid
|
||||
assert self.lora_config is None, "cpu backend doesn't support LoRA"
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ The global :class:`~InputRegistry` which is used by :class:`~vllm.LLMEngine`
|
||||
to dispatch data processing according to the target model.
|
||||
|
||||
See also:
|
||||
:ref:`input_processing_pipeline`
|
||||
:ref:`input-processing-pipeline`
|
||||
"""
|
||||
|
||||
__all__ = [
|
||||
|
||||
@@ -314,7 +314,7 @@ class InputRegistry:
|
||||
The model is identified by ``model_config``.
|
||||
|
||||
See also:
|
||||
:ref:`enabling_multimodal_inputs`
|
||||
:ref:`enabling-multimodal-inputs`
|
||||
|
||||
Note:
|
||||
This should be called after
|
||||
@@ -391,7 +391,7 @@ class InputRegistry:
|
||||
happens before :meth:`~vllm.multimodal.MultiModalRegistry.map_input`.
|
||||
|
||||
See also:
|
||||
:ref:`input_processing_pipeline`
|
||||
:ref:`input-processing-pipeline`
|
||||
"""
|
||||
|
||||
def wrapper(model_cls: N) -> N:
|
||||
@@ -435,7 +435,7 @@ class InputRegistry:
|
||||
The model is identified by ``model_config``.
|
||||
|
||||
See also:
|
||||
:ref:`input_processing_pipeline`
|
||||
:ref:`input-processing-pipeline`
|
||||
"""
|
||||
# Avoid circular import
|
||||
from vllm.model_executor.model_loader import get_model_architecture
|
||||
|
||||
@@ -11,7 +11,7 @@ The global :class:`~MultiModalRegistry` is used by model runners to
|
||||
dispatch data processing according to its modality and the target model.
|
||||
|
||||
See also:
|
||||
:ref:`input_processing_pipeline`
|
||||
:ref:`input-processing-pipeline`
|
||||
"""
|
||||
|
||||
__all__ = [
|
||||
|
||||
@@ -50,7 +50,7 @@ class MultiModalPlugin(ABC):
|
||||
(i.e., the modality of the data).
|
||||
|
||||
See also:
|
||||
:ref:`adding_multimodal_plugin`
|
||||
:ref:`adding-multimodal-plugin`
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
@@ -94,8 +94,8 @@ class MultiModalPlugin(ABC):
|
||||
If `None` is provided, then the default input mapper is used instead.
|
||||
|
||||
See also:
|
||||
- :ref:`input_processing_pipeline`
|
||||
- :ref:`enabling_multimodal_inputs`
|
||||
- :ref:`input-processing-pipeline`
|
||||
- :ref:`enabling-multimodal-inputs`
|
||||
"""
|
||||
|
||||
def wrapper(model_cls: N) -> N:
|
||||
@@ -130,8 +130,8 @@ class MultiModalPlugin(ABC):
|
||||
TypeError: If the data type is not supported.
|
||||
|
||||
See also:
|
||||
- :ref:`input_processing_pipeline`
|
||||
- :ref:`enabling_multimodal_inputs`
|
||||
- :ref:`input-processing-pipeline`
|
||||
- :ref:`enabling-multimodal-inputs`
|
||||
"""
|
||||
|
||||
# Avoid circular import
|
||||
@@ -190,7 +190,7 @@ class MultiModalPlugin(ABC):
|
||||
If `None` is provided, then the default calculation is used instead.
|
||||
|
||||
See also:
|
||||
:ref:`enabling_multimodal_inputs`
|
||||
:ref:`enabling-multimodal-inputs`
|
||||
"""
|
||||
|
||||
def wrapper(model_cls: N) -> N:
|
||||
@@ -222,7 +222,7 @@ class MultiModalPlugin(ABC):
|
||||
The model is identified by ``model_config``.
|
||||
|
||||
See also:
|
||||
:ref:`enabling_multimodal_inputs`
|
||||
:ref:`enabling-multimodal-inputs`
|
||||
"""
|
||||
# Avoid circular import
|
||||
from vllm.model_executor.model_loader import get_model_architecture
|
||||
|
||||
@@ -75,7 +75,7 @@ Note:
|
||||
This dictionary also accepts modality keys defined outside
|
||||
:class:`MultiModalDataBuiltins` as long as a customized plugin
|
||||
is registered through the :class:`~vllm.multimodal.MULTIMODAL_REGISTRY`.
|
||||
Read more on that :ref:`here <adding_multimodal_plugin>`.
|
||||
Read more on that :ref:`here <adding-multimodal-plugin>`.
|
||||
"""
|
||||
|
||||
|
||||
|
||||
@@ -76,7 +76,7 @@ class MultiModalRegistry:
|
||||
Register a multi-modal plugin so it can be recognized by vLLM.
|
||||
|
||||
See also:
|
||||
:ref:`adding_multimodal_plugin`
|
||||
:ref:`adding-multimodal-plugin`
|
||||
"""
|
||||
data_type_key = plugin.get_data_key()
|
||||
|
||||
@@ -311,8 +311,8 @@ class MultiModalRegistry:
|
||||
invoked to transform the data into a dictionary of model inputs.
|
||||
|
||||
See also:
|
||||
- :ref:`input_processing_pipeline`
|
||||
- :ref:`enabling_multimodal_inputs`
|
||||
- :ref:`input-processing-pipeline`
|
||||
- :ref:`enabling-multimodal-inputs`
|
||||
"""
|
||||
|
||||
def wrapper(model_cls: N) -> N:
|
||||
|
||||
@@ -50,7 +50,7 @@ class CpuPlatform(Platform):
|
||||
import vllm.envs as envs
|
||||
from vllm.utils import GiB_bytes
|
||||
model_config = vllm_config.model_config
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.md
|
||||
# If the feature combo becomes valid
|
||||
if not model_config.enforce_eager:
|
||||
logger.warning(
|
||||
|
||||
@@ -165,7 +165,7 @@ def main():
|
||||
required=False,
|
||||
help="Read CLI options from a config file."
|
||||
"Must be a YAML with the following options:"
|
||||
"https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#command-line-arguments-for-the-server"
|
||||
"https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#cli-reference"
|
||||
)
|
||||
serve_parser = make_arg_parser(serve_parser)
|
||||
serve_parser.set_defaults(dispatch_function=serve)
|
||||
|
||||
@@ -108,7 +108,7 @@ def create_spec_worker(*args, **kwargs) -> "SpecDecodeWorker":
|
||||
return spec_decode_worker
|
||||
|
||||
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.md
|
||||
# If the feature combo becomes valid
|
||||
class SpecDecodeWorker(LoraNotSupportedWorkerBase):
|
||||
"""Worker which implements speculative decoding.
|
||||
|
||||
@@ -51,7 +51,7 @@ logger = init_logger(__name__)
|
||||
|
||||
# Exception strings for non-implemented encoder/decoder scenarios
|
||||
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.md
|
||||
# If the feature combo becomes valid
|
||||
|
||||
STR_NOT_IMPL_ENC_DEC_SWA = \
|
||||
|
||||
@@ -821,7 +821,7 @@ def _pythonize_sampler_output(
|
||||
|
||||
for sgdx, (seq_group,
|
||||
sample_result) in enumerate(zip(seq_groups, samples_list)):
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.md
|
||||
# If the feature combo becomes valid
|
||||
# (Check for Guided Decoding)
|
||||
if seq_group.sampling_params.logits_processors:
|
||||
|
||||
@@ -13,7 +13,7 @@ def assert_enc_dec_mr_supported_scenario(
|
||||
a supported scenario.
|
||||
'''
|
||||
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
|
||||
# Reminder: Please update docs/source/usage/compatibility_matrix.md
|
||||
# If the feature combo becomes valid
|
||||
|
||||
if enc_dec_mr.cache_config.enable_prefix_caching:
|
||||
|
||||
Reference in New Issue
Block a user