[Doc] Create a new "Usage" section (#10827)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2024-12-05 11:19:35 +08:00
parent 8d370e91cb
commit aa39a8e175
25 changed files with 218 additions and 125 deletions
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -509,7 +509,7 @@ class ModelConfig:
            self.use_async_output_proc = False
            return

-        # Reminder: Please update docs/source/serving/compatibility_matrix.rst
+        # Reminder: Please update docs/source/usage/compatibility_matrix.rst
        # If the feature combo become valid
        if device_config.device_type not in ("cuda", "tpu", "xpu", "hpu"):
            logger.warning(
@@ -525,7 +525,7 @@ class ModelConfig:
            self.use_async_output_proc = False
            return

-        # Reminder: Please update docs/source/serving/compatibility_matrix.rst
+        # Reminder: Please update docs/source/usage/compatibility_matrix.rst
        # If the feature combo become valid
        if device_config.device_type == "cuda" and self.enforce_eager:
            logger.warning(
@@ -540,7 +540,7 @@ class ModelConfig:
        if self.task == "embedding":
            self.use_async_output_proc = False

-        # Reminder: Please update docs/source/serving/compatibility_matrix.rst
+        # Reminder: Please update docs/source/usage/compatibility_matrix.rst
        # If the feature combo become valid
        if speculative_config:
            logger.warning("Async output processing is not supported with"
@@ -1704,7 +1704,7 @@ class LoRAConfig:
                           model_config.quantization)

    def verify_with_scheduler_config(self, scheduler_config: SchedulerConfig):
-        # Reminder: Please update docs/source/serving/compatibility_matrix.rst
+        # Reminder: Please update docs/source/usage/compatibility_matrix.rst
        # If the feature combo become valid
        if scheduler_config.chunked_prefill_enabled:
            raise ValueError("LoRA is not supported with chunked prefill yet.")