[Doc] Compatibility matrix for mutually exclusive features (#8512)

Signed-off-by: Wallas Santos <wallashss@ibm.com>
This commit is contained in:
Wallas Henrique
2024-10-11 15:18:50 -03:00
committed by GitHub
parent 1a1823871d
commit 8baf85e4e9
13 changed files with 467 additions and 0 deletions

View File

@@ -359,6 +359,8 @@ class ModelConfig:
self.use_async_output_proc = False
return
# Reminder: Please update docs/source/serving/compatibility_matrix.rst
# If the feature combo becomes valid
if device_config.device_type not in ("cuda", "tpu"):
logger.warning(
"Async output processing is only supported for CUDA or TPU. "
@@ -372,6 +374,8 @@ class ModelConfig:
self.use_async_output_proc = False
return
# Reminder: Please update docs/source/serving/compatibility_matrix.rst
# If the feature combo becomes valid
if device_config.device_type == "cuda" and self.enforce_eager:
logger.warning(
"To see benefits of async output processing, enable CUDA "
@@ -385,6 +389,8 @@ class ModelConfig:
if self.embedding_mode:
self.use_async_output_proc = False
# Reminder: Please update docs/source/serving/compatibility_matrix.rst
# If the feature combo becomes valid
if speculative_config:
logger.warning("Async output processing is not supported with"
" speculative decoding currently.")
@@ -1200,6 +1206,8 @@ class SpeculativeConfig:
"speculative decoding is > 1, but got "
f"{speculative_disable_by_batch_size=}")
# Reminder: Please update docs/source/serving/compatibility_matrix.rst
# If the feature combo becomes valid
if enable_chunked_prefill:
raise ValueError(
"Speculative decoding and chunked prefill are "
@@ -1561,6 +1569,8 @@ class LoRAConfig:
model_config.quantization)
def verify_with_scheduler_config(self, scheduler_config: SchedulerConfig):
# Reminder: Please update docs/source/serving/compatibility_matrix.rst
# If the feature combo becomes valid
if scheduler_config.chunked_prefill_enabled:
raise ValueError("LoRA is not supported with chunked prefill yet.")