diff --git a/vllm/entrypoints/cli/launch.py b/vllm/entrypoints/cli/launch.py
index 6afa24353..cc9e467c4 100644
--- a/vllm/entrypoints/cli/launch.py
+++ b/vllm/entrypoints/cli/launch.py
@@ -116,6 +116,11 @@ async def run_launch_fastapi(args: argparse.Namespace) -> None:
     # 2. Build and serve the API server
     engine_args = AsyncEngineArgs.from_cli_args(args)
     model_config = engine_args.create_model_config()
+
+    # Render servers preprocess data only — no inference, no quantized kernels.
+    # Clear quantization so VllmConfig skips quant dtype/capability validation.
+    model_config.quantization = None
+
     vllm_config = VllmConfig(model_config=model_config)
     shutdown_task = await build_and_serve_renderer(
         vllm_config, listen_address, sock, args