[VLM] Limit multimodal input cache by memory (#14805)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-03-15 17:52:05 +08:00
committed by GitHub
parent 9ed6ee92d6
commit 3556a41434
13 changed files with 159 additions and 55 deletions

View File

@@ -48,7 +48,7 @@ def _test_processing_correctness(
tokenizer=cached_tokenizer_from_config(model_config),
)
# Ensure that it can fit all of the data
- cache = ProcessingCache(capacity=1 << 30)
+ cache = ProcessingCache(capacity_gb=2048)
processing_info = factories.info(ctx)
supported_mm_limits = processing_info.get_supported_mm_limits()