# Patch vllm/config/vllm.py so the `kv_events_config` branch is taken only when
# `enable_kv_cache_events` is also truthy, not merely when a config object exists.
# sed exits 0 even if nothing matched, so the trailing grep fails the build when
# the patched condition is missing (e.g. the upstream line changed).
RUN sed -i "s/if self\.kv_events_config is not None:/if self.kv_events_config is not None and self.kv_events_config.enable_kv_cache_events:/" /usr/local/lib/python3.12/dist-packages/vllm/config/vllm.py \
    && grep -qF "if self.kv_events_config is not None and self.kv_events_config.enable_kv_cache_events:" /usr/local/lib/python3.12/dist-packages/vllm/config/vllm.py
# Patch LMCacheConnectorV1 to support HMA (vLLM's hybrid memory allocator, i.e. the hybrid KV cache manager).
# This is required for hybrid models like Nemotron that use both Mamba and Attention layers.
# Without this patch, LMCacheConnectorV1 fails with:
# "Connector LMCacheConnectorV1 does not support HMA but HMA is enabled"