[Core] Add fault tolerance for RayTokenizerGroupPool (#5748)

This commit is contained in:
Antoni Baum
2024-06-25 10:15:10 -07:00
committed by GitHub
parent 7b99314301
commit 67882dbb44
5 changed files with 194 additions and 23 deletions

View File

@@ -1013,6 +1013,8 @@ class LLMEngine:
return self.model_executor.pin_lora(lora_id)
def check_health(self) -> None:
    """Run the tokenizer health check (when one is set), then the executor's.

    The tokenizer is checked first and the model executor second. Neither
    call is wrapped here, so anything raised by either check reaches the
    caller unchanged.
    """
    # Test for presence explicitly rather than by truthiness: a tokenizer
    # object that evaluates falsy (e.g. defines __len__ returning 0) must
    # still have its health check run instead of being silently skipped.
    if self.tokenizer is not None:
        self.tokenizer.check_health()
    self.model_executor.check_health()
def is_tracing_enabled(self) -> bool: