[V1] Support long_prefill_token_threshold in v1 scheduler (#15419)

Signed-off-by: Lu Fang <lufang@fb.com>
This commit is contained in:
Lu Fang
2025-03-25 14:22:26 -07:00
committed by GitHub
parent 6aa196c8dc
commit 082ab86f5f
4 changed files with 113 additions and 4 deletions

View File

@@ -1625,9 +1625,7 @@ class EngineArgs:
         if (self.max_num_partial_prefills
                 != EngineArgs.max_num_partial_prefills
                 or self.max_long_partial_prefills
-                != EngineArgs.max_long_partial_prefills
-                or self.long_prefill_token_threshold
-                != EngineArgs.long_prefill_token_threshold):
+                != EngineArgs.max_long_partial_prefills):
             _raise_or_fallback(feature_name="Concurrent Partial Prefill",
                                recommend_to_remove=False)
             return False