[V1] Support long_prefill_token_threshold in v1 scheduler (#15419)
Signed-off-by: Lu Fang <lufang@fb.com>
This commit is contained in:
@@ -1625,9 +1625,7 @@ class EngineArgs:
         if (self.max_num_partial_prefills
                 != EngineArgs.max_num_partial_prefills
                 or self.max_long_partial_prefills
-                != EngineArgs.max_long_partial_prefills
-                or self.long_prefill_token_threshold
-                != EngineArgs.long_prefill_token_threshold):
+                != EngineArgs.max_long_partial_prefills):
             _raise_or_fallback(feature_name="Concurrent Partial Prefill",
                                recommend_to_remove=False)
             return False
Reference in New Issue
Block a user