[TPU] [V1] fix cases when max_num_reqs is set smaller than MIN_NUM_SEQS (#15583)

Signed-off-by: Chengji Yao <chengjiyao@google.com>
This commit is contained in:
Chengji Yao
2025-03-26 22:46:26 -07:00
committed by GitHub
parent ecff8309a3
commit 619d3de8bd
2 changed files with 2 additions and 5 deletions

View File

@@ -88,7 +88,7 @@ class TPUModelRunner:
self.max_model_len = model_config.max_model_len
self.max_num_blocks_per_req = cdiv(self.max_model_len, self.block_size)
self.max_num_tokens = scheduler_config.max_num_batched_tokens
- self.max_num_reqs = scheduler_config.max_num_seqs
+ self.max_num_reqs = max(scheduler_config.max_num_seqs, MIN_NUM_SEQS)
# Model-related.
self.num_attn_layers = model_config.get_num_layers_by_block_type(