[BugFix] Remove incorrect assert in split_decodes_and_prefills (#36553)
Signed-off-by: Woosuk Kwon <woosuk@inferact.ai>
This commit is contained in:
@@ -528,7 +528,6 @@ def split_decodes_and_prefills(
         # requests may have a query length of 0 but since they are padding its fine
         # to treat them as decodes (ensures num_decodes matches the captured size)
         if torch.all((query_lens == query_lens[0]) | (query_lens == 0)):
-            assert num_reqs * query_lens[0] == num_tokens, "tokens not padded correctly"
             return num_reqs, 0, num_tokens, 0 # all decodes
         is_prefill = query_lens != query_lens[0]
     else:
Reference in New Issue
Block a user