[V1] Optimize handling of sampling metadata and req_ids list (#13244)
Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, List, Optional, Set
|
||||
from typing import Dict, List, Optional, Set, Tuple
|
||||
|
||||
import torch
|
||||
|
||||
@@ -12,15 +12,13 @@ class SamplingMetadata:
|
||||
temperature: torch.Tensor
|
||||
all_greedy: bool
|
||||
all_random: bool
|
||||
rejection_sampling: bool
|
||||
spec_token_ids: List[List[int]]
|
||||
|
||||
top_p: torch.Tensor
|
||||
top_k: torch.Tensor
|
||||
no_top_p: bool
|
||||
no_top_k: bool
|
||||
min_p: torch.Tensor
|
||||
no_min_p: bool
|
||||
# None when there are no speculated tokens.
|
||||
spec_token_ids: Optional[List[List[int]]]
|
||||
|
||||
top_p: Optional[torch.Tensor]
|
||||
top_k: Optional[torch.Tensor]
|
||||
min_p: Optional[torch.Tensor]
|
||||
|
||||
generators: Dict[int, torch.Generator]
|
||||
|
||||
@@ -34,7 +32,8 @@ class SamplingMetadata:
|
||||
repetition_penalties: torch.Tensor
|
||||
|
||||
output_token_ids: List[List[int]]
|
||||
min_tokens: List[int]
|
||||
stop_token_ids: List[Set[int]]
|
||||
|
||||
# req_index -> (min_tokens, stop_token_ids)
|
||||
min_tokens: Dict[int, Tuple[int, Set[int]]]
|
||||
|
||||
logit_bias: List[Optional[Dict[int, float]]]
|
||||
|
||||
Reference in New Issue
Block a user