Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -82,11 +82,12 @@ def _create_random_top_logprob_test_matrix(
|
||||
|
||||
|
||||
def _create_random_top_token_test_vector(
|
||||
num_logprobs: int,
|
||||
lower: int,
|
||||
upper: int,
|
||||
sampled_token_id: int,
|
||||
adjust_num_logprobs: bool = True) -> tuple[torch.Tensor, int]:
|
||||
num_logprobs: int,
|
||||
lower: int,
|
||||
upper: int,
|
||||
sampled_token_id: int,
|
||||
adjust_num_logprobs: bool = True,
|
||||
) -> tuple[torch.Tensor, int]:
|
||||
"""Create a random vector of top logprob token indices
|
||||
|
||||
Use to create fake sample logprobs for testing. The sampled token
|
||||
@@ -127,8 +128,9 @@ def _create_random_top_token_test_vector(
|
||||
|
||||
# Check if the sampled_token_id occurs in choice_tensor[1:]
|
||||
if sampled_token_id in choice_tensor[1:]:
|
||||
sampled_token_rank = (choice_tensor[1:] == sampled_token_id).nonzero(
|
||||
as_tuple=True)[0].item()
|
||||
sampled_token_rank = (
|
||||
(choice_tensor[1:] == sampled_token_id).nonzero(as_tuple=True)[0].item()
|
||||
)
|
||||
else:
|
||||
# If not found, assign a random int between num_logprobs and 50700
|
||||
sampled_token_rank = random.randint(num_logprobs, 50700)
|
||||
@@ -164,9 +166,12 @@ def _create_random_top_token_test_matrix(
|
||||
num_elements = shape[0] * shape[1]
|
||||
choice_tensor = torch.randperm(upper - lower)[:num_elements] + lower
|
||||
matrix = torch.cat(
|
||||
(torch.tensor(tokens_list, dtype=torch.int).unsqueeze(-1),
|
||||
choice_tensor.view(shape)),
|
||||
dim=1)
|
||||
(
|
||||
torch.tensor(tokens_list, dtype=torch.int).unsqueeze(-1),
|
||||
choice_tensor.view(shape),
|
||||
),
|
||||
dim=1,
|
||||
)
|
||||
|
||||
# Initialize the tensor for storing the ranks
|
||||
prompt_token_ranks = torch.empty(shape[0], dtype=torch.int)
|
||||
@@ -174,8 +179,7 @@ def _create_random_top_token_test_matrix(
|
||||
# Iterate over each row to check presence of
|
||||
# tokens_list[rdx] and determine its index
|
||||
for rdx in range(shape[0]):
|
||||
row = matrix[rdx,
|
||||
1:] # Skip the first column as it contains the token list
|
||||
row = matrix[rdx, 1:] # Skip the first column as it contains the token list
|
||||
token_index = (row == tokens_list[rdx]).nonzero(as_tuple=True)[0]
|
||||
if token_index.numel() > 0:
|
||||
prompt_token_ranks[rdx] = token_index.item()
|
||||
@@ -229,19 +233,21 @@ def generate_dummy_sample_logprobs(
|
||||
(
|
||||
token_vector,
|
||||
sampled_token_rank,
|
||||
) = _create_random_top_token_test_vector(num_logprobs, 0,
|
||||
len(tokenizer.vocab) - 1,
|
||||
sampled_token_id)
|
||||
) = _create_random_top_token_test_vector(
|
||||
num_logprobs, 0, len(tokenizer.vocab) - 1, sampled_token_id
|
||||
)
|
||||
|
||||
res.append(
|
||||
(token_vector,
|
||||
_create_random_top_logprob_test_vector(num_logprobs + 1, -100,
|
||||
0), sampled_token_rank))
|
||||
(
|
||||
token_vector,
|
||||
_create_random_top_logprob_test_vector(num_logprobs + 1, -100, 0),
|
||||
sampled_token_rank,
|
||||
)
|
||||
)
|
||||
|
||||
# Convert tensors in the list tuples to Python lists
|
||||
res_list_format = [
|
||||
(log_probs_tensor.tolist(), token_ids_tensor.tolist(),
|
||||
sampled_token_rank)
|
||||
(log_probs_tensor.tolist(), token_ids_tensor.tolist(), sampled_token_rank)
|
||||
for log_probs_tensor, token_ids_tensor, sampled_token_rank in res
|
||||
]
|
||||
|
||||
@@ -282,18 +288,24 @@ def generate_dummy_prompt_logprobs_tensors(
|
||||
token_vector,
|
||||
prompt_token_ranks,
|
||||
) = _create_random_top_token_test_matrix(
|
||||
(num_prompt_logprobs, num_logprobs), 0,
|
||||
len(tokenizer.vocab) - 1, prompt_tokens_list[1:])
|
||||
(num_prompt_logprobs, num_logprobs),
|
||||
0,
|
||||
len(tokenizer.vocab) - 1,
|
||||
prompt_tokens_list[1:],
|
||||
)
|
||||
return LogprobsTensors(
|
||||
token_vector,
|
||||
_create_random_top_logprob_test_matrix(
|
||||
(num_prompt_logprobs, num_logprobs + 1), -100, 0),
|
||||
prompt_token_ranks)
|
||||
(num_prompt_logprobs, num_logprobs + 1), -100, 0
|
||||
),
|
||||
prompt_token_ranks,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class DummyOutputProcessorTestVectors:
|
||||
"""Dummy test vectors for output processor tests"""
|
||||
|
||||
tokenizer: GeneralTokenizerType
|
||||
vllm_config: EngineArgs
|
||||
full_tokens: list[list[int]] # Prompt + generated tokens
|
||||
@@ -320,9 +332,9 @@ class MockEngineCore:
|
||||
# For each request, for each sampled token offset,
|
||||
# a tuple of
|
||||
# (list of topk token ids, list of sample logprob vals, rank)
|
||||
generated_logprobs_raw: Optional[list[list[tuple[list[int],
|
||||
list[float],
|
||||
int]]]] = None,
|
||||
generated_logprobs_raw: Optional[
|
||||
list[list[tuple[list[int], list[float], int]]]
|
||||
] = None,
|
||||
# For each request, a tuple of
|
||||
# (prompt logprob val matrix, prompt logprob tok id matrix);
|
||||
# each matrix has dimensions
|
||||
@@ -355,7 +367,8 @@ class MockEngineCore:
|
||||
if do_logprobs:
|
||||
assert self.generated_logprobs_raw is not None
|
||||
(logprobs_token_ids_, logprobs_, sampled_token_ranks_) = (
|
||||
self.generated_logprobs_raw[req_idx][token_idx])
|
||||
self.generated_logprobs_raw[req_idx][token_idx]
|
||||
)
|
||||
logprobs = LogprobsLists(
|
||||
[logprobs_token_ids_],
|
||||
[logprobs_],
|
||||
|
||||
Reference in New Issue
Block a user