Use monotonic time where appropriate (#1249)
This commit is contained in:
@@ -256,10 +256,10 @@ class LLMEngine:
|
||||
prompt_token_ids: The token IDs of the prompt. If None, we
|
||||
use the tokenizer to convert the prompts to token IDs.
|
||||
arrival_time: The arrival time of the request. If None, we use
|
||||
the current time.
|
||||
the current monotonic time.
|
||||
"""
|
||||
if arrival_time is None:
|
||||
arrival_time = time.time()
|
||||
arrival_time = time.monotonic()
|
||||
if prompt_token_ids is None:
|
||||
assert prompt is not None
|
||||
prompt_token_ids = self.tokenizer.encode(prompt)
|
||||
@@ -568,7 +568,7 @@ class LLMEngine:
|
||||
prompt_run: bool,
|
||||
num_batched_tokens: int,
|
||||
) -> None:
|
||||
now = time.time()
|
||||
now = time.monotonic()
|
||||
# Log the number of batched input tokens.
|
||||
if prompt_run:
|
||||
self.num_prompt_tokens.append((now, num_batched_tokens))
|
||||
|
||||
Reference in New Issue
Block a user