Use monotonic time where appropriate (#1249)

This commit is contained in:
Antoni Baum
2023-10-02 19:22:05 -07:00
committed by GitHub
parent 66d18a7fb0
commit acbed3ef40
7 changed files with 18 additions and 17 deletions

View File

@@ -121,7 +121,7 @@ class Scheduler:
blocks_to_copy: Dict[int, List[int]] = {}
# Fix the current time.
now = time.time()
now = time.monotonic()
# Join waiting sequences if possible.
if not self.swapped:

View File

@@ -417,7 +417,8 @@ class AsyncLLMEngine:
request.
"""
# Preprocess the request.
arrival_time = time.time()
# NOTE: arrival_time is monotonic time, not wall-clock time; do not use it for logging.
arrival_time = time.monotonic()
try:
stream = await self.add_request(request_id,

View File

@@ -256,10 +256,10 @@ class LLMEngine:
prompt_token_ids: The token IDs of the prompt. If None, we
use the tokenizer to convert the prompts to token IDs.
arrival_time: The arrival time of the request. If None, we use
the current time.
the current monotonic time.
"""
if arrival_time is None:
arrival_time = time.time()
arrival_time = time.monotonic()
if prompt_token_ids is None:
assert prompt is not None
prompt_token_ids = self.tokenizer.encode(prompt)
@@ -568,7 +568,7 @@ class LLMEngine:
prompt_run: bool,
num_batched_tokens: int,
) -> None:
now = time.time()
now = time.monotonic()
# Log the number of batched input tokens.
if prompt_run:
self.num_prompt_tokens.append((now, num_batched_tokens))

View File

@@ -210,7 +210,7 @@ async def create_chat_completion(request: ChatCompletionRequest,
model_name = request.model
request_id = f"cmpl-{random_uuid()}"
created_time = int(time.time())
created_time = int(time.monotonic())
try:
sampling_params = SamplingParams(
n=request.n,
@@ -411,7 +411,7 @@ async def create_completion(request: CompletionRequest, raw_request: Request):
if error_check_ret is not None:
return error_check_ret
created_time = int(time.time())
created_time = int(time.monotonic())
try:
sampling_params = SamplingParams(
n=request.n,