Use monotonic time where appropriate (#1249)

2023-10-02 19:22:05 -07:00
parent 66d18a7fb0
commit acbed3ef40
7 changed files with 18 additions and 17 deletions
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -256,10 +256,10 @@ class LLMEngine:
            prompt_token_ids: The token IDs of the prompt. If None, we
                use the tokenizer to convert the prompts to token IDs.
            arrival_time: The arrival time of the request. If None, we use
-                the current time.
+                the current monotonic time.
        """
        if arrival_time is None:
-            arrival_time = time.time()
+            arrival_time = time.monotonic()
        if prompt_token_ids is None:
            assert prompt is not None
            prompt_token_ids = self.tokenizer.encode(prompt)
@@ -568,7 +568,7 @@ class LLMEngine:
        prompt_run: bool,
        num_batched_tokens: int,
    ) -> None:
-        now = time.time()
+        now = time.monotonic()
        # Log the number of batched input tokens.
        if prompt_run:
            self.num_prompt_tokens.append((now, num_batched_tokens))