Replace "online inference" with "online serving" (#11923)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-01-10 12:05:56 +00:00
parent ef725feafc
commit d85c47d6ad
11 changed files with 16 additions and 16 deletions
--- a/vllm/model_executor/models/molmo.py
+++ b/vllm/model_executor/models/molmo.py
@@ -1068,7 +1068,7 @@ def input_processor_for_molmo(ctx: InputContext, inputs: DecoderOnlyInputs):
        trust_remote_code=model_config.trust_remote_code)

    # NOTE: message formatting for raw text prompt is only applied for
-    # offline inference; for online inference, the prompt is always in
+    # offline inference; for online serving, the prompt is always in
    # instruction format and tokenized.
    if prompt is not None and re.match(r"^User:[\s\S]*?(Assistant:)*$",
                                       prompt):