From eca7a8fb59c223c603922be0bd62f5c460972a50 Mon Sep 17 00:00:00 2001 From: Didier Durand <2927957+didier-durand@users.noreply.github.com> Date: Mon, 24 Nov 2025 12:10:48 +0100 Subject: [PATCH] [Doc]: fix typos in various files (#29230) Signed-off-by: Didier Durand Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- benchmarks/kernels/deepgemm/README.md | 2 +- vllm/config/vllm.py | 2 +- vllm/forward_context.py | 2 +- vllm/v1/worker/gpu_input_batch.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/benchmarks/kernels/deepgemm/README.md b/benchmarks/kernels/deepgemm/README.md index 41e68e047..a28c6956b 100644 --- a/benchmarks/kernels/deepgemm/README.md +++ b/benchmarks/kernels/deepgemm/README.md @@ -2,7 +2,7 @@ This directory includes benchmarks between DeepSeek's DeepGEMM block fp8 kernels against vLLM's existing triton and CUTLASS-based kernels. -Currently this just includes dense GEMMs and only works on Hopper GPUs. +Currently, this just includes dense GEMMs and only works on Hopper GPUs. ## Setup diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index d64e315b4..8a3599416 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -96,7 +96,7 @@ class VllmConfig: """`torch.compile` and cudagraph capture configuration for the model. As a shorthand, one can append compilation arguments via - -0.parameter=arguement such as `-O.mode=3` (same as `-O='{"mode":3}'`). + -O.parameter=argument such as `-O.mode=3` (same as `-O='{"mode":3}'`). 
You can specify the full compilation config like so: `{"mode": 3, "cudagraph_capture_sizes": [1, 2, 4, 8]}` diff --git a/vllm/forward_context.py b/vllm/forward_context.py index 25fb7181a..7cb490e39 100644 --- a/vllm/forward_context.py +++ b/vllm/forward_context.py @@ -153,7 +153,7 @@ class DPMetadata: @contextmanager def sp_local_sizes(self, sequence_parallel_size: int): """ - Context mamager for setting self.local_sizes. Same as self.chunked_sizes + Context manager for setting self.local_sizes. Same as self.chunked_sizes but without any chunking. """ self.local_sizes = _compute_sp_num_tokens( diff --git a/vllm/v1/worker/gpu_input_batch.py b/vllm/v1/worker/gpu_input_batch.py index d6fef450c..4a2818ab1 100644 --- a/vllm/v1/worker/gpu_input_batch.py +++ b/vllm/v1/worker/gpu_input_batch.py @@ -525,7 +525,7 @@ class InputBatch: # NOTE: the following is unsafe # self.token_ids_cpu[i1, ...], self.token_ids_cpu[i2, ...], =\ # self.token_ids_cpu[i2, ...], self.token_ids_cpu[i1, ...] - # instead, we need to temporiarily copy the data for one of the indices + # instead, we need to temporarily copy the data for one of the indices # TODO(lucas): optimize this by only copying valid indices tmp = self.token_ids_cpu[i1, ...].copy() self.token_ids_cpu[i1, ...] = self.token_ids_cpu[i2, ...]