fix(docs): fix typos in comments and docstrings (#34836)
Signed-off-by: machov <mv1742@nyu.edu>
This commit is contained in:
@@ -285,7 +285,7 @@ class CompilerManager:
|
||||
with self.compile_context(compile_range):
|
||||
# There is a compilation time optimization here.
|
||||
#
|
||||
# If the (input metadata, graph, compiler config) are the same, then
|
||||
# If the (input metadata, graph, compiler config) are the same, then
|
||||
# we want to avoid compiling the same artifact again. If we didn't
|
||||
# do this optimization, the backend compilation (InductorAdaptor or
|
||||
# InductorStandaloneAdaptor)
|
||||
|
||||
@@ -420,7 +420,7 @@ def make_fp8_moe_quant_config(
|
||||
per_out_ch_quant: bool = False,
|
||||
) -> FusedMoEQuantConfig | None:
|
||||
"""
|
||||
Create FusedMoEQuantConfig for the specified FP8 Backend.
|
||||
Create FusedMoEQuantConfig for the specified FP8 Backend.
|
||||
The FusedMoEQuantConfig holds the scales that are used
|
||||
at runtime by the Modular Kernel abstraction.
|
||||
|
||||
|
||||
@@ -151,7 +151,7 @@ class DefaultMoERunner(MoERunner):
|
||||
kernels for different parallel execution modes.
|
||||
|
||||
Eventually, this class will be split up and specialized for different
|
||||
configurations, e.g. the presence or absence of shared experts, a gate, etc.
|
||||
configurations, e.g. the presence or absence of shared experts, a gate, etc.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
|
||||
@@ -586,7 +586,7 @@ class GptOssModel(nn.Module):
|
||||
parts = name.split(".")
|
||||
ids = [s for s in parts if s.isdigit()]
|
||||
|
||||
# for amd-quark format that each expert is separated
|
||||
# for amd-quark format that each expert is separated
|
||||
# need to extract the parameter name with experts fused.
|
||||
# example model: amd/gpt-oss-20b-MoE-Quant-W-MXFP4-A-FP8-KV-FP8
|
||||
if len(ids) == 2:
|
||||
|
||||
@@ -567,8 +567,8 @@ def current_stream() -> torch.cuda.Stream:
|
||||
return _current_stream_tls.value
|
||||
|
||||
|
||||
# Global auxiliary stream for running operations in background streams.
|
||||
# We have single global auxiliary stream to avoid an explosion of streams
|
||||
# Global auxiliary stream for running operations in background streams.
|
||||
# We have single global auxiliary stream to avoid an explosion of streams
|
||||
# for every layer (and make profiling look sane).
|
||||
#
|
||||
# aux_stream() is currently used for:
|
||||
|
||||
Reference in New Issue
Block a user