fix some typos (#24071)
Signed-off-by: co63oc <co63oc@users.noreply.github.com>
This commit is contained in:
@@ -234,7 +234,7 @@ class CompilationConfig:
|
||||
- FULL_AND_PIECEWISE.
|
||||
|
||||
PIECEWISE mode build piecewise cudagraph only, keeping the cudagraph
|
||||
- incompatiable ops (i.e. some attention ops) outside the cudagraph
|
||||
+ incompatible ops (i.e. some attention ops) outside the cudagraph
|
||||
for general flexibility.
|
||||
This is the default mode.
|
||||
|
||||
|
||||
@@ -87,7 +87,7 @@ class ParallelConfig:
|
||||
data_parallel_external_lb: bool = False
|
||||
"""Whether to use "external" DP LB mode. Applies only to online serving
|
||||
and when data_parallel_size > 0. This is useful for a "one-pod-per-rank"
|
||||
- wide-EP setup in Kuberentes. Set implicitly when --data-parallel-rank
|
||||
+ wide-EP setup in Kubernetes. Set implicitly when --data-parallel-rank
|
||||
is provided explicitly to vllm serve."""
|
||||
data_parallel_hybrid_lb: bool = False
|
||||
"""Whether to use "hybrid" DP LB mode. Applies only to online serving
|
||||
|
||||
Reference in New Issue
Block a user