Move decode context parallel validation to ParallelConfig (#33239)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -348,6 +348,17 @@ class ParallelConfig:
|
||||
"num_redundant_experts."
|
||||
)
|
||||
|
||||
# Note(hc): In the current implementation of decode context
|
||||
# parallel(DCP), tp_size needs to be divisible by dcp_size,
|
||||
# because the world size does not change by dcp, it simply
|
||||
# reuses the GPUs of TP group, and split one TP group into
|
||||
# tp_size//dcp_size DCP groups.
|
||||
if self.tensor_parallel_size % self.decode_context_parallel_size != 0:
|
||||
raise ValueError(
|
||||
f"tp_size={self.tensor_parallel_size} must be divisible by"
|
||||
f"dcp_size={self.decode_context_parallel_size}."
|
||||
)
|
||||
|
||||
return self
|
||||
|
||||
@property
|
||||
|
||||
@@ -1389,16 +1389,6 @@ class EngineArgs:
|
||||
# global layers in interleaved sliding window models.
|
||||
sliding_window = model_config.get_sliding_window()
|
||||
|
||||
# Note(hc): In the current implementation of decode context
|
||||
# parallel(DCP), tp_size needs to be divisible by dcp_size,
|
||||
# because the world size does not change by dcp, it simply
|
||||
# reuses the GPUs of TP group, and split one TP group into
|
||||
# tp_size//dcp_size DCP groups.
|
||||
assert self.tensor_parallel_size % self.decode_context_parallel_size == 0, (
|
||||
f"tp_size={self.tensor_parallel_size} must be divisible by"
|
||||
f"dcp_size={self.decode_context_parallel_size}."
|
||||
)
|
||||
|
||||
# Resolve "auto" kv_cache_dtype to actual value from model config
|
||||
resolved_cache_dtype = resolve_kv_cache_dtype_string(
|
||||
self.kv_cache_dtype, model_config
|
||||
|
||||
Reference in New Issue
Block a user