[Misc] Remove pad_for_cudagraphs from config (#30143)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
Co-authored-by: Matthew Bonanni <mbonanni@redhat.com>
This commit is contained in:
@@ -581,15 +581,6 @@ class CompilationConfig:
|
||||
local_cache_dir: str = field(default=None, init=False) # type: ignore
|
||||
"""local cache dir for each rank"""
|
||||
|
||||
bs_to_padded_graph_size: list[int] = field(
|
||||
default=None, # type: ignore
|
||||
init=False,
|
||||
)
|
||||
"""optimization:
|
||||
Intuitively, bs_to_padded_graph_size should be dict[int, int].
|
||||
since we know all keys are in a range [0, max_cudagraph_capture_size],
|
||||
we can optimize it to list[int] for better lookup performance."""
|
||||
|
||||
# keep track of enabled and disabled custom ops
|
||||
enabled_custom_ops: Counter[str] = field(default_factory=Counter, init=False)
|
||||
"""custom ops that are enabled"""
|
||||
@@ -639,7 +630,6 @@ class CompilationConfig:
|
||||
"debug_dump_path",
|
||||
"cache_dir",
|
||||
"local_cache_dir",
|
||||
"bs_to_padded_graph_size",
|
||||
"traced_files",
|
||||
"compilation_time",
|
||||
"static_forward_context",
|
||||
@@ -661,7 +651,6 @@ class CompilationConfig:
|
||||
"enabled_custom_ops": True,
|
||||
"disabled_custom_ops": True,
|
||||
"compilation_time": True,
|
||||
"bs_to_padded_graph_size": True,
|
||||
"traced_files": True,
|
||||
"inductor_compile_config": {
|
||||
"post_grad_custom_post_pass": True,
|
||||
@@ -882,7 +871,6 @@ class CompilationConfig:
|
||||
"""To complete the initialization after cudagraph related
|
||||
configs are set. This includes:
|
||||
- initialize compile_sizes
|
||||
- pre-compute the mapping bs_to_padded_graph_size
|
||||
"""
|
||||
|
||||
computed_compile_sizes = []
|
||||
@@ -906,23 +894,6 @@ class CompilationConfig:
|
||||
if self.cudagraph_capture_sizes:
|
||||
assert self.cudagraph_capture_sizes[-1] == self.max_cudagraph_capture_size
|
||||
|
||||
# May get recomputed in the model runner if adjustment is needed for spec-decode
|
||||
self.compute_bs_to_padded_graph_size()
|
||||
|
||||
# Validate that compile_sizes won't be changed by padding.
|
||||
# Only validate when cudagraphs are actually being used.
|
||||
if self.compile_sizes and self.cudagraph_mode != CUDAGraphMode.NONE:
|
||||
for size in self.compile_sizes:
|
||||
if size <= self.max_cudagraph_capture_size:
|
||||
padded = self.bs_to_padded_graph_size[size]
|
||||
if padded != size:
|
||||
raise ValueError(
|
||||
f"compile_sizes contains {size} which would be "
|
||||
f"padded to {padded}. All compile_sizes must be "
|
||||
"values that won't be changed by cudagraph padding. "
|
||||
"Use values from cudagraph_capture_sizes."
|
||||
)
|
||||
|
||||
def set_splitting_ops_for_v1(
|
||||
self, all2all_backend: str, data_parallel_size: int = 1
|
||||
):
|
||||
@@ -1134,24 +1105,6 @@ class CompilationConfig:
|
||||
self.max_cudagraph_capture_size = rounded_sizes[-1]
|
||||
self.cudagraph_capture_sizes = rounded_sizes
|
||||
|
||||
# Recompute after adjusting the cudagraph sizes
|
||||
self.compute_bs_to_padded_graph_size()
|
||||
|
||||
def compute_bs_to_padded_graph_size(self):
|
||||
# pre-compute the mapping from batch size to padded graph size
|
||||
self.bs_to_padded_graph_size = [
|
||||
0 for i in range(self.max_cudagraph_capture_size + 1)
|
||||
]
|
||||
for end, start in zip(
|
||||
self.cudagraph_capture_sizes + [self.max_cudagraph_capture_size + 1],
|
||||
[0] + self.cudagraph_capture_sizes,
|
||||
):
|
||||
for bs in range(start, end):
|
||||
if bs == start:
|
||||
self.bs_to_padded_graph_size[bs] = start
|
||||
else:
|
||||
self.bs_to_padded_graph_size[bs] = end
|
||||
|
||||
def get_compile_ranges(self) -> list[Range]:
|
||||
"""Get the compile ranges for the compilation config."""
|
||||
if self.compile_ranges_split_points is None:
|
||||
|
||||
Reference in New Issue
Block a user