[torch.compile] Document the workaround to standalone_compile failing (#33571)
Signed-off-by: Richard Zou <zou3519@gmail.com>
This commit is contained in:
@@ -282,6 +282,15 @@ If vLLM's compile cache is wrong, this usually means that a factor is missing.
Please see [this example](https://github.com/vllm-project/vllm/blob/18b39828d90413d05d770dfd2e2f48304f4ca0eb/vllm/config/model.py#L310)
of how vLLM computes part of the cache key.

vLLM's compilation cache requires that the code being compiled ends up being serializable.
If this is not the case, then it will error out on save. Usually the fixes are to either:

- rewrite the non-serializable pieces (perhaps difficult, because it is hard to
  tell right now what is serializable and what isn't)
- file a bug report
- ignore the error by setting `VLLM_DISABLE_COMPILE_CACHE=1` (note that this will
  make warm server starts a lot slower).

## Debugging CUDAGraphs

CUDAGraphs is a feature that allows one to:
@@ -273,7 +273,26 @@ class InductorStandaloneAdaptor(CompilerInterface):
|
||||
assert key is not None
|
||||
path = os.path.join(self.cache_dir, key)
|
||||
|
||||
def is_saveable_2_10(artifact):
    """Return True iff *artifact* can be serialized to disk.

    Workaround for torch 2.10: in torch 2.11+ this can be replaced
    by ``artifact.is_saveable``. An artifact is considered saveable
    when it carries exactly one AOTAutograd artifact.
    """
    pieces = artifact._artifacts
    if pieces is None:
        # Nothing was captured for serialization at all.
        return False
    cache_info = pieces[1]
    return len(cache_info.aot_autograd_artifacts) == 1
|
||||
|
||||
if is_compile_cache_enabled(compiler_config):
|
||||
if not is_saveable_2_10(compiled_graph):
|
||||
raise RuntimeError(
|
||||
"The compiled artifact is not serializable. This usually means "
|
||||
"that the model code has something that is not serializable "
|
||||
"by torch.compile in it. You can fix this by either "
|
||||
"figuring out what is not serializable and rewriting it, "
|
||||
"filing a bug report, "
|
||||
"or suppressing this error by "
|
||||
"disabling vLLM's compilation cache via "
|
||||
"VLLM_DISABLE_COMPILE_CACHE=1 "
|
||||
"(this will greatly increase vLLM server warm start times)."
|
||||
)
|
||||
compiled_graph.save(path=path, format=self.save_format)
|
||||
compilation_counter.num_compiled_artifacts_saved += 1
|
||||
return compiled_graph, (key, path)
|
||||
|
||||
Reference in New Issue
Block a user