[compile] Recompile graph module during Dynamo cache loading. (#30743)
Signed-off-by: Zhengxu Chen <zhxchen17@fb.com>
This commit is contained in:
@@ -104,6 +104,7 @@ class VllmSerializableFunction(SerializableCallable):
 state = pickle.loads(data)
 fake_mode = FakeTensorMode(shape_env=ShapeEnv())
 state["graph_module"] = GraphPickler.loads(state["graph_module"], fake_mode)
+state["graph_module"].recompile()
 state["example_inputs"] = GraphPickler.loads(state["example_inputs"], fake_mode)
 vllm_backend = VllmBackend(get_current_vllm_config(), state["prefix"])
 
Reference in New Issue
Block a user