[BugFix] Fix CUDA graph for MLPSpeculator (#5875)

Co-authored-by: Abhinav Goyal <abhinav.goyal@flipkart.com>
This commit is contained in:
Nick Hill
2024-06-26 21:12:10 -07:00
committed by GitHub
parent b9e84259e9
commit 2110557dab
2 changed files with 6 additions and 4 deletions

View File

@@ -52,7 +52,6 @@ if __name__ == "__main__":
speculative_model="ibm-fms/llama-13b-accelerator",
# These are currently required for MLPSpeculator decoding
use_v2_block_manager=True,
enforce_eager=True,
)
print("With speculation")