[Core] draft_model_runner: Implement prepare_inputs on GPU for advance_step (#6338)

2024-07-17 17:30:28 -04:00
parent 5f0b9933e6
commit e76466dde2
12 changed files with 568 additions and 130 deletions
--- a/csrc/prepare_inputs/advance_step.cuh
+++ b/csrc/prepare_inputs/advance_step.cuh
@@ -0,0 +1,19 @@
+#pragma once
+
+#include <torch/all.h>
+
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <cuda.h>
+#include <cuda_fp16.h>
+#include <cuda_runtime.h>
+#include <iostream>
+
+namespace prepare_inputs {
+// Compile-time constants and a small integer helper shared through this header.
+static constexpr int max_threads = 256;  // NOTE(review): presumably the per-block thread cap used at kernel launch; use sites not shown here, confirm
+static constexpr bool logging = false;  // NOTE(review): presumably gates debug output in code that includes this header; off by default, confirm
+// div_ceil(a, b): integer division rounded up; equals ceil(a / b) when a >= 0 and b > 0.
+constexpr int div_ceil(int a, int b) { return (a + b - 1) / b; }  // b must be non-zero; a + b - 1 can overflow int for very large a
+
+}  // namespace prepare_inputs