[Core] draft_model_runner: Implement prepare_inputs on GPU for advance_step (#6338)

This commit is contained in:
Alexander Matveev
2024-07-17 17:30:28 -04:00
committed by GitHub
parent 5f0b9933e6
commit e76466dde2
12 changed files with 568 additions and 130 deletions

View File

@@ -72,6 +72,10 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
ops.def("gelu_quick(Tensor! out, Tensor input) -> ()");
ops.impl("gelu_quick", torch::kCUDA, &gelu_quick);
// advance_step: prepare_inputs work for advancing a step.
// Declares the "advance_step" op. Unlike the neighboring ops, no schema
// string is given here; the function pointer is passed directly, so the
// schema is presumably inferred from the C++ signature of advance_step
// (declared elsewhere) -- TODO confirm.
ops.def("advance_step", &advance_step);
// Registers the same function as the op's implementation for torch::kCUDA.
ops.impl("advance_step", torch::kCUDA, &advance_step);
// Layernorm
// Apply Root Mean Square (RMS) Normalization to the input tensor.
ops.def(