[Core] draft_model_runner: Implement prepare_inputs on GPU for advance_step (#6338)

This commit is contained in:
Alexander Matveev
2024-07-17 17:30:28 -04:00
committed by GitHub
parent 5f0b9933e6
commit e76466dde2
12 changed files with 568 additions and 130 deletions

View File

@@ -0,0 +1,19 @@
#pragma once
#include <torch/all.h>
#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h>
#include <cuda.h>
#include <cuda_fp16.h>
#include <cuda_runtime.h>
#include <iostream>
namespace prepare_inputs {

// Compile-time thread-count limit (256).
// NOTE(review): presumably the threads-per-block used when launching this
// module's kernels -- the consumer is not visible in this header; confirm.
static constexpr int max_threads = 256;

// Compile-time boolean switch, disabled by default.
// NOTE(review): presumably gates debug output in the implementation file --
// its use is not visible in this header; confirm.
static constexpr bool logging = false;

// Returns a / b rounded up to the next integer (ceiling division).
//
// Precondition: b > 0 (b == 0 is a division by zero, as with plain `/`).
//
// Written as quotient-plus-remainder-test instead of (a + b - 1) / b so that
// no intermediate sum is formed: a + b - 1 overflows signed int (undefined
// behaviour) once a gets close to INT_MAX. For every a >= 0 the result is
// the same as the classic formula; for negative a, C++ truncation toward
// zero already yields the ceiling, so no correction is added there.
constexpr int div_ceil(int a, int b) {
  return a / b + ((a % b) > 0 ? 1 : 0);
}

} // namespace prepare_inputs