[2/n] Migrate per_token_group_quant to torch stable ABI (#36058)
Signed-off-by: Mikayla Gawarecki <mikaylagawarecki@gmail.com>
This commit is contained in:
@@ -1,11 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#include <torch/csrc/inductor/aoti_torch/c/shim.h>
|
||||
#include <torch/headeronly/util/shim_utils.h>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
// Utility to get the current CUDA stream for a given device using stable APIs.
|
||||
// Returns a cudaStream_t for use in kernel launches.
|
||||
inline cudaStream_t get_current_cuda_stream(int32_t device_index) {
|
||||
inline cudaStream_t get_current_cuda_stream(int32_t device_index = -1) {
|
||||
void* stream_ptr = nullptr;
|
||||
TORCH_ERROR_CODE_CHECK(
|
||||
aoti_torch_get_current_cuda_stream(device_index, &stream_ptr));
|
||||
|
||||
Reference in New Issue
Block a user