@@ -7,7 +7,7 @@
|
||||
#include "../cuda_compat.h"
|
||||
#include "dispatch_utils.h"
|
||||
|
||||
#include "quantization/fp8/common.cuh"
|
||||
#include "quantization/w8a8/fp8/common.cuh"
|
||||
|
||||
namespace vllm {
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
#include "quantization/vectorization.cuh"
|
||||
// TODO(luka/varun):refactor common.cuh to use this file instead
|
||||
#include "quantization/fp8/common.cuh"
|
||||
#include "quantization/w8a8/fp8/common.cuh"
|
||||
|
||||
namespace vllm {
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
#include <hip/hip_bf16.h>
|
||||
#include <hip/hip_bfloat16.h>
|
||||
|
||||
#include "../../../attention/attention_dtypes.h"
|
||||
#include "../../../../attention/attention_dtypes.h"
|
||||
|
||||
namespace vllm {
|
||||
#ifdef USE_ROCM
|
||||
@@ -1,6 +1,6 @@
|
||||
#include "common.cuh"
|
||||
#include "dispatch_utils.h"
|
||||
#include "../vectorization_utils.cuh"
|
||||
#include "quantization/vectorization_utils.cuh"
|
||||
#include <c10/cuda/CUDAGuard.h>
|
||||
#include <ATen/cuda/Exceptions.h>
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "../../../attention/attention_dtypes.h"
|
||||
#include "../../../../attention/attention_dtypes.h"
|
||||
#include <assert.h>
|
||||
#include <float.h>
|
||||
#include <stdint.h>
|
||||
Reference in New Issue
Block a user