update for fp8

Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
yewentao256
2025-08-27 21:36:03 +00:00
parent 57f2f26a05
commit e92676ef4e
12 changed files with 13 additions and 13 deletions

View File

@@ -7,7 +7,7 @@
#include "../cuda_compat.h"
#include "dispatch_utils.h"
#include "quantization/fp8/common.cuh"
#include "quantization/w8a8/fp8/common.cuh"
namespace vllm {

View File

@@ -6,7 +6,7 @@
#include "quantization/vectorization.cuh"
// TODO(luka/varun): refactor common.cuh to use this file instead
#include "quantization/fp8/common.cuh"
#include "quantization/w8a8/fp8/common.cuh"
namespace vllm {

View File

@@ -5,7 +5,7 @@
#include <hip/hip_bf16.h>
#include <hip/hip_bfloat16.h>
#include "../../../attention/attention_dtypes.h"
#include "../../../../attention/attention_dtypes.h"
namespace vllm {
#ifdef USE_ROCM

View File

@@ -1,6 +1,6 @@
#include "common.cuh"
#include "dispatch_utils.h"
#include "../vectorization_utils.cuh"
#include "quantization/vectorization_utils.cuh"
#include <c10/cuda/CUDAGuard.h>
#include <ATen/cuda/Exceptions.h>

View File

@@ -1,6 +1,6 @@
#pragma once
#include "../../../attention/attention_dtypes.h"
#include "../../../../attention/attention_dtypes.h"
#include <assert.h>
#include <float.h>
#include <stdint.h>