[Bugfix] Add kv_scale input parameter to CPU backend (#3840)

This commit is contained in:
Woosuk Kwon
2024-04-03 21:33:08 -07:00
committed by GitHub
parent 537ee25f43
commit 498eb5cfa3
4 changed files with 12 additions and 5 deletions

View File

@@ -111,7 +111,9 @@ void copy_blocks(std::vector<torch::Tensor> &key_caches,
void reshape_and_cache(torch::Tensor &key, torch::Tensor &value,
torch::Tensor &key_cache, torch::Tensor &value_cache,
torch::Tensor &slot_mapping,
-const std::string &kv_cache_dtype) {
+const std::string &kv_cache_dtype, float kv_scale) {
+TORCH_CHECK(kv_scale == 1.0f);
int num_tokens = key.size(0);
int num_heads = key.size(1);
int head_size = key.size(2);