Support YaRN models (#1264)

Signed-off-by: Antoni Baum <antoni.baum@protonmail.com>
Co-authored-by: Viktor Ferenczi <viktor@ferenczi.eu>
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
Antoni Baum
2023-11-03 14:12:48 -07:00
committed by GitHub
parent 555bdcc5a3
commit 9f669a9a7c
5 changed files with 128 additions and 8 deletions

View File

@@ -84,7 +84,7 @@ void rotary_embedding(
int head_size,
torch::Tensor& cos_sin_cache, // [max_position, rot_dim]
bool is_neox) {
-int num_tokens = query.numel() / query.size(-1);
+int64_t num_tokens = query.numel() / query.size(-1);
int rot_dim = cos_sin_cache.size(1);
int num_heads = query.size(-1) / head_size;
int num_kv_heads = key.size(-1) / head_size;