Support YaRN models (#1264)

Signed-off-by: Antoni Baum <antoni.baum@protonmail.com>
Co-authored-by: Viktor Ferenczi <viktor@ferenczi.eu>
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
2023-11-03 14:12:48 -07:00
parent 555bdcc5a3
commit 9f669a9a7c
5 changed files with 128 additions and 8 deletions
--- a/csrc/pos_encoding_kernels.cu
+++ b/csrc/pos_encoding_kernels.cu
@@ -84,7 +84,7 @@ void rotary_embedding(
  int head_size,
  torch::Tensor& cos_sin_cache,     // [max_position, rot_dim]
  bool is_neox) {
-  int num_tokens = query.numel() / query.size(-1);
+  int64_t num_tokens = query.numel() / query.size(-1);
  int rot_dim = cos_sin_cache.size(1);
  int num_heads = query.size(-1) / head_size;
  int num_kv_heads = key.size(-1) / head_size;