Re-enable the 80 char line width limit (#3305)
This commit is contained in:
@@ -114,7 +114,8 @@ def test_contexted_kv_attention(
|
||||
v_cache = v_cache.view(-1, block_size, num_kv_heads,
|
||||
head_size).permute(0, 2, 3, 1).contiguous()
|
||||
|
||||
# Warm up the Triton kernel by calling it once before actually measuring generation time
|
||||
# Warm up the Triton kernel by calling it once before actually measuring
|
||||
# generation time
|
||||
context_attention_fwd(query, k, v, output, k_cache, v_cache, block_table,
|
||||
b_start_loc, b_seq_len, b_ctx_len, max_input_len)
|
||||
torch.cuda.synchronize()
|
||||
|
||||
Reference in New Issue
Block a user