[torch.compile] use empty tensor instead of None for profiling (#8875)

This commit is contained in:
youkaichao
2024-09-27 08:11:32 -07:00
committed by GitHub
parent 8df2dc3c88
commit a9b15c606f
15 changed files with 84 additions and 32 deletions

View File

@@ -136,7 +136,9 @@ def _make_test_resources(test_pt: TestPoint, ) -> TestResources:
)
if test_pt.num_blocks is None or test_pt.num_heads is None:
# Caller does not require a KV cache
-return TestResources(scale, attn_backend, attn, None)
+return TestResources(
+scale, attn_backend, attn,
+torch.tensor([], dtype=torch.float32, device=CUDA_DEVICE))
# Construct KV cache
kv_cache = make_kv_cache(test_pt.num_blocks,
@@ -620,7 +622,9 @@ def _run_encoder_attention_test(
return attn.forward(packed_qkv.query,
packed_qkv.key,
packed_qkv.value,
-None,
+torch.tensor([],
+dtype=torch.float32,
+device=packed_qkv.query.device),
attn_metadata,
attn_type=attn_type)