[torch.compile] use empty tensor instead of None for profiling (#8875)
This commit is contained in:
@@ -136,7 +136,9 @@ def _make_test_resources(test_pt: TestPoint, ) -> TestResources:
|
||||
)
|
||||
if test_pt.num_blocks is None or test_pt.num_heads is None:
|
||||
# Caller does not require a KV cache
|
||||
-return TestResources(scale, attn_backend, attn, None)
|
||||
+return TestResources(
|
||||
+scale, attn_backend, attn,
|
||||
+torch.tensor([], dtype=torch.float32, device=CUDA_DEVICE))
|
||||
|
||||
# Construct KV cache
|
||||
kv_cache = make_kv_cache(test_pt.num_blocks,
|
||||
@@ -620,7 +622,9 @@ def _run_encoder_attention_test(
|
||||
return attn.forward(packed_qkv.query,
|
||||
packed_qkv.key,
|
||||
packed_qkv.value,
|
||||
-None,
|
||||
+torch.tensor([],
|
||||
+dtype=torch.float32,
|
||||
+device=packed_qkv.query.device),
|
||||
attn_metadata,
|
||||
attn_type=attn_type)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user