diff --git a/tests/kernels/helion/test_utils.py b/tests/kernels/helion/test_utils.py index 807aa4606..540cc4f8b 100644 --- a/tests/kernels/helion/test_utils.py +++ b/tests/kernels/helion/test_utils.py @@ -11,11 +11,13 @@ from vllm.kernels.helion.utils import canonicalize_gpu_name "driver_reported_name,expected", [ ("NVIDIA H200", "nvidia_h200"), - ("NVIDIA A100-SXM4-80GB", "nvidia_a100_sxm4_80gb"), - ("NVIDIA H100 80GB HBM3", "nvidia_h100_80gb_hbm3"), + ("NVIDIA A100-SXM4-80GB", "nvidia_a100"), + ("NVIDIA H100 80GB HBM3", "nvidia_h100"), + ("NVIDIA H100 PCIe", "nvidia_h100"), + ("NVIDIA H100 SXM5", "nvidia_h100"), ("NVIDIA GeForce RTX 4090", "nvidia_geforce_rtx_4090"), ("AMD Instinct MI300X", "amd_instinct_mi300x"), - ("Tesla V100-SXM2-32GB", "tesla_v100_sxm2_32gb"), + ("Tesla V100-SXM2-32GB", "tesla_v100"), ], ) def test_canonicalize_gpu_name(driver_reported_name, expected): diff --git a/vllm/kernels/helion/config_manager.py b/vllm/kernels/helion/config_manager.py index 3c53106ce..7a6836ac8 100644 --- a/vllm/kernels/helion/config_manager.py +++ b/vllm/kernels/helion/config_manager.py @@ -71,10 +71,18 @@ class ConfigSet: platform_dict = self._configs.get(platform) if platform_dict is None: avail_platforms = self.get_platforms() + # TODO(@gmagogsfm): add a CLI/env override flag so users can + # directly specify a platform name instead of relying on + # auto-detection, and suggest it in this error message. raise KeyError( f"Config not found for kernel '{self._kernel_name}': " f"platform '{platform}' not found. " - f"Available platforms: {avail_platforms or '(none)'}" + f"Available platforms: {avail_platforms or '(none)'}. " + f"If your GPU is a variant of a supported platform, " + f"consider adding a mapping in _GPU_NAME_ALIASES in " + f"vllm/kernels/helion/utils.py, or run " + f"scripts/autotune_helion_kernels.py to generate configs " + f"for your platform." ) config = platform_dict.get(config_key) diff --git a/vllm/kernels/helion/configs/silu_mul_fp8.json b/vllm/kernels/helion/configs/silu_mul_fp8.json index 0f0de04a1..b8f091d66 100644 --- a/vllm/kernels/helion/configs/silu_mul_fp8.json +++ b/vllm/kernels/helion/configs/silu_mul_fp8.json @@ -13861,27730 +13861,6 @@ "range_warp_specializes": [] } }, - "nvidia_h100_pcie": { - "intermediate_2048_numtokens_256": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_256": { - "block_sizes": [ - 32, - 512 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 2, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "default": { - "block_sizes": [ - 1, - 512 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 4 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 8, - "num_stages": 2, - "indexing": [ - "tensor_descriptor", - "tensor_descriptor", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_256": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_256": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_256": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_7688_numtokens_256": { - "block_sizes": [ - 8, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_256": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_1": { - "block_sizes": [ - 1, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_1": { - "block_sizes": [ - 1, - 1 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_1": { - "block_sizes": [ - 1, - 32 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_1": { - "block_sizes": [ - 1, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_1": { - "block_sizes": [ - 1, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_1": { - "block_sizes": [ - 1, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_2": { - "block_sizes": [ - 2, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_2": { - "block_sizes": [ - 1, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_2": { - "block_sizes": [ - 2, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_2": { - "block_sizes": [ - 1, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 8, - "num_stages": 2, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_2": { - "block_sizes": [ - 1, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 4, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_2": { - "block_sizes": [ - 1, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_4": { - "block_sizes": [ - 1, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_4": { - "block_sizes": [ - 1, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_4": { - "block_sizes": [ - 4, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_4": { - "block_sizes": [ - 1, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_4": { - "block_sizes": [ - 1, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_4": { - "block_sizes": [ - 4, - 16 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_8": { - "block_sizes": [ - 8, - 256 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_8": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_8": { - "block_sizes": [ - 2, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_8": { - "block_sizes": [ - 4, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_8": { - "block_sizes": [ - 8, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 4, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_8": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_16": { - "block_sizes": [ - 16, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_16": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_16": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_16": { - "block_sizes": [ - 4, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_16": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_16": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_24": { - "block_sizes": [ - 16, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "last" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_24": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_24": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_24": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_24": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_24": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_32": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_32": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_32": { - "block_sizes": [ - 32, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_32": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 2, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_32": { - "block_sizes": [ - 16, - 8 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_32": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_40": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_40": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 8, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_40": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_40": { - "block_sizes": [ - 64, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_40": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_40": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_48": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_48": { - "block_sizes": [ - 8, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_48": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_48": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_48": { - "block_sizes": [ - 16, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_48": { - "block_sizes": [ - 64, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_56": { - "block_sizes": [ - 2, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_56": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_56": { - "block_sizes": [ - 32, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_56": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_56": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "last" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_56": { - "block_sizes": [ - 64, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_64": { - "block_sizes": [ - 16, - 128 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_64": { - "block_sizes": [ - 4, - 64 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_64": { - "block_sizes": [ - 2, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_64": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_64": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_64": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_72": { - "block_sizes": [ - 4, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_72": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_72": { - "block_sizes": [ - 64, - 16 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_72": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_72": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_72": { - "block_sizes": [ - 128, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_80": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_80": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_80": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_80": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 4 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_80": { - "block_sizes": [ - 64, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_80": { - "block_sizes": [ - 32, - 512 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_88": { - "block_sizes": [ - 32, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_88": { - "block_sizes": [ - 16, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_88": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "tensor_descriptor", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_88": { - "block_sizes": [ - 128, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "first", - "last" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_88": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 4 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_88": { - "block_sizes": [ - 16, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "last", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_96": { - "block_sizes": [ - 128, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_96": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_96": { - "block_sizes": [ - 16, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 3, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_96": { - "block_sizes": [ - 64, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "last" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_96": { - "block_sizes": [ - 64, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 2, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_96": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_104": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_104": { - "block_sizes": [ - 64, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_104": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_104": { - "block_sizes": [ - 8, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_104": { - "block_sizes": [ - 128, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_104": { - "block_sizes": [ - 32, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_112": { - "block_sizes": [ - 32, - 1024 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_112": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_112": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_112": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_112": { - "block_sizes": [ - 16, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_112": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_120": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_120": { - "block_sizes": [ - 32, - 16 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_120": { - "block_sizes": [ - 32, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_120": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_120": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_120": { - "block_sizes": [ - 128, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_128": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_128": { - "block_sizes": [ - 128, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_128": { - "block_sizes": [ - 128, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 32, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_128": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_128": { - "block_sizes": [ - 128, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_128": { - "block_sizes": [ - 16, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 3, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_136": { - "block_sizes": [ - 128, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 3, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_136": { - "block_sizes": [ - 8, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_136": { - "block_sizes": [ - 32, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_136": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_136": { - "block_sizes": [ - 16, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 3, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_136": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_144": { - "block_sizes": [ - 8, - 16 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_144": { - "block_sizes": [ - 256, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_144": { - "block_sizes": [ - 128, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_144": { - "block_sizes": [ - 128, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_144": { - "block_sizes": [ - 32, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_144": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_152": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_152": { - "block_sizes": [ - 16, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_152": { - "block_sizes": [ - 64, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_152": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_152": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_152": { - "block_sizes": [ - 64, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_160": { - "block_sizes": [ - 32, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_160": { - "block_sizes": [ - 128, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_160": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_160": { - "block_sizes": [ - 64, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_160": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_160": { - "block_sizes": [ - 128, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_168": { - "block_sizes": [ - 128, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_168": { - "block_sizes": [ - 32, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_168": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_168": { - "block_sizes": [ - 64, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_168": { - "block_sizes": [ - 64, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_168": { - "block_sizes": [ - 32, - 512 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_176": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_176": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_176": { - "block_sizes": [ - 4, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_176": { - "block_sizes": [ - 8, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_176": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_176": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_184": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_184": { - "block_sizes": [ - 8, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_184": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_184": { - "block_sizes": [ - 8, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_184": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_184": { - "block_sizes": [ - 16, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "last" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_192": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_192": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_192": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 4 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_192": { - "block_sizes": [ - 4, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_192": { - "block_sizes": [ - 32, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 4 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_192": { - "block_sizes": [ - 8, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_200": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_200": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_200": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_200": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_200": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_200": { - "block_sizes": [ - 16, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_208": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_208": { - "block_sizes": [ - 64, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_208": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_208": { - "block_sizes": [ - 256, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "tensor_descriptor", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_208": { - "block_sizes": [ - 64, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "last", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_208": { - "block_sizes": [ - 16, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 4 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_216": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_216": { - "block_sizes": [ - 16, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_216": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_216": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_216": { - "block_sizes": [ - 32, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_216": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "last" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_224": { - "block_sizes": [ - 32, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_224": { - "block_sizes": [ - 64, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_224": { - "block_sizes": [ - 64, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 4 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_224": { - "block_sizes": [ - 16, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_224": { - "block_sizes": [ - 256, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_224": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "first" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_232": { - "block_sizes": [ - 16, - 8 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 4 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_232": { - "block_sizes": [ - 64, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_232": { - "block_sizes": [ - 16, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_232": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_232": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 2, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_232": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_240": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_240": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_240": { - "block_sizes": [ - 16, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_240": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_240": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_240": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_248": { - "block_sizes": [ - 16, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_248": { - "block_sizes": [ - 16, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_248": { - "block_sizes": [ - 256, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_248": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 4, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_248": { - "block_sizes": [ - 64, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_248": { - "block_sizes": [ - 64, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_272": { - "block_sizes": [ - 128, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_272": { - "block_sizes": [ - 8, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_272": { - "block_sizes": [ - 128, - 32 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_272": { - "block_sizes": [ - 128, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_272": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_272": { - "block_sizes": [ - 64, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_288": { - "block_sizes": [ - 4, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_288": { - "block_sizes": [ - 8, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_288": { - "block_sizes": [ - 64, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_288": { - "block_sizes": [ - 128, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_288": { - "block_sizes": [ - 256, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_288": { - "block_sizes": [ - 16, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_304": { - "block_sizes": [ - 8, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_304": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_304": { - "block_sizes": [ - 128, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_304": { - "block_sizes": [ - 8, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_304": { - "block_sizes": [ - 64, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_304": { - "block_sizes": [ - 64, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_320": { - "block_sizes": [ - 128, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_320": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_320": { - "block_sizes": [ - 512, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_320": { - "block_sizes": [ - 64, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_320": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_320": { - "block_sizes": [ - 128, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_336": { - "block_sizes": [ - 2, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_336": { - "block_sizes": [ - 8, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_336": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_336": { - "block_sizes": [ - 64, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 8, - "num_stages": 2, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_336": { - "block_sizes": [ - 8, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 2, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_336": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_352": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_352": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_352": { - "block_sizes": [ - 16, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_352": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_352": { - "block_sizes": [ - 8, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_352": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_368": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_368": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_368": { - "block_sizes": [ - 8, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_368": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_368": { - "block_sizes": [ - 32, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_368": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_384": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_384": { - "block_sizes": [ - 64, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_384": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_384": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_384": { - "block_sizes": [ - 8, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_384": { - "block_sizes": [ - 32, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_400": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_400": { - "block_sizes": [ - 8, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_400": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_400": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_400": { - "block_sizes": [ - 256, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 32, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_400": { - "block_sizes": [ - 8, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_416": { - "block_sizes": [ - 128, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_416": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_416": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "last" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_416": { - "block_sizes": [ - 128, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_416": { - "block_sizes": [ - 64, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_416": { - "block_sizes": [ - 32, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_432": { - "block_sizes": [ - 16, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_432": { - "block_sizes": [ - 32, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_432": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 4, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_432": { - "block_sizes": [ - 16, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_432": { - "block_sizes": [ - 16, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_432": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 2, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_448": { - "block_sizes": [ - 4, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_448": { - "block_sizes": [ - 8, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_448": { - "block_sizes": [ - 4, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_448": { - "block_sizes": [ - 32, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_448": { - "block_sizes": [ - 16, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_448": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_464": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_464": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_464": { - "block_sizes": [ - 16, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_464": { - "block_sizes": [ - 8, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_464": { - "block_sizes": [ - 128, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_464": { - "block_sizes": [ - 128, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_480": { - "block_sizes": [ - 4, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 2, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_480": { - "block_sizes": [ - 4, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "first" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_480": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_480": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_480": { - "block_sizes": [ - 64, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "last" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_480": { - "block_sizes": [ - 16, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 4 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 8, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_496": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_496": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_496": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 4 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_496": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_496": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_496": { - "block_sizes": [ - 256, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_512": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_512": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_512": { - "block_sizes": [ - 128, - 512 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_512": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_512": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_512": { - "block_sizes": [ - 16, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat", - "range_warp_specializes": [] - } - }, - "nvidia_h100_80gb_hbm3": { - "intermediate_2048_numtokens_256": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_256": { - "block_sizes": [ - 32, - 512 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 2, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "default": { - "block_sizes": [ - 1, - 512 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 4 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 8, - "num_stages": 2, - "indexing": [ - "tensor_descriptor", - "tensor_descriptor", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_256": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_256": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_256": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_7688_numtokens_256": { - "block_sizes": [ - 8, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_256": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_1": { - "block_sizes": [ - 1, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_1": { - "block_sizes": [ - 1, - 1 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_1": { - "block_sizes": [ - 1, - 32 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_1": { - "block_sizes": [ - 1, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_1": { - "block_sizes": [ - 1, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_1": { - "block_sizes": [ - 1, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_2": { - "block_sizes": [ - 2, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_2": { - "block_sizes": [ - 1, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_2": { - "block_sizes": [ - 2, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_2": { - "block_sizes": [ - 1, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 8, - "num_stages": 2, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_2": { - "block_sizes": [ - 1, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 4, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_2": { - "block_sizes": [ - 1, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_4": { - "block_sizes": [ - 1, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_4": { - "block_sizes": [ - 1, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_4": { - "block_sizes": [ - 4, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_4": { - "block_sizes": [ - 1, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_4": { - "block_sizes": [ - 1, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_4": { - "block_sizes": [ - 4, - 16 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_8": { - "block_sizes": [ - 8, - 256 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_8": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_8": { - "block_sizes": [ - 2, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_8": { - "block_sizes": [ - 4, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_8": { - "block_sizes": [ - 8, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 4, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_8": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_16": { - "block_sizes": [ - 16, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_16": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_16": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_16": { - "block_sizes": [ - 4, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_16": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_16": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_24": { - "block_sizes": [ - 16, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "last" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_24": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_24": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_24": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_24": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_24": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_32": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_32": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_32": { - "block_sizes": [ - 32, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_32": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 2, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_32": { - "block_sizes": [ - 16, - 8 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_32": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_40": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_40": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 8, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_40": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_40": { - "block_sizes": [ - 64, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_40": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_40": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_48": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_48": { - "block_sizes": [ - 8, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_48": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_48": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_48": { - "block_sizes": [ - 16, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_48": { - "block_sizes": [ - 64, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_56": { - "block_sizes": [ - 2, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_56": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_56": { - "block_sizes": [ - 32, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_56": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_56": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "last" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_56": { - "block_sizes": [ - 64, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_64": { - "block_sizes": [ - 16, - 128 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_64": { - "block_sizes": [ - 4, - 64 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_64": { - "block_sizes": [ - 2, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_64": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_64": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_64": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_72": { - "block_sizes": [ - 4, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_72": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_72": { - "block_sizes": [ - 64, - 16 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_72": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_72": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_72": { - "block_sizes": [ - 128, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_80": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_80": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_80": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_80": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 4 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_80": { - "block_sizes": [ - 64, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_80": { - "block_sizes": [ - 32, - 512 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_88": { - "block_sizes": [ - 32, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_88": { - "block_sizes": [ - 16, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_88": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "tensor_descriptor", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_88": { - "block_sizes": [ - 128, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "first", - "last" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_88": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 4 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_88": { - "block_sizes": [ - 16, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "last", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_96": { - "block_sizes": [ - 128, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_96": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_96": { - "block_sizes": [ - 16, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 3, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_96": { - "block_sizes": [ - 64, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "last" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_96": { - "block_sizes": [ - 64, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 2, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_96": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_104": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_104": { - "block_sizes": [ - 64, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_104": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_104": { - "block_sizes": [ - 8, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_104": { - "block_sizes": [ - 128, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_104": { - "block_sizes": [ - 32, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_112": { - "block_sizes": [ - 32, - 1024 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_112": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_112": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_112": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_112": { - "block_sizes": [ - 16, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_112": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_120": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_120": { - "block_sizes": [ - 32, - 16 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_120": { - "block_sizes": [ - 32, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_120": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_120": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_120": { - "block_sizes": [ - 128, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_128": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_128": { - "block_sizes": [ - 128, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_128": { - "block_sizes": [ - 128, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 32, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_128": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_128": { - "block_sizes": [ - 128, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_128": { - "block_sizes": [ - 16, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 3, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_136": { - "block_sizes": [ - 128, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 3, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_136": { - "block_sizes": [ - 8, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_136": { - "block_sizes": [ - 32, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_136": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_136": { - "block_sizes": [ - 16, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 3, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_136": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_144": { - "block_sizes": [ - 8, - 16 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_144": { - "block_sizes": [ - 256, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_144": { - "block_sizes": [ - 128, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_144": { - "block_sizes": [ - 128, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_144": { - "block_sizes": [ - 32, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_144": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_152": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_152": { - "block_sizes": [ - 16, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_152": { - "block_sizes": [ - 64, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_152": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_152": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_152": { - "block_sizes": [ - 64, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_160": { - "block_sizes": [ - 32, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_160": { - "block_sizes": [ - 128, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_160": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_160": { - "block_sizes": [ - 64, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_160": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_160": { - "block_sizes": [ - 128, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_168": { - "block_sizes": [ - 128, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_168": { - "block_sizes": [ - 32, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_168": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_168": { - "block_sizes": [ - 64, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_168": { - "block_sizes": [ - 64, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_168": { - "block_sizes": [ - 32, - 512 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_176": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_176": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_176": { - "block_sizes": [ - 4, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_176": { - "block_sizes": [ - 8, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_176": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_176": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_184": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_184": { - "block_sizes": [ - 8, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_184": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_184": { - "block_sizes": [ - 8, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_184": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_184": { - "block_sizes": [ - 16, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "last" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_192": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_192": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_192": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 4 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_192": { - "block_sizes": [ - 4, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_192": { - "block_sizes": [ - 32, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 4 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_192": { - "block_sizes": [ - 8, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_200": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_200": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_200": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_200": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_200": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_200": { - "block_sizes": [ - 16, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_208": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_208": { - "block_sizes": [ - 64, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_208": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_208": { - "block_sizes": [ - 256, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "tensor_descriptor", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_208": { - "block_sizes": [ - 64, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "last", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_208": { - "block_sizes": [ - 16, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 4 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_216": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_216": { - "block_sizes": [ - 16, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_216": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_216": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_216": { - "block_sizes": [ - 32, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_216": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "last" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_224": { - "block_sizes": [ - 32, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_224": { - "block_sizes": [ - 64, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_224": { - "block_sizes": [ - 64, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 4 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_224": { - "block_sizes": [ - 16, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_224": { - "block_sizes": [ - 256, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_224": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "first" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_232": { - "block_sizes": [ - 16, - 8 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 4 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_232": { - "block_sizes": [ - 64, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_232": { - "block_sizes": [ - 16, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_232": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_232": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 2, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_232": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_240": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_240": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_240": { - "block_sizes": [ - 16, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_240": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_240": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_240": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_248": { - "block_sizes": [ - 16, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_248": { - "block_sizes": [ - 16, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_248": { - "block_sizes": [ - 256, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_248": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 4, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_248": { - "block_sizes": [ - 64, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_248": { - "block_sizes": [ - 64, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_272": { - "block_sizes": [ - 128, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_272": { - "block_sizes": [ - 8, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_272": { - "block_sizes": [ - 128, - 32 - ], - "loop_orders": [ - [ - 1, - 0 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_272": { - "block_sizes": [ - 128, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_272": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_272": { - "block_sizes": [ - 64, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_288": { - "block_sizes": [ - 4, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_288": { - "block_sizes": [ - 8, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_288": { - "block_sizes": [ - 64, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_288": { - "block_sizes": [ - 128, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_288": { - "block_sizes": [ - 256, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_288": { - "block_sizes": [ - 16, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_304": { - "block_sizes": [ - 8, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_304": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_304": { - "block_sizes": [ - 128, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_304": { - "block_sizes": [ - 8, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_304": { - "block_sizes": [ - 64, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_304": { - "block_sizes": [ - 64, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_320": { - "block_sizes": [ - 128, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_320": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_320": { - "block_sizes": [ - 512, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_320": { - "block_sizes": [ - 64, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_320": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_320": { - "block_sizes": [ - 128, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_336": { - "block_sizes": [ - 2, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_336": { - "block_sizes": [ - 8, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_336": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_336": { - "block_sizes": [ - 64, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 8, - "num_stages": 2, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_336": { - "block_sizes": [ - 8, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 2, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_336": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_352": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_352": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_352": { - "block_sizes": [ - 16, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_352": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_352": { - "block_sizes": [ - 8, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_352": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_368": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_368": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_368": { - "block_sizes": [ - 8, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_368": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_368": { - "block_sizes": [ - 32, - 4 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_368": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_384": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_384": { - "block_sizes": [ - 64, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_384": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_384": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_384": { - "block_sizes": [ - 8, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_384": { - "block_sizes": [ - 32, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_400": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_400": { - "block_sizes": [ - 8, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_400": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_400": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_400": { - "block_sizes": [ - 256, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 32, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_400": { - "block_sizes": [ - 8, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_416": { - "block_sizes": [ - 128, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_416": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_416": { - "block_sizes": [ - 64, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "last" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_416": { - "block_sizes": [ - 128, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_416": { - "block_sizes": [ - 64, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_416": { - "block_sizes": [ - 32, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_432": { - "block_sizes": [ - 16, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_432": { - "block_sizes": [ - 32, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_432": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 4, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_432": { - "block_sizes": [ - 16, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 8, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_432": { - "block_sizes": [ - 16, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_432": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 2, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_448": { - "block_sizes": [ - 4, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_448": { - "block_sizes": [ - 8, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_448": { - "block_sizes": [ - 4, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_448": { - "block_sizes": [ - 32, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_448": { - "block_sizes": [ - 16, - 256 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_448": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "last", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_464": { - "block_sizes": [ - 32, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_464": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_464": { - "block_sizes": [ - 16, - 64 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_464": { - "block_sizes": [ - 8, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_464": { - "block_sizes": [ - 128, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "first" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_464": { - "block_sizes": [ - 128, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_480": { - "block_sizes": [ - 4, - 16 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 2, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_480": { - "block_sizes": [ - 4, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "first" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_480": { - "block_sizes": [ - 8, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_480": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_480": { - "block_sizes": [ - 64, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "last" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_480": { - "block_sizes": [ - 16, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 4 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "last", - "", - "" - ], - "num_warps": 8, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_496": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_496": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "first", - "" - ], - "num_warps": 8, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_496": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 4 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 2, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "tensor_descriptor", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_496": { - "block_sizes": [ - 32, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "tensor_descriptor" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_496": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 4, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_496": { - "block_sizes": [ - 256, - 8 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2048_numtokens_512": { - "block_sizes": [ - 32, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "last" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_2880_numtokens_512": { - "block_sizes": [ - 16, - 32 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 1, - "indexing": [ - "pointer", - "tensor_descriptor", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_4096_numtokens_512": { - "block_sizes": [ - 128, - 512 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 16, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_8192_numtokens_512": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - false - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 32, - "num_stages": 1, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_11008_numtokens_512": { - "block_sizes": [ - 32, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 1 - ], - "range_unroll_factors": [ - 0 - ], - "range_warp_specializes": [], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "", - "", - "" - ], - "num_warps": 1, - "num_stages": 1, - "indexing": [ - "tensor_descriptor", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat" - }, - "intermediate_14336_numtokens_512": { - "block_sizes": [ - 16, - 128 - ], - "loop_orders": [ - [ - 0, - 1 - ] - ], - "flatten_loops": [ - true - ], - "l2_groupings": [ - 2 - ], - "range_unroll_factors": [ - 0 - ], - "range_num_stages": [ - 0 - ], - "range_multi_buffers": [ - null - ], - "range_flattens": [ - null - ], - "load_eviction_policies": [ - "first", - "", - "" - ], - "num_warps": 1, - "num_stages": 2, - "indexing": [ - "pointer", - "pointer", - "pointer", - "pointer" - ], - "pid_type": "flat", - "range_warp_specializes": [] - } - }, "nvidia_h100": { "intermediate_2048_numtokens_256": { "block_sizes": [ diff --git a/vllm/kernels/helion/utils.py b/vllm/kernels/helion/utils.py index 600e459f6..5ff8046c7 100644 --- a/vllm/kernels/helion/utils.py +++ b/vllm/kernels/helion/utils.py @@ -8,6 +8,44 @@ from vllm.platforms import current_platform logger = logging.getLogger(__name__) +# Maps known variant GPU names (after lowercase/underscore normalization) +# to their canonical form. +# +# Names that are already canonical after normalization are NOT listed here. +# For example, "NVIDIA H200" normalizes to "nvidia_h200" which needs no +# further mapping, and AMD ROCm names like "AMD_Instinct_MI300X" come from +# a controlled lookup table in rocm.py and normalize cleanly to +# "amd_instinct_mi300x". Only names with variant suffixes (form factor, +# memory size, memory type, etc.) that should be stripped need entries. +# +# To add a new GPU variant: run `canonicalize_gpu_name()` without the alias +# to see the normalized name, then add a mapping here if it contains variant +# suffixes that should be stripped (e.g. Blackwell/Rubin variants). +_GPU_NAME_ALIASES: dict[str, str] = { + # H100 variants + "nvidia_h100_pcie": "nvidia_h100", + "nvidia_h100_sxm5": "nvidia_h100", + "nvidia_h100_80gb_hbm3": "nvidia_h100", + "nvidia_h100_nvl": "nvidia_h100", + # H200 variants + "nvidia_h200_nvl": "nvidia_h200", + "nvidia_h200_141gb_hbm3e": "nvidia_h200", + # A100 variants + "nvidia_a100_sxm4_80gb": "nvidia_a100", + "nvidia_a100_sxm4_40gb": "nvidia_a100", + "nvidia_a100_pcie_80gb": "nvidia_a100", + "nvidia_a100_pcie_40gb": "nvidia_a100", + "nvidia_a100_80gb_pcie": "nvidia_a100", + # V100 variants (Tesla-branded) + "tesla_v100_sxm2_32gb": "tesla_v100", + "tesla_v100_sxm2_16gb": "tesla_v100", + "tesla_v100_pcie_32gb": "tesla_v100", + "tesla_v100_pcie_16gb": "tesla_v100", + # AMD ROCm variants (from _ROCM_DEVICE_ID_NAME_MAP in rocm.py) + "amd_instinct_mi300x_hf": "amd_instinct_mi300x", + # ADD MORE HERE +} + def get_gpu_name(device_id: int | None = None) -> str: if device_id is None: @@ -23,17 +61,19 @@ def canonicalize_gpu_name(name: str) -> str: """ Canonicalize GPU name for use as a platform identifier. - Converts to lowercase and replaces spaces and hyphens with underscores. - e.g., "NVIDIA A100-SXM4-80GB" -> "nvidia_a100_sxm4_80gb" - "AMD_Instinct_MI300X" -> "amd_instinct_mi300x" - - Raises ValueError if name is empty. + Converts to lowercase, replaces spaces and hyphens with underscores, + and maps known variant names to their canonical form via _GPU_NAME_ALIASES. + e.g., "NVIDIA H100 80GB HBM3" -> "nvidia_h100" + "NVIDIA A100-SXM4-80GB" -> "nvidia_a100" + "AMD Instinct MI300X" -> "amd_instinct_mi300x" """ if not name or not name.strip(): raise ValueError("GPU name cannot be empty") name = name.lower() name = name.replace(" ", "_") name = name.replace("-", "_") + if name in _GPU_NAME_ALIASES: + return _GPU_NAME_ALIASES[name] return name