Add thread_n=64 support to Marlin MoE (#32360)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin
2026-01-15 19:45:44 -05:00
committed by GitHub
parent c277fbdf31
commit 83239ff19a
3 changed files with 8 additions and 5 deletions

View File

@@ -58,7 +58,7 @@ TEMPLATE = (
"( MARLIN_KERNEL_PARAMS );"
)
-THREAD_CONFIGS = [(128, 128, 256), (64, 256, 256), (64, 128, 128)]
+THREAD_CONFIGS = [(128, 128, 256), (64, 256, 256), (64, 128, 128), (128, 64, 128)]
THREAD_M_BLOCKS = [0.5, 1, 2, 3, 4]

View File

@@ -126,14 +126,16 @@ thread_config_t small_batch_thread_configs[] = {
// thread_k, thread_n, num_threads
{128, 128, 256},
-{64, 128, 128}};
+{64, 128, 128},
+{128, 64, 128}};
thread_config_t large_batch_thread_configs[] = {
// Ordered by priority
// thread_k, thread_n, num_threads
{64, 256, 256},
-{64, 128, 128}};
+{64, 128, 128},
+{128, 64, 128}};
typedef struct {
int blocks_per_sm;