nvfp4-megamoe-kernel/tests/e2e_archive/test_model_construction.py

"""Verify DSV4Model can be constructed (no forward pass, just init)."""
import torch


def test_model_construction():
    from dsv4.model.config import DSV4Config
    from dsv4.model.dsv4 import DSV4Model
    from dsv4.cache.manager import KVCacheManager

    # Flash variant
    config = DSV4Config.flash()
    mgr = KVCacheManager(config, build_schedule(config),
                         max_concurrent_requests=1, max_context_tokens=512)
    model = DSV4Model(config, mgr)
    print(f"  Flash: {len(model.layers)} layers, {config.num_query_heads} heads, hd={config.head_dim}")

    # Pro variant
    config_pro = DSV4Config.pro()
    mgr_pro = KVCacheManager(config_pro, build_schedule(config_pro),
                              max_concurrent_requests=1, max_context_tokens=512)
    model_pro = DSV4Model(config_pro, mgr_pro)
    print(f"  Pro: {len(model_pro.layers)} layers, {config_pro.num_query_heads} heads, hd={config_pro.head_dim}")


def test_model_decode_step():
    """Test decode_step with synthetic weights (all zeros)."""
    from dsv4.model.config import DSV4Config
    from dsv4.model.dsv4 import DSV4Model
    from dsv4.cache.manager import KVCacheManager

    config = DSV4Config.flash()
    mgr = KVCacheManager(config, build_schedule(config),
                         max_concurrent_requests=1, max_context_tokens=512)
    model = DSV4Model(config, mgr)

    # Admit a request
    slot = mgr.admit_request()

    # Single decode step
    token_ids = torch.tensor([0], dtype=torch.int64, device='cuda')
    positions = torch.tensor([0], dtype=torch.int64, device='cuda')
    request_ids = torch.tensor([0], dtype=torch.int32, device='cuda')

    # This will fail at Nvfp4Linear forward (no weights loaded)
    # but the test verifies the model structure is correct
    try:
        logits, mhc_states = model.decode_step(token_ids, positions, request_ids)
        print(f"  decode_step: logits shape={logits.shape}")
    except Exception as e:
        # Expected: Nvfp4Linear needs actual NVFP4 weights
        print(f"  decode_step: expected error (no weights): {type(e).__name__}: {e}")


from dsv4.model.layer_schedule import build_schedule


def test():
    print("=" * 60)
    print("E3: DSV4Model Construction Test")
    print("=" * 60)
    test_model_construction()
    test_model_decode_step()
    print("\n" + "=" * 60)
    print("E3 MODEL CONSTRUCTION TEST DONE")
    print("=" * 60)


if __name__ == '__main__':
    test()