Files
nvfp4-megamoe-kernel/tests/e2e_archive/test_model_construction.py
biondizzle f3b551956d Cleanup Step 2: Archive Lineage P code, fix broken imports
- Move dead dsv4/ modules to dsv4/_archive/ (52 files)
  - model/{dsv4,mtp,layer,layer_schedule}
  - layers/{embedding,attention,ffn,norm} (kept linear,mhc,router,moe,shared_expert,grouped_linear - live)
  - cache/*, kernels/cache/*, kernels/indexer/{csa_indexer,score_topk,compute_valid_lens}
  - kernels/router/{nvfp4_fused_router,dense_router_decode_kernel,dense_router_prefill}
  - ops/{topk,topk_select,rope,router}, loader/{hf_checkpoint,layout_convert}
  - reference/{attention,compressor,csa_attention,moe_pipeline}
  - kernels/compressor/{compress_tail,csa_hca}
- Restore dsv4/ops/{router,custom_ops}.py (needed by live layers)
- Fix dsv4/kernels/{indexer,compressor,attention}/__init__.py (removed broken imports)
- Remove preload_all() from loader.py (dead, referenced nonexistent .cu file)
- Fix loader.py docstring (fused_amax_quantize_nvfp4 → quantize_nvfp4_from_buffer)
- Move broken tests to tests/e2e_archive/
  - test_fused_router, production_values_test, e2e/{one_layer,model_construction,csa_hca}
- vLLM has 0 imports of dsv4 (Step 0 confirmed)
2026-06-02 19:27:07 +00:00

70 lines
2.4 KiB
Python

"""Verify DSV4Model can be constructed (no forward pass, just init)."""
import torch
def test_model_construction():
    """Construct the Flash and Pro DSV4Model variants and print a summary.

    Only the config, cache-manager and model constructors are invoked here;
    no decode call is made. Both variants use a cache manager sized for one
    concurrent request and 512 context tokens.
    """
    from dsv4.model.config import DSV4Config
    from dsv4.model.dsv4 import DSV4Model
    from dsv4.cache.manager import KVCacheManager

    # Flash variant
    config = DSV4Config.flash()
    flash_schedule = build_schedule(config)
    flash_mgr = KVCacheManager(
        config,
        flash_schedule,
        max_concurrent_requests=1,
        max_context_tokens=512,
    )
    model = DSV4Model(config, flash_mgr)
    print(f" Flash: {len(model.layers)} layers, {config.num_query_heads} heads, hd={config.head_dim}")

    # Pro variant
    config_pro = DSV4Config.pro()
    pro_schedule = build_schedule(config_pro)
    pro_mgr = KVCacheManager(
        config_pro,
        pro_schedule,
        max_concurrent_requests=1,
        max_context_tokens=512,
    )
    model_pro = DSV4Model(config_pro, pro_mgr)
    print(f" Pro: {len(model_pro.layers)} layers, {config_pro.num_query_heads} heads, hd={config_pro.head_dim}")
def test_model_decode_step():
    """Attempt one decode_step on a Flash model with no weights loaded.

    Any exception raised inside the decode attempt is caught and printed
    instead of propagated, so this function reports the outcome rather than
    failing. The input tensors are created on the 'cuda' device.
    """
    from dsv4.model.config import DSV4Config
    from dsv4.model.dsv4 import DSV4Model
    from dsv4.cache.manager import KVCacheManager

    config = DSV4Config.flash()
    schedule = build_schedule(config)
    cache_mgr = KVCacheManager(
        config,
        schedule,
        max_concurrent_requests=1,
        max_context_tokens=512,
    )
    model = DSV4Model(config, cache_mgr)

    # Admit a request (the returned slot is not used further in this test).
    _slot = cache_mgr.admit_request()

    # Inputs for a single decode step: one token, position 0, request 0.
    int64_cuda = {"dtype": torch.int64, "device": 'cuda'}
    token_ids = torch.tensor([0], **int64_cuda)
    positions = torch.tensor([0], **int64_cuda)
    request_ids = torch.tensor([0], dtype=torch.int32, device='cuda')

    # Per the original author's note, this is expected to fail at the
    # Nvfp4Linear forward because no weights are loaded; reaching that point
    # is taken as evidence that the model structure is wired correctly.
    try:
        logits, mhc_states = model.decode_step(token_ids, positions, request_ids)
        print(f" decode_step: logits shape={logits.shape}")
    except Exception as e:
        # Expected: Nvfp4Linear needs actual NVFP4 weights
        print(f" decode_step: expected error (no weights): {type(e).__name__}: {e}")
from dsv4.model.layer_schedule import build_schedule
def test():
    """Run the construction check, then the decode-step check, between banners."""
    rule = "=" * 60

    print(rule)
    print("E3: DSV4Model Construction Test")
    print(rule)

    test_model_construction()
    test_model_decode_step()

    print("\n" + rule)
    print("E3 MODEL CONSTRUCTION TEST DONE")
    print(rule)
# Script entry point: run the full sequence when this file is executed directly.
if __name__ == "__main__":
    test()