Add debug patch to print layer name mismatch
This commit is contained in:
@@ -65,6 +65,11 @@ RUN python3 /tmp/patch_swa_cache.py ${VLLM_SPARSE_SWA_DIR}/sparse_swa.py && rm /
|
||||
COPY vllm/patches/patch_indexer_cache.py /tmp/patch_indexer_cache.py
|
||||
RUN python3 /tmp/patch_indexer_cache.py ${VLLM_LAYERS_DIR2}/deepseek_v4_attention.py && rm /tmp/patch_indexer_cache.py
|
||||
|
||||
# Debug: patch vLLM's gpu_model_runner.py (via patch_debug_layers.py) so a KV-cache layer name mismatch is printed
|
||||
ARG VLLM_WORKER_DIR=/usr/local/lib/python3.12/dist-packages/vllm/v1/worker
|
||||
COPY vllm/patches/patch_debug_layers.py /tmp/patch_debug_layers.py
|
||||
RUN python3 /tmp/patch_debug_layers.py ${VLLM_WORKER_DIR}/gpu_model_runner.py && rm /tmp/patch_debug_layers.py
|
||||
|
||||
# Register CuTeDSL kernel in vLLM's linear kernel selection
|
||||
ARG VLLM_LINEAR_DIR=/usr/local/lib/python3.12/dist-packages/vllm/model_executor/kernels/linear
|
||||
COPY vllm/patches/register_cutedsl_kernel.py /tmp/register_cutedsl_kernel.py
|
||||
|
||||
39
vllm/patches/patch_debug_layers.py
Normal file
39
vllm/patches/patch_debug_layers.py
Normal file
@@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Patch _allocate_kv_cache_tensors to print the layer name mismatch."""
|
||||
import sys
|
||||
|
||||
def patch(path):
    """Insert layer-name mismatch diagnostics ahead of the KV-cache assert.

    Reads the file at *path*, finds the
    ``assert layer_names == set(kv_cache_raw_tensors.keys())`` statement and
    rewrites the file in place so that the missing/extra layer names are
    printed immediately before the (unchanged) assert runs.

    The call is idempotent: a file that already contains the
    ``CLAWMINE_DEBUG_LAYERS`` marker is left untouched.

    Args:
        path: Path of the Python source file to patch.

    Raises:
        SystemExit: With status 1 when the assert statement cannot be found.
    """
    import re  # local import keeps this block self-contained

    marker = "CLAWMINE_DEBUG_LAYERS"

    with open(path, "r", encoding="utf-8") as f:
        content = f.read()

    if marker in content:
        print("Already patched, skipping")
        return

    # Match the assert at any indentation so the patch does not depend on the
    # exact leading whitespace used in the target file.
    target = re.compile(
        r"^(?P<indent>[ \t]*)assert layer_names == "
        r"set\(kv_cache_raw_tensors\.keys\(\)\), \(\n"
        r'[ \t]*"Some layers are not correctly initialized"\n'
        r"[ \t]*\)",
        re.MULTILINE,
    )

    def _with_diagnostics(match):
        """Return the matched assert preceded by the debug statements."""
        indent = match.group("indent")
        nested = indent + "    "
        # Plain concatenation (not f-strings): the braces below must reach
        # the patched file verbatim, where they form f-strings themselves.
        debug_lines = [
            indent + "# " + marker + ": print mismatch before asserting",
            indent + "missing = layer_names - set(kv_cache_raw_tensors.keys())",
            indent + "extra = set(kv_cache_raw_tensors.keys()) - layer_names",
            indent + "if missing or extra:",
            nested
            + 'print(f"CLAWMINE DEBUG: missing layers ({len(missing)}): '
            + '{sorted(missing)[:20]}")',
            nested
            + 'print(f"CLAWMINE DEBUG: extra layers ({len(extra)}): '
            + '{sorted(extra)[:20]}")',
            nested
            + 'print(f"CLAWMINE DEBUG: expected ({len(layer_names)}), '
            + 'got ({len(kv_cache_raw_tensors.keys())})")',
        ]
        # The original assert is kept as-is so failures still abort.
        return "\n".join(debug_lines) + "\n" + match.group(0)

    content, replaced = target.subn(_with_diagnostics, content)

    if not replaced:
        print("ERROR: Could not find the code to patch", file=sys.stderr)
        sys.exit(1)

    with open(path, "w", encoding="utf-8") as f:
        f.write(content)
    print("Patched gpu_model_runner.py for debug layer names")
|
||||
|
||||
if __name__ == "__main__":
    # The file to patch is the first command-line argument; fail with a
    # usage message instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <path-to-file-to-patch>")
    patch(sys.argv[1])
|
||||
Reference in New Issue
Block a user