#!/usr/bin/env python3
"""Patch _allocate_kv_cache_tensors to print the layer name mismatch.

The script rewrites a vLLM ``gpu_model_runner.py`` in place: right before the
``assert layer_names == set(kv_cache_raw_tensors.keys())`` check it injects
code that prints which layer names are missing/extra, so a failing assert is
preceded by actionable output.  The assert itself is left untouched.

Dockerfile usage::

    # Debug: print layer name mismatch
    ARG VLLM_WORKER_DIR=/usr/local/lib/python3.12/dist-packages/vllm/v1/worker
    COPY vllm/patches/patch_debug_layers.py /tmp/patch_debug_layers.py
    RUN python3 /tmp/patch_debug_layers.py ${VLLM_WORKER_DIR}/gpu_model_runner.py \
        && rm /tmp/patch_debug_layers.py
"""
import re
import sys

# Marker embedded in the injected code; if it is already present in the
# target file the patch has been applied and must not be applied again.
PATCH_MARKER = "CLAWMINE_DEBUG_LAYERS"

# The assert statement to instrument.  The leading indentation is captured
# (instead of hard-coded) so the injected code can be aligned with it, and the
# match is anchored at line start so a deeper-indented statement is never
# matched "from the middle" of its indentation.
_ASSERT_RE = re.compile(
    r"^(?P<indent>[ \t]*)"
    r"assert layer_names == set\(kv_cache_raw_tensors\.keys\(\)\), \(\n"
    r'[ \t]*"Some layers are not correctly initialized"\n'
    r"[ \t]*\)",
    re.MULTILINE,
)

# Lines injected before the assert, written relative to the assert's own
# indentation.  These are plain strings here; they become f-strings only in
# the patched file.
_DEBUG_LINES = (
    "# CLAWMINE_DEBUG_LAYERS: print mismatch before asserting",
    "missing = layer_names - set(kv_cache_raw_tensors.keys())",
    "extra = set(kv_cache_raw_tensors.keys()) - layer_names",
    "if missing or extra:",
    '    print(f"CLAWMINE DEBUG: missing layers ({len(missing)}): '
    '{sorted(missing)[:20]}")',
    '    print(f"CLAWMINE DEBUG: extra layers ({len(extra)}): '
    '{sorted(extra)[:20]}")',
    '    print(f"CLAWMINE DEBUG: expected ({len(layer_names)}), '
    'got ({len(kv_cache_raw_tensors.keys())})")',
)


def _inject_debug(match):
    """Return the matched assert preceded by the re-indented debug lines."""
    indent = match.group("indent")
    debug_block = "".join(f"{indent}{line}\n" for line in _DEBUG_LINES)
    # The original assert text is re-emitted verbatim after the debug block.
    return debug_block + match.group(0)


def patch(path):
    """Insert layer-mismatch debug prints into the file at *path*, in place.

    Args:
        path: Path of the Python source file to patch (expected to be vLLM's
            ``gpu_model_runner.py``).

    Returns:
        None.  If the file already contains ``CLAWMINE_DEBUG_LAYERS`` it is
        left untouched.

    Raises:
        SystemExit: with status 1 when the assert statement to instrument is
            not found; the file is not modified in that case.
    """
    with open(path, "r", encoding="utf-8") as f:
        content = f.read()

    if PATCH_MARKER in content:
        print("Already patched, skipping")
        return

    # A callable replacement is used so nothing in the injected text is
    # interpreted as a regex group reference or escape.
    patched, replaced = _ASSERT_RE.subn(_inject_debug, content)
    if not replaced:
        print("ERROR: Could not find the code to patch", file=sys.stderr)
        sys.exit(1)

    with open(path, "w", encoding="utf-8") as f:
        f.write(patched)
    print("Patched gpu_model_runner.py for debug layer names")


if __name__ == "__main__":
    if len(sys.argv) < 2:
        # sys.exit(str) prints to stderr and exits with status 1.
        sys.exit(f"Usage: {sys.argv[0]} <path/to/gpu_model_runner.py>")
    patch(sys.argv[1])