Server running on B200 port 8000 with full NVFP4→vLLM bridge. All critical bugs fixed: DeepGEMM scale format, compressor shapes, block scale values.
45 lines
1.2 KiB
Python
45 lines
1.2 KiB
Python
#!/usr/bin/python3
"""Clean up the broken logger replacement.

Rewrites the patched ``deepseek_v4.py`` in place:

* a ``logger.info_once(`` call (commented out or not) whose next line
  starts the ``"Converted %d NVFP4`` message is replaced by a single
  ``print`` statement, and the rest of the old call is dropped;
* leftover fragments of the old call (the message line and the bare
  ``converted,`` argument line) are removed wherever they appear.

After writing the file, the result is parsed with ``ast`` and the outcome
is reported on stdout.
"""
import ast

filepath = "/root/nvidia-meeting/deepseek-v4-quant/patches/deepseek_v4.py"

# Text fragments used to recognise the old logging call and its leftovers.
LOGGER_CALL = 'logger.info_once('
MESSAGE_PREFIX = '"Converted %d NVFP4'
ORPHAN_MESSAGE = '"Converted %d NVFP4 attention/shared-expert layers to FP8",'
ORPHAN_ARGUMENT = 'converted,'

# Statement that replaces the whole logging call (indentation is added later).
REPLACEMENT = 'print(f"Converted {converted} NVFP4 layers to FP8")\n'


def clean_lines(lines):
    """Return a copy of *lines* with the broken logger block cleaned up.

    Args:
        lines: Source lines, each normally ending in a newline, as produced
            by ``file.readlines()``.

    Returns:
        A new list of lines. The input list is not modified.
    """
    new_lines = []
    skip = False

    for i, line in enumerate(lines):
        if skip:
            # Drop the remainder of the old call; the first line containing
            # a ")" is treated as its closing line and is dropped as well.
            if ')' in line:
                skip = False
            continue

        # Matches both the live call and the commented-out "# logger..." form.
        if LOGGER_CALL in line:
            next_line = lines[i + 1] if i + 1 < len(lines) else ''
            if MESSAGE_PREFIX in next_line:
                # Reuse the indentation of the line being replaced so the
                # new statement sits at the same block level.
                indent = line[:len(line) - len(line.lstrip())]
                new_lines.append(indent + REPLACEMENT)
                skip = True
            else:
                # Some other info_once call: leave it untouched.
                new_lines.append(line)
            continue

        # Remove orphaned lines left over from the old block.
        if ORPHAN_MESSAGE in line:
            continue
        if line.strip() == ORPHAN_ARGUMENT:
            continue

        new_lines.append(line)

    return new_lines


def main():
    """Clean the target file in place and report whether it still parses."""
    with open(filepath, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    new_lines = clean_lines(lines)

    with open(filepath, 'w', encoding='utf-8') as f:
        f.writelines(new_lines)

    try:
        ast.parse(''.join(new_lines))
        print("Syntax OK")
    except SyntaxError as e:
        print(f"Syntax error at line {e.lineno}: {e.msg}")


if __name__ == "__main__":
    main()