Server running on B200 port 8000 with full NVFP4→vLLM bridge. All critical bugs fixed: DeepGEMM scale format, compressor shapes, block scale values.
38 lines
1.0 KiB
Python
38 lines
1.0 KiB
Python
#!/usr/bin/python3
"""Fix the logger.info_once call and any syntax issues.

Rewrites, in place, the file named by ``filepath``: every ``print(`` call
whose first line contains ``Converted %d`` is replaced by a guarded
f-string print::

    if converted > 0:
        print(f"Converted {converted} NVFP4 attention/shared-expert layers to FP8")

The rewritten source is checked with ``ast.parse`` *before* it is written,
so a rewrite that does not parse never overwrites the target file.
"""

import ast
import sys

filepath = "/root/nvidia-meeting/deepseek-v4-quant/patches/deepseek_v4.py"


def paren_balance(text):
    """Return the count of '(' minus the count of ')' in *text*.

    The count is purely textual: parentheses inside string literals or
    comments are counted as well.
    """
    return text.count('(') - text.count(')')


def rewrite_lines(lines):
    """Return a copy of *lines* with the legacy print call replaced.

    Args:
        lines: Source lines, each including its line terminator (as
            returned by ``readlines()``).

    Returns:
        A new list of lines. The input list is not modified.
    """
    new_lines = []
    open_parens = 0  # > 0 while inside a replaced call that spans lines

    for line in lines:
        if open_parens > 0:
            # Still inside the argument list of the call being replaced:
            # drop the line and keep tracking nesting, so that an inner
            # "len(x)," does not end the skip before the call is closed.
            open_parens += paren_balance(line)
            continue

        # Replace the print( call that was replacing logger.info_once
        if 'print(' in line and 'Converted %d' in line:
            # Reuse the indentation of the statement being replaced so the
            # new block sits at the same nesting level.
            indent = line[:len(line) - len(line.lstrip())]
            step = '\t' if '\t' in indent else '    '
            new_lines.append(indent + 'if converted > 0:\n')
            new_lines.append(
                indent + step
                + 'print(f"Converted {converted} NVFP4 '
                  'attention/shared-expert layers to FP8")\n'
            )
            # Skip following lines only if the call is left open here; a
            # call that closes on this same line must not swallow the
            # unrelated code that follows it.
            open_parens = paren_balance(line)
            continue

        new_lines.append(line)

    return new_lines


def main():
    """Patch ``filepath`` in place; return a process exit status.

    Returns:
        0 if the rewritten source parses and was written back, 1 if it
        does not parse (the file is then left untouched).
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # Find and fix the logger/print issue
    new_lines = rewrite_lines(lines)

    # Validate before writing so a bad rewrite cannot clobber the target.
    try:
        ast.parse(''.join(new_lines))
    except SyntaxError as e:
        print(f"Syntax error: {e}")
        return 1

    with open(filepath, 'w', encoding='utf-8') as f:
        f.writelines(new_lines)

    print("Syntax OK")
    return 0


if __name__ == "__main__":
    sys.exit(main())