[Chore] Remove debug code in model implementation (#35883)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
@@ -13,7 +13,6 @@ positions via `inputs_embeds`, while `position_ids` (RoPE) remains standard 1D.
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import os
|
|
||||||
from collections.abc import Iterable, Mapping, Sequence
|
from collections.abc import Iterable, Mapping, Sequence
|
||||||
from functools import cached_property
|
from functools import cached_property
|
||||||
from typing import Any
|
from typing import Any
|
||||||
@@ -924,53 +923,6 @@ class FunAudioChatForConditionalGeneration(nn.Module, SupportsMultiModal, Suppor
|
|||||||
f"sequence of Tensors (got {type(speech_attention_mask)})"
|
f"sequence of Tensors (got {type(speech_attention_mask)})"
|
||||||
)
|
)
|
||||||
|
|
||||||
debug = os.getenv("VLLM_FUN_AUDIOCHAT_DEBUG", "") == "1"
|
|
||||||
if debug:
|
|
||||||
print(
|
|
||||||
f"[FunAudioChat] embed_multimodal speech_ids={tuple(speech_ids.shape)} "
|
|
||||||
f"speech_attention_mask={tuple(speech_attention_mask.shape)}",
|
|
||||||
flush=True,
|
|
||||||
)
|
|
||||||
attn_impl = getattr(
|
|
||||||
self.continuous_audio_tower.config, "_attn_implementation", None
|
|
||||||
)
|
|
||||||
print(
|
|
||||||
f"[FunAudioChat] audio_attn_impl={attn_impl}",
|
|
||||||
flush=True,
|
|
||||||
)
|
|
||||||
if hasattr(self.continuous_audio_tower, "conv1"):
|
|
||||||
conv1_w = self.continuous_audio_tower.conv1.weight
|
|
||||||
print(
|
|
||||||
f"[FunAudioChat] conv1_w_norm={float(conv1_w.norm().item()):.6g}",
|
|
||||||
flush=True,
|
|
||||||
)
|
|
||||||
try:
|
|
||||||
attn0 = self.continuous_audio_tower.layers[0].self_attn
|
|
||||||
q_norm = float(attn0.q_proj.weight.norm().item())
|
|
||||||
k_norm = float(attn0.k_proj.weight.norm().item())
|
|
||||||
v_norm = float(attn0.v_proj.weight.norm().item())
|
|
||||||
o_norm = float(attn0.out_proj.weight.norm().item())
|
|
||||||
print(
|
|
||||||
f"[FunAudioChat] attn0_q_norm={q_norm:.6g} "
|
|
||||||
f"k_norm={k_norm:.6g} "
|
|
||||||
f"v_norm={v_norm:.6g} "
|
|
||||||
f"o_norm={o_norm:.6g}",
|
|
||||||
flush=True,
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
if isinstance(input_features, torch.Tensor):
|
|
||||||
print(
|
|
||||||
f"[FunAudioChat] input_features={tuple(input_features.shape)}",
|
|
||||||
flush=True,
|
|
||||||
)
|
|
||||||
if isinstance(feature_attention_mask, torch.Tensor):
|
|
||||||
print(
|
|
||||||
"[FunAudioChat] feature_attention_mask="
|
|
||||||
f"{tuple(feature_attention_mask.shape)}",
|
|
||||||
flush=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
group_size = int(self.audio_tower.group_size)
|
group_size = int(self.audio_tower.group_size)
|
||||||
speech_maxlen = int(speech_ids.shape[-1])
|
speech_maxlen = int(speech_ids.shape[-1])
|
||||||
|
|
||||||
@@ -1019,38 +971,6 @@ class FunAudioChatForConditionalGeneration(nn.Module, SupportsMultiModal, Suppor
|
|||||||
embeds = tuple(
|
embeds = tuple(
|
||||||
audio_features[i, : int(length)] for i, length in enumerate(lengths)
|
audio_features[i, : int(length)] for i, length in enumerate(lengths)
|
||||||
)
|
)
|
||||||
if debug:
|
|
||||||
embed_lens = [int(t.shape[0]) for t in embeds]
|
|
||||||
print(f"[FunAudioChat] embed_multimodal out_lens={embed_lens}", flush=True)
|
|
||||||
if embeds:
|
|
||||||
t0 = embeds[0]
|
|
||||||
print(
|
|
||||||
f"[FunAudioChat] embed0 dtype={t0.dtype} device={t0.device} "
|
|
||||||
f"nan={bool(torch.isnan(t0).any())} "
|
|
||||||
f"norm={float(t0.norm().item()):.6g}",
|
|
||||||
flush=True,
|
|
||||||
)
|
|
||||||
dump_path = os.getenv("VLLM_FUN_AUDIOCHAT_DUMP_PATH", "")
|
|
||||||
if (
|
|
||||||
dump_path
|
|
||||||
and speech_ids.shape[0] == 1
|
|
||||||
and len(embeds) == 1
|
|
||||||
and embed_lens[0] > 10
|
|
||||||
):
|
|
||||||
if not os.path.exists(dump_path):
|
|
||||||
np.save(dump_path, embeds[0].detach().float().cpu().numpy())
|
|
||||||
print(f"[FunAudioChat] dumped embeds to {dump_path}", flush=True)
|
|
||||||
cont_path = dump_path.replace(".npy", "_cont.npy")
|
|
||||||
if continuous_audio_features is not None and not os.path.exists(
|
|
||||||
cont_path
|
|
||||||
):
|
|
||||||
np.save(
|
|
||||||
cont_path,
|
|
||||||
continuous_audio_features.detach().float().cpu().numpy(),
|
|
||||||
)
|
|
||||||
print(
|
|
||||||
f"[FunAudioChat] dumped continuous to {cont_path}", flush=True
|
|
||||||
)
|
|
||||||
return embeds
|
return embeds
|
||||||
|
|
||||||
def forward(
|
def forward(
|
||||||
|
|||||||
@@ -2225,104 +2225,6 @@ class NemotronH_Nano_VL_V2(
|
|||||||
assert len(sound_weights) > 0
|
assert len(sound_weights) > 0
|
||||||
self.sound_encoder.load_weights(sound_weights)
|
self.sound_encoder.load_weights(sound_weights)
|
||||||
|
|
||||||
def print_architecture(self, detailed: bool = True, save_to_file: str = None):
|
|
||||||
"""
|
|
||||||
Print model architecture with parameter names, shapes, and sizes.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
detailed: If True, show detailed parameter breakdown
|
|
||||||
save_to_file: If provided, save output to this file path
|
|
||||||
"""
|
|
||||||
import sys
|
|
||||||
from io import StringIO
|
|
||||||
|
|
||||||
# Capture output if saving to file
|
|
||||||
original_stdout = sys.stdout
|
|
||||||
if save_to_file:
|
|
||||||
sys.stdout = StringIO()
|
|
||||||
|
|
||||||
try:
|
|
||||||
print("=" * 100)
|
|
||||||
print("NemotronH_Nano_VL_V2 Model Architecture")
|
|
||||||
print("=" * 100)
|
|
||||||
|
|
||||||
total_params = 0
|
|
||||||
param_groups = {
|
|
||||||
"language_model": [],
|
|
||||||
"vision_model": [],
|
|
||||||
"mlp1": [],
|
|
||||||
"other": [],
|
|
||||||
}
|
|
||||||
|
|
||||||
for name, param in self.named_parameters():
|
|
||||||
param_size = param.numel()
|
|
||||||
total_params += param_size
|
|
||||||
|
|
||||||
# Group parameters by main component
|
|
||||||
if name.startswith("language_model"):
|
|
||||||
param_groups["language_model"].append(
|
|
||||||
(name, param.shape, param_size, param.dtype)
|
|
||||||
)
|
|
||||||
elif name.startswith("vision_model"):
|
|
||||||
param_groups["vision_model"].append(
|
|
||||||
(name, param.shape, param_size, param.dtype)
|
|
||||||
)
|
|
||||||
elif name.startswith("mlp1"):
|
|
||||||
param_groups["mlp1"].append(
|
|
||||||
(name, param.shape, param_size, param.dtype)
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
param_groups["other"].append(
|
|
||||||
(name, param.shape, param_size, param.dtype)
|
|
||||||
)
|
|
||||||
|
|
||||||
if detailed:
|
|
||||||
print(
|
|
||||||
f"{name:<70} | Shape: {str(param.shape):<25} | "
|
|
||||||
f"Size: {param_size:>12,} | Dtype: {param.dtype}"
|
|
||||||
)
|
|
||||||
|
|
||||||
print("=" * 100)
|
|
||||||
print("Summary by Component:")
|
|
||||||
print("-" * 60)
|
|
||||||
|
|
||||||
for component, params in param_groups.items():
|
|
||||||
if params: # Only show components that have parameters
|
|
||||||
component_total = sum(size for _, _, size, _ in params)
|
|
||||||
percentage = (
|
|
||||||
(component_total / total_params) * 100
|
|
||||||
if total_params > 0
|
|
||||||
else 0
|
|
||||||
)
|
|
||||||
print(
|
|
||||||
f"{component:<20} | Parameters: {len(params):>4} | "
|
|
||||||
f"Total Size: {component_total:>15,} | "
|
|
||||||
f"{percentage:>6.2f}%"
|
|
||||||
)
|
|
||||||
|
|
||||||
print("-" * 60)
|
|
||||||
print(f"{'Total Parameters':<20} | {total_params:>15,}")
|
|
||||||
|
|
||||||
# Estimate memory usage (assuming bfloat16 = 2 bytes per parameter)
|
|
||||||
memory_mb = total_params * 2 / (1024**2)
|
|
||||||
memory_gb = memory_mb / 1024
|
|
||||||
print(f"{'Est. Memory (MB)':<20} | {memory_mb:>15.2f}")
|
|
||||||
print(f"{'Est. Memory (GB)':<20} | {memory_gb:>15.2f}")
|
|
||||||
print("=" * 100)
|
|
||||||
|
|
||||||
# Save to file if requested
|
|
||||||
if save_to_file:
|
|
||||||
output = sys.stdout.getvalue()
|
|
||||||
sys.stdout = original_stdout
|
|
||||||
with open(save_to_file, "w") as f:
|
|
||||||
f.write(output)
|
|
||||||
print(f"Architecture saved to: {save_to_file}")
|
|
||||||
print(output) # Also print to console
|
|
||||||
|
|
||||||
finally:
|
|
||||||
if save_to_file and sys.stdout != original_stdout:
|
|
||||||
sys.stdout = original_stdout
|
|
||||||
|
|
||||||
def get_vit_model_from_radio_config(self, hf_config):
|
def get_vit_model_from_radio_config(self, hf_config):
|
||||||
hf_config_vision = hf_config.vision_config
|
hf_config_vision = hf_config.vision_config
|
||||||
model_name = hf_config_vision.args.get("model")
|
model_name = hf_config_vision.args.get("model")
|
||||||
|
|||||||
Reference in New Issue
Block a user