diff --git a/vllm/model_executor/models/funaudiochat.py b/vllm/model_executor/models/funaudiochat.py index 5bcb49e07..2265d0424 100644 --- a/vllm/model_executor/models/funaudiochat.py +++ b/vllm/model_executor/models/funaudiochat.py @@ -13,7 +13,6 @@ positions via `inputs_embeds`, while `position_ids` (RoPE) remains standard 1D. from __future__ import annotations -import os from collections.abc import Iterable, Mapping, Sequence from functools import cached_property from typing import Any @@ -924,53 +923,6 @@ class FunAudioChatForConditionalGeneration(nn.Module, SupportsMultiModal, Suppor f"sequence of Tensors (got {type(speech_attention_mask)})" ) - debug = os.getenv("VLLM_FUN_AUDIOCHAT_DEBUG", "") == "1" - if debug: - print( - f"[FunAudioChat] embed_multimodal speech_ids={tuple(speech_ids.shape)} " - f"speech_attention_mask={tuple(speech_attention_mask.shape)}", - flush=True, - ) - attn_impl = getattr( - self.continuous_audio_tower.config, "_attn_implementation", None - ) - print( - f"[FunAudioChat] audio_attn_impl={attn_impl}", - flush=True, - ) - if hasattr(self.continuous_audio_tower, "conv1"): - conv1_w = self.continuous_audio_tower.conv1.weight - print( - f"[FunAudioChat] conv1_w_norm={float(conv1_w.norm().item()):.6g}", - flush=True, - ) - try: - attn0 = self.continuous_audio_tower.layers[0].self_attn - q_norm = float(attn0.q_proj.weight.norm().item()) - k_norm = float(attn0.k_proj.weight.norm().item()) - v_norm = float(attn0.v_proj.weight.norm().item()) - o_norm = float(attn0.out_proj.weight.norm().item()) - print( - f"[FunAudioChat] attn0_q_norm={q_norm:.6g} " - f"k_norm={k_norm:.6g} " - f"v_norm={v_norm:.6g} " - f"o_norm={o_norm:.6g}", - flush=True, - ) - except Exception: - pass - if isinstance(input_features, torch.Tensor): - print( - f"[FunAudioChat] input_features={tuple(input_features.shape)}", - flush=True, - ) - if isinstance(feature_attention_mask, torch.Tensor): - print( - "[FunAudioChat] feature_attention_mask=" - f"{tuple(feature_attention_mask.shape)}", - flush=True, - ) - group_size = int(self.audio_tower.group_size) speech_maxlen = int(speech_ids.shape[-1]) @@ -1019,38 +971,6 @@ class FunAudioChatForConditionalGeneration(nn.Module, SupportsMultiModal, Suppor embeds = tuple( audio_features[i, : int(length)] for i, length in enumerate(lengths) ) - if debug: - embed_lens = [int(t.shape[0]) for t in embeds] - print(f"[FunAudioChat] embed_multimodal out_lens={embed_lens}", flush=True) - if embeds: - t0 = embeds[0] - print( - f"[FunAudioChat] embed0 dtype={t0.dtype} device={t0.device} " - f"nan={bool(torch.isnan(t0).any())} " - f"norm={float(t0.norm().item()):.6g}", - flush=True, - ) - dump_path = os.getenv("VLLM_FUN_AUDIOCHAT_DUMP_PATH", "") - if ( - dump_path - and speech_ids.shape[0] == 1 - and len(embeds) == 1 - and embed_lens[0] > 10 - ): - if not os.path.exists(dump_path): - np.save(dump_path, embeds[0].detach().float().cpu().numpy()) - print(f"[FunAudioChat] dumped embeds to {dump_path}", flush=True) - cont_path = dump_path.replace(".npy", "_cont.npy") - if continuous_audio_features is not None and not os.path.exists( - cont_path - ): - np.save( - cont_path, - continuous_audio_features.detach().float().cpu().numpy(), - ) - print( - f"[FunAudioChat] dumped continuous to {cont_path}", flush=True - ) return embeds def forward( diff --git a/vllm/model_executor/models/nano_nemotron_vl.py b/vllm/model_executor/models/nano_nemotron_vl.py index 51b36b1ca..82422e89f 100644 --- a/vllm/model_executor/models/nano_nemotron_vl.py +++ b/vllm/model_executor/models/nano_nemotron_vl.py @@ -2225,104 +2225,6 @@ class NemotronH_Nano_VL_V2( assert len(sound_weights) > 0 self.sound_encoder.load_weights(sound_weights) - def print_architecture(self, detailed: bool = True, save_to_file: str = None): - """ - Print model architecture with parameter names, shapes, and sizes. - - Args: - detailed: If True, show detailed parameter breakdown - save_to_file: If provided, save output to this file path - """ - import sys - from io import StringIO - - # Capture output if saving to file - original_stdout = sys.stdout - if save_to_file: - sys.stdout = StringIO() - - try: - print("=" * 100) - print("NemotronH_Nano_VL_V2 Model Architecture") - print("=" * 100) - - total_params = 0 - param_groups = { - "language_model": [], - "vision_model": [], - "mlp1": [], - "other": [], - } - - for name, param in self.named_parameters(): - param_size = param.numel() - total_params += param_size - - # Group parameters by main component - if name.startswith("language_model"): - param_groups["language_model"].append( - (name, param.shape, param_size, param.dtype) - ) - elif name.startswith("vision_model"): - param_groups["vision_model"].append( - (name, param.shape, param_size, param.dtype) - ) - elif name.startswith("mlp1"): - param_groups["mlp1"].append( - (name, param.shape, param_size, param.dtype) - ) - else: - param_groups["other"].append( - (name, param.shape, param_size, param.dtype) - ) - - if detailed: - print( - f"{name:<70} | Shape: {str(param.shape):<25} | " - f"Size: {param_size:>12,} | Dtype: {param.dtype}" - ) - - print("=" * 100) - print("Summary by Component:") - print("-" * 60) - - for component, params in param_groups.items(): - if params: # Only show components that have parameters - component_total = sum(size for _, _, size, _ in params) - percentage = ( - (component_total / total_params) * 100 - if total_params > 0 - else 0 - ) - print( - f"{component:<20} | Parameters: {len(params):>4} | " - f"Total Size: {component_total:>15,} | " - f"{percentage:>6.2f}%" - ) - - print("-" * 60) - print(f"{'Total Parameters':<20} | {total_params:>15,}") - - # Estimate memory usage (assuming bfloat16 = 2 bytes per parameter) - memory_mb = total_params * 2 / (1024**2) - memory_gb = memory_mb / 1024 - print(f"{'Est. Memory (MB)':<20} | {memory_mb:>15.2f}") - print(f"{'Est. Memory (GB)':<20} | {memory_gb:>15.2f}") - print("=" * 100) - - # Save to file if requested - if save_to_file: - output = sys.stdout.getvalue() - sys.stdout = original_stdout - with open(save_to_file, "w") as f: - f.write(output) - print(f"Architecture saved to: {save_to_file}") - print(output) # Also print to console - - finally: - if save_to_file and sys.stdout != original_stdout: - sys.stdout = original_stdout - def get_vit_model_from_radio_config(self, hf_config): hf_config_vision = hf_config.vision_config model_name = hf_config_vision.args.get("model")