From d56e95223917ba21af2dd87a335dcb5c0b347bfe Mon Sep 17 00:00:00 2001
From: Netanel Haber <58652339+netanel-haber@users.noreply.github.com>
Date: Mon, 6 Apr 2026 01:23:45 +0300
Subject: [PATCH] nano_nemotron_vl: fix tensor device mismatch exception during
 video profiling (#39029)

Signed-off-by: Netanel Haber <58652339+netanel-haber@users.noreply.github.com>
---
 vllm/model_executor/models/nano_nemotron_vl.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/models/nano_nemotron_vl.py b/vllm/model_executor/models/nano_nemotron_vl.py
index 819bb4a3c..249b28969 100644
--- a/vllm/model_executor/models/nano_nemotron_vl.py
+++ b/vllm/model_executor/models/nano_nemotron_vl.py
@@ -1239,12 +1239,13 @@ class NemotronH_Nano_VL_V2(
             img_context_token_ids=self._img_context_token_ids,
             video_temporal_patch_size=video_temporal_patch_size,
         )
+        device = video_embeddings.device
 
         # video_repl.full is a list of token IDs
-        repl_token_ids = torch.tensor(video_repl.full)
+        repl_token_ids = torch.tensor(video_repl.full, device=device)
 
         # Get embedding token IDs for image context (use pre-tokenized version)
-        embed_token_ids = torch.tensor(self._img_context_token_ids)
+        embed_token_ids = torch.tensor(self._img_context_token_ids, device=device)
 
         # Create mask for video embedding positions
         is_video_embed = torch.isin(repl_token_ids, embed_token_ids)