[Core] Interface for accessing model from VllmRunner (#10353)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-01-20 15:00:59 +08:00
parent 83609791d2
commit 59a0192fb9
35 changed files with 460 additions and 293 deletions
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -689,6 +689,9 @@ class GPUModelRunner:
                encoder_outputs.append(encoder_output[start_idx:end_idx])
        return encoder_outputs

+    def get_model(self) -> nn.Module:
+        """Return the model object stored on this runner as ``self.model``."""
+        return self.model
+
    @torch.inference_mode()
    def execute_model(
        self,
--- a/vllm/v1/worker/gpu_worker.py
+++ b/vllm/v1/worker/gpu_worker.py
@@ -5,6 +5,7 @@ from typing import TYPE_CHECKING, Optional

 import torch
 import torch.distributed
+import torch.nn as nn

 import vllm.envs as envs
 from vllm.config import CacheConfig, ModelConfig, ParallelConfig, VllmConfig
@@ -176,6 +177,9 @@ class Worker:
        # the model initialization and profiling.
        set_random_seed(self.model_config.seed)

+    def get_model(self) -> nn.Module:
+        """Return the model by delegating to ``self.model_runner.get_model()``."""
+        return self.model_runner.get_model()
+
    @torch.inference_mode()
    def execute_model(
        self,