[vLLM IR] 1/N Implement IR skeleton and rms_norm op (#33825)

Signed-off-by: Luka Govedič <lgovedic@redhat.com>
Signed-off-by: Xinyu Chen <xinyu1.chen@intel.com>
Signed-off-by: chzhang <chaojun.zhang@intel.com>
Signed-off-by: Luka Govedic <luka.govedic@gmail.com>
Co-authored-by: Xinyu Chen <xinyu1.chen@intel.com>
Co-authored-by: Chaojun Zhang <chaojun.zhang@intel.com>
Co-authored-by: Luka Govedič <ProExpertProg@h100-01.nemg-001.lab.rdu2.dc.redhat.com>
This commit is contained in:
Luka Govedič
2026-03-31 22:15:05 -04:00
committed by GitHub
parent 0fab52f0aa
commit 40bb175027
49 changed files with 2177 additions and 265 deletions

View File

@@ -1622,3 +1622,26 @@ def fresh_vllm_cache(monkeypatch, use_fresh_inductor_cache):
def enable_pickle(monkeypatch):
    """Allow insecure (pickle-based) serialization for the duration of a test.

    `LLM.apply_model` requires pickling a function, so the opt-in environment
    variable is set through ``monkeypatch``.
    """
    env_var = "VLLM_ALLOW_INSECURE_SERIALIZATION"
    monkeypatch.setenv(env_var, "1")
@pytest.fixture(scope="function")
def disable_log_dedup(monkeypatch):
    """
    Disable log deduplication such that warning_once and info_once always print.

    Each of the ``_print_*_once`` helpers in ``vllm.logger`` is replaced by the
    undecorated function it exposes through ``__wrapped__`` (i.e. without the
    lru_cache decorator), so repeated messages are emitted every time.

    The replacements are installed with ``monkeypatch.setattr``, so the
    original helpers are restored automatically when the test finishes, even
    if patching one of the later helpers fails part-way through setup.
    """
    # Imported locally so the patch targets the module object in use when the
    # fixture runs.
    from vllm import logger

    once_helpers = (
        "_print_warning_once",
        "_print_info_once",
        "_print_debug_once",
    )
    for helper_name in once_helpers:
        deduplicated = getattr(logger, helper_name)
        # ``__wrapped__`` is the plain function underneath the caching wrapper.
        monkeypatch.setattr(logger, helper_name, deduplicated.__wrapped__)