[vLLM IR] 1/N Implement IR skeleton and rms_norm op (#33825)
Signed-off-by: Luka Govedič <lgovedic@redhat.com> Signed-off-by: Xinyu Chen <xinyu1.chen@intel.com> Signed-off-by: chzhang <chaojun.zhang@intel.com> Signed-off-by: Luka Govedic <luka.govedic@gmail.com> Co-authored-by: Xinyu Chen <xinyu1.chen@intel.com> Co-authored-by: Chaojun Zhang <chaojun.zhang@intel.com> Co-authored-by: Luka Govedič <ProExpertProg@h100-01.nemg-001.lab.rdu2.dc.redhat.com>
This commit is contained in:
@@ -1622,3 +1622,26 @@ def fresh_vllm_cache(monkeypatch, use_fresh_inductor_cache):
|
||||
def enable_pickle(monkeypatch):
    """
    Set the environment flag that permits insecure serialization.

    `LLM.apply_model` requires pickling a function.
    """
    flag_name = "VLLM_ALLOW_INSECURE_SERIALIZATION"
    flag_value = "1"
    monkeypatch.setenv(flag_name, flag_value)
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
def disable_log_dedup(monkeypatch):
    """
    Disable log deduplication such that warning_once, info_once and
    debug_once always print.

    Each ``_print_*_once`` helper in ``vllm.logger`` is replaced, for the
    duration of the test, by the function exposed through its
    ``__wrapped__`` attribute (i.e. the helper without its lru_cache
    decorator). The replacement goes through ``monkeypatch``, so the
    original helpers are restored automatically at teardown, even if
    patching fails partway through.
    """
    # Imported inside the fixture, as in the original implementation.
    from vllm import logger

    for helper_name in (
        "_print_warning_once",
        "_print_info_once",
        "_print_debug_once",
    ):
        cached_helper = getattr(logger, helper_name)
        # `__wrapped__` is the undecorated function behind the cache wrapper.
        monkeypatch.setattr(logger, helper_name, cached_helper.__wrapped__)
|
||||
|
||||
Reference in New Issue
Block a user