diff --git a/requirements/nightly_torch_test.txt b/requirements/nightly_torch_test.txt index 9a0bc4b20..27299f47f 100644 --- a/requirements/nightly_torch_test.txt +++ b/requirements/nightly_torch_test.txt @@ -28,7 +28,7 @@ num2words # required for smolvlm test opencv-python-headless >= 4.13.0 # required for video test datamodel_code_generator # required for minicpm3 test lm-eval[api]>=0.4.11 # required for model evaluation test -mteb>=1.38.11, <2 # required for mteb test +mteb[bm25s]>=2, <3 # required for mteb test transformers==4.57.5 tokenizers==0.22.0 schemathesis>=3.39.15 # Required for openai schema test. diff --git a/requirements/rocm-test.txt b/requirements/rocm-test.txt index 5cfda430b..dd7f949f8 100644 --- a/requirements/rocm-test.txt +++ b/requirements/rocm-test.txt @@ -70,7 +70,7 @@ ray[cgraph,default]>=2.48.0 torchgeo==0.7.0 # via terratorch # MTEB Benchmark Test -mteb==2.1.2 +mteb[bm25s]>=2, <3 # Utilities num2words==0.5.14 diff --git a/requirements/test.txt b/requirements/test.txt index b97bbe902..8aa2d6768 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -491,7 +491,7 @@ msgpack==1.1.0 # via # librosa # ray -mteb==2.1.2 +mteb==2.8.3 # via -r requirements/test.in multidict==6.1.0 # via diff --git a/tests/models/language/pooling_mteb_test/mteb_score_utils.py b/tests/models/language/pooling_mteb_test/mteb_score_utils.py index ad3288039..621aff0e9 100644 --- a/tests/models/language/pooling_mteb_test/mteb_score_utils.py +++ b/tests/models/language/pooling_mteb_test/mteb_score_utils.py @@ -191,6 +191,9 @@ def run_mteb_rerank(cross_encoder: mteb.CrossEncoderProtocol, tasks, languages): mteb_tasks: list[mteb.abstasks.AbsTaskRetrieval] = mteb.get_tasks( tasks=tasks, languages=languages, eval_splits=eval_splits ) + for task in mteb_tasks: + if not task.data_loaded: + task.load_data() mteb.evaluate( bm25s,