Bump lm-eval version for Transformers v5 compatibility (#33994)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2026-02-16 14:24:35 +01:00
committed by GitHub
parent 3ef74cde5d
commit a21cedf4ff
14 changed files with 19 additions and 31 deletions

View File

@@ -2,7 +2,7 @@
# We can use this script to compute baseline accuracy on chartqa for vllm.
#
# Make sure you have lm-eval-harness installed:
-# pip install "lm-eval[api]>=0.4.9.2"
+# pip install "lm-eval[api]>=0.4.11"
usage() {
echo``

View File

@@ -2,7 +2,7 @@
# We can use this script to compute baseline accuracy on GSM for transformers.
#
# Make sure you have lm-eval-harness installed:
-# pip install "lm-eval[api]>=0.4.9.2"
+# pip install "lm-eval[api]>=0.4.11"
usage() {
echo``

View File

@@ -3,7 +3,7 @@
# We use this for fp8, which HF does not support.
#
# Make sure you have lm-eval-harness installed:
-# pip install "lm-eval[api]>=0.4.9.2"
+# pip install "lm-eval[api]>=0.4.11"
usage() {
echo``

View File

@@ -3,7 +3,7 @@
# We use this for fp8, which HF does not support.
#
# Make sure you have lm-eval-harness installed:
-# pip install "lm-eval[api]>=0.4.9.2"
+# pip install "lm-eval[api]>=0.4.11"
usage() {
echo``

View File

@@ -61,7 +61,7 @@ echo "Results will be stored in: $RESULTS_DIR"
echo "--- Installing Python dependencies ---"
python3 -m pip install --progress-bar off git+https://github.com/thuml/depyf.git \
&& python3 -m pip install --progress-bar off pytest pytest-asyncio tpu-info \
-&& python3 -m pip install --progress-bar off "lm-eval[api]>=0.4.9.2" \
+&& python3 -m pip install --progress-bar off "lm-eval[api]>=0.4.11" \
&& python3 -m pip install --progress-bar off hf-transfer tblib==3.1.0
echo "--- Python dependencies installed ---"

View File

@@ -61,7 +61,7 @@ echo "Results will be stored in: $RESULTS_DIR"
echo "--- Installing Python dependencies ---"
python3 -m pip install --progress-bar off git+https://github.com/thuml/depyf.git \
&& python3 -m pip install --progress-bar off pytest pytest-asyncio tpu-info \
-&& python3 -m pip install --progress-bar off "lm-eval[api]>=0.4.9.2" \
+&& python3 -m pip install --progress-bar off "lm-eval[api]>=0.4.11" \
&& python3 -m pip install --progress-bar off hf-transfer tblib==3.1.0
echo "--- Python dependencies installed ---"

View File

@@ -84,7 +84,7 @@ Since simple RTN does not require data for weight quantization and the activatio
Install `vllm` and `lm-evaluation-harness` for evaluation:
```bash
-pip install vllm "lm-eval[api]>=0.4.9.2"
+pip install vllm "lm-eval[api]>=0.4.11"
```
Load and run the model in `vllm`:

View File

@@ -18,7 +18,7 @@ pip install llmcompressor
Additionally, install `vllm` and `lm-evaluation-harness` for evaluation:
```bash
-pip install vllm "lm-eval[api]>=0.4.9.2"
+pip install vllm "lm-eval[api]>=0.4.11"
```
## Quantization Process

View File

@@ -23,7 +23,7 @@ pip install llmcompressor
Additionally, install `vllm` and `lm-evaluation-harness` for evaluation:
```bash
-pip install vllm "lm-eval[api]>=0.4.9.2"
+pip install vllm "lm-eval[api]>=0.4.11"
```
## Quantization Process

View File

@@ -20,7 +20,7 @@ for more installation details.
Additionally, install `vllm` and `lm-evaluation-harness` for evaluation:
```bash
-pip install vllm "lm-eval[api]>=0.4.9.2"
+pip install vllm "lm-eval[api]>=0.4.11"
```
## Quantization Process

View File

@@ -27,7 +27,7 @@ mistral_common[image,audio] >= 1.9.1 # required for voxtral test
num2words # required for smolvlm test
opencv-python-headless >= 4.13.0 # required for video test
datamodel_code_generator # required for minicpm3 test
-lm-eval[api]>=0.4.9.2 # required for model evaluation test
+lm-eval[api]>=0.4.11 # required for model evaluation test
mteb>=1.38.11, <2 # required for mteb test
transformers==4.57.5
tokenizers==0.22.0

View File

@@ -58,7 +58,7 @@ schemathesis==3.39.15
# OpenAI schema test
# Evaluation and benchmarking
-lm-eval[api]==0.4.9.2
+lm-eval[api]==0.4.11
jiwer==4.0.0
# Required for multiprocessed tests that use spawn method, Datasets and Evaluate Test

View File

@@ -35,7 +35,7 @@ num2words # required for smolvlm test
open_clip_torch==2.32.0 # Required for nemotron_vl test, Nemotron Parse in test_common.py
opencv-python-headless >= 4.13.0 # required for video test
datamodel_code_generator # required for minicpm3 test
-lm-eval[api]>=0.4.9.2 # required for model evaluation test
+lm-eval[api]>=0.4.11 # required for model evaluation test
mteb[bm25s]>=2, <3 # required for mteb test
transformers==4.57.5
tokenizers==0.22.0

View File

@@ -5,9 +5,7 @@ absl-py==2.1.0
# rouge-score
# tensorboard
accelerate==1.0.1
-# via
-# lm-eval
-# peft
+# via peft
aenum==3.1.16
# via lightly
affine==2.4.0
@@ -138,7 +136,6 @@ colorama==0.4.6
# perceptron
# sacrebleu
# schemathesis
-# tqdm-multiprocess
colorful==0.5.6
# via ray
colorlog==6.10.1
@@ -383,6 +380,7 @@ jinja2==3.1.6
# via
# datamodel-code-generator
# genai-perf
+# lm-eval
# torch
jiwer==3.0.5
# via -r requirements/test.in
@@ -448,7 +446,7 @@ lightning-utilities==0.14.3
# torchmetrics
llvmlite==0.44.0
# via numba
-lm-eval==0.4.9.2
+lm-eval==0.4.11
# via -r requirements/test.in
lxml==5.3.0
# via
@@ -513,8 +511,6 @@ numba==0.61.2
# via
# -r requirements/test.in
# librosa
-numexpr==2.10.1
-# via lm-eval
numpy==2.2.6
# via
# -r requirements/test.in
@@ -540,11 +536,11 @@ numpy==2.2.6
# librosa
# lightly
# lightly-utils
+# lm-eval
# matplotlib
# mistral-common
# mteb
# numba
-# numexpr
# opencv-python-headless
# optuna
# pandas
@@ -707,9 +703,7 @@ pathvalidate==3.2.1
patsy==1.0.1
# via statsmodels
peft==0.16.0
-# via
-# -r requirements/test.in
-# lm-eval
+# via -r requirements/test.in
perceptron==0.1.4
# via -r requirements/test.in
perf-analyzer==0.1.0
@@ -792,8 +786,6 @@ pyasn1==0.6.1
# rsa
pyasn1-modules==0.4.2
# via google-auth
-pybind11==2.13.6
-# via lm-eval
pycocotools==2.0.8
# via terratorch
pycountry==24.6.1
@@ -1171,7 +1163,6 @@ torch==2.10.0+cu129
# kornia
# lightly
# lightning
-# lm-eval
# mteb
# open-clip-torch
# peft
@@ -1229,15 +1220,11 @@ tqdm==4.67.3
# sentence-transformers
# tacoreader
# terratorch
-# tqdm-multiprocess
# transformers
-tqdm-multiprocess==0.0.11
-# via lm-eval
transformers==4.57.5
# via
# -r requirements/test.in
# genai-perf
-# lm-eval
# peft
# sentence-transformers
# transformers-stream-generator
@@ -1272,6 +1259,7 @@ typing-extensions==4.15.0
# librosa
# lightning
# lightning-utilities
+# lm-eval
# mistral-common
# mteb
# opentelemetry-api