Bump lm-eval version for Transformers v5 compatibility (#33994)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
# We can use this script to compute baseline accuracy on chartqa for vllm.
|
||||
#
|
||||
# Make sure you have lm-eval-harness installed:
|
||||
# pip install "lm-eval[api]>=0.4.9.2"
|
||||
# pip install "lm-eval[api]>=0.4.11"
|
||||
|
||||
usage() {
|
||||
echo``
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# We can use this script to compute baseline accuracy on GSM for transformers.
|
||||
#
|
||||
# Make sure you have lm-eval-harness installed:
|
||||
# pip install "lm-eval[api]>=0.4.9.2"
|
||||
# pip install "lm-eval[api]>=0.4.11"
|
||||
|
||||
usage() {
|
||||
echo``
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
# We use this for fp8, which HF does not support.
|
||||
#
|
||||
# Make sure you have lm-eval-harness installed:
|
||||
# pip install "lm-eval[api]>=0.4.9.2"
|
||||
# pip install "lm-eval[api]>=0.4.11"
|
||||
|
||||
usage() {
|
||||
echo``
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
# We use this for fp8, which HF does not support.
|
||||
#
|
||||
# Make sure you have lm-eval-harness installed:
|
||||
# pip install "lm-eval[api]>=0.4.9.2"
|
||||
# pip install "lm-eval[api]>=0.4.11"
|
||||
|
||||
usage() {
|
||||
echo``
|
||||
|
||||
@@ -61,7 +61,7 @@ echo "Results will be stored in: $RESULTS_DIR"
|
||||
echo "--- Installing Python dependencies ---"
|
||||
python3 -m pip install --progress-bar off git+https://github.com/thuml/depyf.git \
|
||||
&& python3 -m pip install --progress-bar off pytest pytest-asyncio tpu-info \
|
||||
&& python3 -m pip install --progress-bar off "lm-eval[api]>=0.4.9.2" \
|
||||
&& python3 -m pip install --progress-bar off "lm-eval[api]>=0.4.11" \
|
||||
&& python3 -m pip install --progress-bar off hf-transfer tblib==3.1.0
|
||||
echo "--- Python dependencies installed ---"
|
||||
|
||||
|
||||
@@ -61,7 +61,7 @@ echo "Results will be stored in: $RESULTS_DIR"
|
||||
echo "--- Installing Python dependencies ---"
|
||||
python3 -m pip install --progress-bar off git+https://github.com/thuml/depyf.git \
|
||||
&& python3 -m pip install --progress-bar off pytest pytest-asyncio tpu-info \
|
||||
&& python3 -m pip install --progress-bar off "lm-eval[api]>=0.4.9.2" \
|
||||
&& python3 -m pip install --progress-bar off "lm-eval[api]>=0.4.11" \
|
||||
&& python3 -m pip install --progress-bar off hf-transfer tblib==3.1.0
|
||||
echo "--- Python dependencies installed ---"
|
||||
|
||||
|
||||
@@ -84,7 +84,7 @@ Since simple RTN does not require data for weight quantization and the activatio
|
||||
Install `vllm` and `lm-evaluation-harness` for evaluation:
|
||||
|
||||
```bash
|
||||
pip install vllm "lm-eval[api]>=0.4.9.2"
|
||||
pip install vllm "lm-eval[api]>=0.4.11"
|
||||
```
|
||||
|
||||
Load and run the model in `vllm`:
|
||||
|
||||
@@ -18,7 +18,7 @@ pip install llmcompressor
|
||||
Additionally, install `vllm` and `lm-evaluation-harness` for evaluation:
|
||||
|
||||
```bash
|
||||
pip install vllm "lm-eval[api]>=0.4.9.2"
|
||||
pip install vllm "lm-eval[api]>=0.4.11"
|
||||
```
|
||||
|
||||
## Quantization Process
|
||||
|
||||
@@ -23,7 +23,7 @@ pip install llmcompressor
|
||||
Additionally, install `vllm` and `lm-evaluation-harness` for evaluation:
|
||||
|
||||
```bash
|
||||
pip install vllm "lm-eval[api]>=0.4.9.2"
|
||||
pip install vllm "lm-eval[api]>=0.4.11"
|
||||
```
|
||||
|
||||
## Quantization Process
|
||||
|
||||
@@ -20,7 +20,7 @@ for more installation details.
|
||||
Additionally, install `vllm` and `lm-evaluation-harness` for evaluation:
|
||||
|
||||
```bash
|
||||
pip install vllm "lm-eval[api]>=0.4.9.2"
|
||||
pip install vllm "lm-eval[api]>=0.4.11"
|
||||
```
|
||||
|
||||
## Quantization Process
|
||||
|
||||
@@ -27,7 +27,7 @@ mistral_common[image,audio] >= 1.9.1 # required for voxtral test
|
||||
num2words # required for smolvlm test
|
||||
opencv-python-headless >= 4.13.0 # required for video test
|
||||
datamodel_code_generator # required for minicpm3 test
|
||||
lm-eval[api]>=0.4.9.2 # required for model evaluation test
|
||||
lm-eval[api]>=0.4.11 # required for model evaluation test
|
||||
mteb>=1.38.11, <2 # required for mteb test
|
||||
transformers==4.57.5
|
||||
tokenizers==0.22.0
|
||||
|
||||
@@ -58,7 +58,7 @@ schemathesis==3.39.15
|
||||
# OpenAI schema test
|
||||
|
||||
# Evaluation and benchmarking
|
||||
lm-eval[api]==0.4.9.2
|
||||
lm-eval[api]==0.4.11
|
||||
jiwer==4.0.0
|
||||
|
||||
# Required for multiprocessed tests that use spawn method, Datasets and Evaluate Test
|
||||
|
||||
@@ -35,7 +35,7 @@ num2words # required for smolvlm test
|
||||
open_clip_torch==2.32.0 # Required for nemotron_vl test, Nemotron Parse in test_common.py
|
||||
opencv-python-headless >= 4.13.0 # required for video test
|
||||
datamodel_code_generator # required for minicpm3 test
|
||||
lm-eval[api]>=0.4.9.2 # required for model evaluation test
|
||||
lm-eval[api]>=0.4.11 # required for model evaluation test
|
||||
mteb[bm25s]>=2, <3 # required for mteb test
|
||||
transformers==4.57.5
|
||||
tokenizers==0.22.0
|
||||
|
||||
@@ -5,9 +5,7 @@ absl-py==2.1.0
|
||||
# rouge-score
|
||||
# tensorboard
|
||||
accelerate==1.0.1
|
||||
# via
|
||||
# lm-eval
|
||||
# peft
|
||||
# via peft
|
||||
aenum==3.1.16
|
||||
# via lightly
|
||||
affine==2.4.0
|
||||
@@ -138,7 +136,6 @@ colorama==0.4.6
|
||||
# perceptron
|
||||
# sacrebleu
|
||||
# schemathesis
|
||||
# tqdm-multiprocess
|
||||
colorful==0.5.6
|
||||
# via ray
|
||||
colorlog==6.10.1
|
||||
@@ -383,6 +380,7 @@ jinja2==3.1.6
|
||||
# via
|
||||
# datamodel-code-generator
|
||||
# genai-perf
|
||||
# lm-eval
|
||||
# torch
|
||||
jiwer==3.0.5
|
||||
# via -r requirements/test.in
|
||||
@@ -448,7 +446,7 @@ lightning-utilities==0.14.3
|
||||
# torchmetrics
|
||||
llvmlite==0.44.0
|
||||
# via numba
|
||||
lm-eval==0.4.9.2
|
||||
lm-eval==0.4.11
|
||||
# via -r requirements/test.in
|
||||
lxml==5.3.0
|
||||
# via
|
||||
@@ -513,8 +511,6 @@ numba==0.61.2
|
||||
# via
|
||||
# -r requirements/test.in
|
||||
# librosa
|
||||
numexpr==2.10.1
|
||||
# via lm-eval
|
||||
numpy==2.2.6
|
||||
# via
|
||||
# -r requirements/test.in
|
||||
@@ -540,11 +536,11 @@ numpy==2.2.6
|
||||
# librosa
|
||||
# lightly
|
||||
# lightly-utils
|
||||
# lm-eval
|
||||
# matplotlib
|
||||
# mistral-common
|
||||
# mteb
|
||||
# numba
|
||||
# numexpr
|
||||
# opencv-python-headless
|
||||
# optuna
|
||||
# pandas
|
||||
@@ -707,9 +703,7 @@ pathvalidate==3.2.1
|
||||
patsy==1.0.1
|
||||
# via statsmodels
|
||||
peft==0.16.0
|
||||
# via
|
||||
# -r requirements/test.in
|
||||
# lm-eval
|
||||
# via -r requirements/test.in
|
||||
perceptron==0.1.4
|
||||
# via -r requirements/test.in
|
||||
perf-analyzer==0.1.0
|
||||
@@ -792,8 +786,6 @@ pyasn1==0.6.1
|
||||
# rsa
|
||||
pyasn1-modules==0.4.2
|
||||
# via google-auth
|
||||
pybind11==2.13.6
|
||||
# via lm-eval
|
||||
pycocotools==2.0.8
|
||||
# via terratorch
|
||||
pycountry==24.6.1
|
||||
@@ -1171,7 +1163,6 @@ torch==2.10.0+cu129
|
||||
# kornia
|
||||
# lightly
|
||||
# lightning
|
||||
# lm-eval
|
||||
# mteb
|
||||
# open-clip-torch
|
||||
# peft
|
||||
@@ -1229,15 +1220,11 @@ tqdm==4.67.3
|
||||
# sentence-transformers
|
||||
# tacoreader
|
||||
# terratorch
|
||||
# tqdm-multiprocess
|
||||
# transformers
|
||||
tqdm-multiprocess==0.0.11
|
||||
# via lm-eval
|
||||
transformers==4.57.5
|
||||
# via
|
||||
# -r requirements/test.in
|
||||
# genai-perf
|
||||
# lm-eval
|
||||
# peft
|
||||
# sentence-transformers
|
||||
# transformers-stream-generator
|
||||
@@ -1272,6 +1259,7 @@ typing-extensions==4.15.0
|
||||
# librosa
|
||||
# lightning
|
||||
# lightning-utilities
|
||||
# lm-eval
|
||||
# mistral-common
|
||||
# mteb
|
||||
# opentelemetry-api
|
||||
|
||||
Reference in New Issue
Block a user