Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -17,15 +17,15 @@ class GSM8KAccuracyTestConfig:
|
||||
expected_value: float
|
||||
|
||||
def get_model_args(self) -> str:
|
||||
return (f"pretrained={self.model_name},"
|
||||
"max_model_len=4096,max_num_seqs=32")
|
||||
return f"pretrained={self.model_name},max_model_len=4096,max_num_seqs=32"
|
||||
|
||||
|
||||
# NOTE: Accuracy scores measured on GPUs.
|
||||
ACCURACY_CONFIGS = [
|
||||
GSM8KAccuracyTestConfig(
|
||||
model_name="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",
|
||||
expected_value=0.76), # no bias
|
||||
expected_value=0.76,
|
||||
), # no bias
|
||||
# NOTE(rob): We cannot re-initialize vLLM in the same process for TPU,
|
||||
# so only one of these tests can run in a single call to pytest. As
|
||||
# a follow-up, move this into the LM-EVAL section of the CI.
|
||||
@@ -37,7 +37,6 @@ ACCURACY_CONFIGS = [
|
||||
|
||||
@pytest.mark.parametrize("config", ACCURACY_CONFIGS)
|
||||
def test_gsm8k_correctness(config: GSM8KAccuracyTestConfig):
|
||||
|
||||
results = lm_eval.simple_evaluate(
|
||||
model="vllm",
|
||||
model_args=config.get_model_args(),
|
||||
@@ -47,6 +46,7 @@ def test_gsm8k_correctness(config: GSM8KAccuracyTestConfig):
|
||||
|
||||
EXPECTED_VALUE = config.expected_value
|
||||
measured_value = results["results"][TASK][FILTER]
|
||||
assert (measured_value - RTOL < EXPECTED_VALUE
|
||||
and measured_value + RTOL > EXPECTED_VALUE
|
||||
), f"Expected: {EXPECTED_VALUE} | Measured: {measured_value}"
|
||||
assert (
|
||||
measured_value - RTOL < EXPECTED_VALUE
|
||||
and measured_value + RTOL > EXPECTED_VALUE
|
||||
), f"Expected: {EXPECTED_VALUE} | Measured: {measured_value}"
|
||||
|
||||
Reference in New Issue
Block a user