Convert formatting to use ruff instead of yapf + isort (#26247)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2025-10-05 15:06:22 +01:00
committed by GitHub
parent 17edd8a807
commit d6953beb91
1508 changed files with 115244 additions and 94146 deletions

View File

@@ -17,46 +17,45 @@ mxbai_rerank_hf_overrides = {
}
RERANK_MODELS = [
LASTPoolingRerankModelInfo("mixedbread-ai/mxbai-rerank-base-v2",
architecture="Qwen2ForSequenceClassification",
hf_overrides=mxbai_rerank_hf_overrides,
mteb_score=0.273,
enable_test=True),
LASTPoolingRerankModelInfo("mixedbread-ai/mxbai-rerank-large-v2",
architecture="Qwen2ForSequenceClassification",
hf_overrides=mxbai_rerank_hf_overrides,
enable_test=False)
LASTPoolingRerankModelInfo(
"mixedbread-ai/mxbai-rerank-base-v2",
architecture="Qwen2ForSequenceClassification",
hf_overrides=mxbai_rerank_hf_overrides,
mteb_score=0.273,
enable_test=True,
),
LASTPoolingRerankModelInfo(
"mixedbread-ai/mxbai-rerank-large-v2",
architecture="Qwen2ForSequenceClassification",
hf_overrides=mxbai_rerank_hf_overrides,
enable_test=False,
),
]
class MxbaiRerankerHfRunner(HfRunner):
def __init__(self,
model_name: str,
dtype: str = "auto",
*args: Any,
**kwargs: Any) -> None:
def __init__(
self, model_name: str, dtype: str = "auto", *args: Any, **kwargs: Any
) -> None:
from transformers import AutoModelForCausalLM, AutoTokenizer
super().__init__(model_name, dtype, auto_cls=AutoModelForCausalLM)
self.tokenizer = AutoTokenizer.from_pretrained(model_name,
padding_side='left')
self.tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
self.yes_loc = self.tokenizer.convert_tokens_to_ids("1")
self.no_loc = self.tokenizer.convert_tokens_to_ids("0")
def predict(self, prompts: list[list[str]], *args,
**kwargs) -> torch.Tensor:
def predict(self, prompts: list[list[str]], *args, **kwargs) -> torch.Tensor:
def process_inputs(pairs):
inputs = self.tokenizer(pairs,
padding=False,
truncation='longest_first',
return_attention_mask=False)
for i, ele in enumerate(inputs['input_ids']):
inputs['input_ids'][i] = ele
inputs = self.tokenizer.pad(inputs,
padding=True,
return_tensors="pt")
inputs = self.tokenizer(
pairs,
padding=False,
truncation="longest_first",
return_attention_mask=False,
)
for i, ele in enumerate(inputs["input_ids"]):
inputs["input_ids"][i] = ele
inputs = self.tokenizer.pad(inputs, padding=True, return_tensors="pt")
for key in inputs:
inputs[key] = inputs[key].to(self.model.device)
return inputs