---
# Accuracy baseline for Llama-4-Maverick FP8 on ChartQA via the vLLM VLM backend.
# For vllm script, with -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh -m meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 -l 100 -t 8
model_name: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
backend: "vllm-vlm"
tasks:
  - name: "chartqa"
    metrics:
      - name: "relaxed_accuracy,none"
        # TODO(zhewenl): model card is 0.90, but the actual score is 0.80.
        value: 0.80
# Number of examples evaluated (matches the -l argument above).
limit: 100
num_fewshot: 0