Mezura / result /light_eval /avg_yc30f351.json
nmmursit's picture
initial commit
3232d64 verified
{
"model_name": "Qwen/Qwen2.5-72B-Instruct",
"overall_average": 0.3461,
"mmlu_average": 0.242,
"truthfulqa": 0.4484,
"winogrande": 0.5,
"hellaswag": 0.2832,
"hellaswag_norm": 0.2783,
"gsm8k": 0.36,
"arc_challenge": 0.2431,
"arc_challenge_norm": 0.2431,
"total_samples": 1000,
"run_id": "yc30f351"
}