Text Generation
Safetensors
English
llama
OctoThinker-8B-Long-Base / evaluation_results.json
koalazf99's picture
Upload folder using huggingface_hub
e052dad verified
{
"amc-cot": {
"cot": {
"accuracy": 0.05,
"n_samples": 40
},
"tool": {
"n_samples": 0
}
},
"asdiv-cot": {
"cot": {
"accuracy": 0.8356659142212189,
"n_samples": 2215
},
"tool": {
"n_samples": 0
}
},
"gsm8k-cot": {
"cot": {
"accuracy": 0.7247915087187263,
"n_samples": 1319
},
"tool": {
"n_samples": 0
}
},
"math-500-cot": {
"cot": {
"accuracy": 0.378,
"n_samples": 500
},
"tool": {
"n_samples": 0
}
},
"math-cot": {
"cot": {
"accuracy": 0.4198,
"n_samples": 5000
},
"tool": {
"n_samples": 0
}
},
"math_sat-cot": {
"cot": {
"accuracy": 0.8125,
"n_samples": 32
},
"tool": {
"n_samples": 0
}
},
"mathqa-cot": {
"cot": {
"accuracy": 0.628,
"n_samples": 1000
},
"tool": {
"n_samples": 0
}
},
"mawps-cot": {
"cot": {
"accuracy": 0.9443099273607748,
"n_samples": 2065
},
"tool": {
"n_samples": 0
}
},
"mmlu-stem-cot": {
"cot": {
"accuracy": 0.6375082836315441,
"n_samples": 3018
},
"tool": {
"n_samples": 0
}
},
"ocw-courses-cot": {
"cot": {
"accuracy": 0.16176470588235295,
"n_samples": 272
},
"tool": {
"n_samples": 0
}
},
"olympiad-bench-cot": {
"cot": {
"accuracy": 0.11851851851851852,
"n_samples": 675
},
"tool": {
"n_samples": 0
}
},
"svamp-cot": {
"cot": {
"accuracy": 0.824,
"n_samples": 1000
},
"tool": {
"n_samples": 0
}
},
"tabmwp-cot": {
"cot": {
"accuracy": 0.701,
"n_samples": 1000
},
"tool": {
"n_samples": 0
}
}
}