GenCeption / leaderboard /leaderboard.json
valbuc's picture
Add GPT-4o result to the leaderboard (#6)
4635b96 verified
{
"models": [
{
"name": "GPT-4o",
"url": "https://openai.com/index/hello-gpt-4o",
"scores": {
"Mean": 0.359,
"Exist": 0.400,
"Count": 0.388,
"Posi": 0.398,
"Col": 0.421,
"Post": 0.335,
"Cel": 0.331,
"Sce": 0.401,
"Lan": 0.372,
"Art": 0.415,
"Comm": 0.448,
"VisMean": 0.391,
"Code": 0.255,
"Num": 0.282,
"Tran": 0.211,
"OCR": 0.362,
"TextMean": 0.278
}
},
{
"name": "Gemini1.5-Pro",
"url": "https://deepmind.google/technologies/gemini",
"scores": {
"Mean": 0.368,
"Exist": 0.437,
"Count": 0.389,
"Posi": 0.357,
"Col": 0.474,
"Post": 0.374,
"Cel": 0.362,
"Sce": 0.423,
"Lan": 0.375,
"Art": 0.412,
"Comm": 0.464,
"VisMean": 0.407,
"Code": 0.213,
"Num": 0.268,
"Tran": 0.240,
"OCR": 0.367,
"TextMean": 0.272
}
},
{
"name": "Claude3-Opus",
"url": "https://www.anthropic.com/news/claude-3-family",
"scores": {
"Mean": 0.340,
"Exist": 0.382,
"Count": 0.348,
"Posi": 0.357,
"Col": 0.385,
"Post": 0.360,
"Cel": 0.317,
"Sce": 0.374,
"Lan": 0.344,
"Art": 0.385,
"Comm": 0.432,
"VisMean": 0.368,
"Code": 0.245,
"Num": 0.229,
"Tran": 0.236,
"OCR": 0.362,
"TextMean": 0.268
}
},
{
"name": "ChatGPT-4V",
"url": "https://cdn.openai.com/papers/GPTV_System_Card.pdf",
"scores": {
"Mean": 0.351,
"Exist": 0.422,
"Count": 0.404,
"Posi": 0.408,
"Col": 0.403,
"Post": 0.324,
"Cel": 0.332,
"Sce": 0.393,
"Lan": 0.353,
"Art": 0.421,
"Comm": 0.471,
"VisMean": 0.393,
"Code": 0.193,
"Num": 0.240,
"Tran": 0.157,
"OCR": 0.393,
"TextMean": 0.246
}
},
{
"name": "mPLUG-Owl2",
"url": "https://arxiv.org/pdf/2311.04257.pdf",
"scores": {
"Mean": 0.257,
"Exist": 0.323,
"Count": 0.299,
"Posi": 0.306,
"Col": 0.290,
"Post": 0.243,
"Cel": 0.232,
"Sce": 0.299,
"Lan": 0.275,
"Art": 0.252,
"Comm": 0.353,
"VisMean": 0.287,
"Code": 0.176,
"Num": 0.192,
"Tran": 0.081,
"OCR": 0.276,
"TextMean": 0.181
}
},
{
"name": "LLaVA-13B",
"url": "https://arxiv.org/pdf/2304.08485.pdf",
"scores": {
"Mean": 0.238,
"Exist": 0.305,
"Count": 0.294,
"Posi": 0.255,
"Col": 0.300,
"Post": 0.215,
"Cel": 0.206,
"Sce": 0.277,
"Lan": 0.242,
"Art": 0.212,
"Comm": 0.334,
"VisMean": 0.264,
"Code": 0.144,
"Num": 0.195,
"Tran": 0.116,
"OCR": 0.239,
"TextMean": 0.174
}
},
{
"name": "LLaVA-7B",
"url": "https://arxiv.org/pdf/2304.08485.pdf",
"scores": {
"Mean": 0.225,
"Exist": 0.308,
"Count": 0.253,
"Posi": 0.285,
"Col": 0.284,
"Post": 0.214,
"Cel": 0.188,
"Sce": 0.266,
"Lan": 0.252,
"Art": 0.210,
"Comm": 0.294,
"VisMean": 0.255,
"Code": 0.107,
"Num": 0.155,
"Tran": 0.111,
"OCR": 0.222,
"TextMean": 0.149
}
}
]
}