AustingDong
committed on
Commit
·
8b5e432
1
Parent(s):
ee8653b
add evaluate
Browse files- app.py +1 -3
- evaluate/__init__.py +0 -0
- evaluate/evaluate.py +66 -0
- evaluate/questions.py +73 -0
app.py
CHANGED
@@ -64,9 +64,7 @@ def multimodal_understanding(model_type,
|
|
64 |
torch.cuda.ipc_collect()
|
65 |
|
66 |
# set seed
|
67 |
-
|
68 |
-
np.random.seed(seed)
|
69 |
-
torch.cuda.manual_seed(seed) if torch.cuda.is_available() else None
|
70 |
|
71 |
input_text_decoded = ""
|
72 |
answer = ""
|
|
|
64 |
torch.cuda.ipc_collect()
|
65 |
|
66 |
# set seed
|
67 |
+
set_seed(model_seed=seed)
|
|
|
|
|
68 |
|
69 |
input_text_decoded = ""
|
70 |
answer = ""
|
evaluate/__init__.py
ADDED
File without changes
|
evaluate/evaluate.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import torch
|
3 |
+
import numpy as np
|
4 |
+
from PIL import Image
|
5 |
+
from demo.model_utils import *
|
6 |
+
from evaluate.questions import questions
|
7 |
+
|
8 |
+
def set_seed(model_seed = 42):
    """Seed the torch, numpy, and (when available) CUDA RNGs for reproducibility.

    Args:
        model_seed: integer seed applied to every random number generator
            (default 42).
    """
    torch.manual_seed(model_seed)
    np.random.seed(model_seed)
    # A plain `if` statement replaces the original conditional *expression*
    # (`f(x) if cond else None`), which was used only for its side effect.
    if torch.cuda.is_available():
        torch.cuda.manual_seed(model_seed)
12 |
+
|
13 |
+
def evaluate(model_type, num_eval = 10):
    """Run the chart-QA evaluation for one model and write answers to disk.

    For each of ``num_eval`` runs the model is re-initialized under a fresh
    random seed, every question in ``questions`` is answered, and the decoded
    answer is written to ``./evaluate/results/<model_type>/<run idx>/<chart>.txt``.

    Args:
        model_type: model identifier such as "Janus-Pro-7B", "LLaVA-1.5-7B",
            or "ChartGemma"; the prefix before the first '-' selects the family.
        num_eval: number of independent evaluation runs (default 10).

    Raises:
        ValueError: if ``model_type`` does not belong to a known model family.
    """
    RESULTS_ROOT = "./evaluate/results"
    for eval_idx in range(num_eval):
        set_seed(np.random.randint(0, 1000))

        # Compute the family prefix once instead of re-splitting model_type
        # in every branch and again inside the question loop.
        family = model_type.split('-')[0]

        if family == "Janus":
            model_utils = Janus_Utils()
            vl_gpt, tokenizer = model_utils.init_Janus(model_type.split('-')[-1])

        elif family == "LLaVA":
            model_utils = LLaVA_Utils()
            version = model_type.split('-')[1]
            vl_gpt, tokenizer = model_utils.init_LLaVA(version=version)

        elif family == "ChartGemma":
            model_utils = ChartGemma_Utils()
            vl_gpt, tokenizer = model_utils.init_ChartGemma()

        else:
            # Previously an unknown model_type fell through with
            # model_utils = None and crashed later with an opaque
            # AttributeError; fail fast with a clear message instead.
            raise ValueError(f"Unknown model_type: {model_type!r}")

        for chart_type, q, img_path in questions:
            image = np.array(Image.open(img_path).convert("RGB"))

            prepare_inputs = model_utils.prepare_inputs(q, image)
            temperature = 0.9
            top_p = 0.1

            if family == "Janus":
                # Janus requires explicit input embeddings before generation.
                inputs_embeds = model_utils.generate_inputs_embeddings(prepare_inputs)
                outputs = model_utils.generate_outputs(inputs_embeds, prepare_inputs, temperature, top_p)
            else:
                outputs = model_utils.generate_outputs(prepare_inputs, temperature, top_p)

            sequences = outputs.sequences.cpu().tolist()
            answer = tokenizer.decode(sequences[0], skip_special_tokens=True)

            files_root = f"{RESULTS_ROOT}/{model_type}/{eval_idx}"
            os.makedirs(files_root, exist_ok=True)

            # The `with` statement closes the file; the original's extra
            # f.close() inside the block was redundant.
            with open(f"{files_root}/{chart_type}.txt", "w") as f:
                f.write(answer)
|
57 |
+
|
58 |
+
|
59 |
+
|
60 |
+
if __name__ == '__main__':
    # Earlier experiment sets, kept for reference:
    #   ["ChartGemma", "Janus-Pro-1B", "Janus-Pro-7B", "LLaVA-1.5-7B"]
    #   ["ChartGemma", "Janus-Pro-1B"]
    selected_models = ["Janus-Pro-7B", "LLaVA-1.5-7B"]
    for selected in selected_models:
        evaluate(model_type=selected, num_eval=10)
|
evaluate/questions.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Evaluation question bank.
# Each entry is a triple: [chart_type, question_text, image_path].
# `evaluate.evaluate` iterates this list, loads the image, and asks the
# model the question; chart_type names the per-run output file.
questions=[
    [
        "LineChart",
        "What was the price of a barrel of oil in February 2020?",
        "images/LineChart.png"
    ],

    [
        "BarChart",
        "What is the average internet speed in Japan?",
        "images/BarChart.png"
    ],

    [
        "StackedBar",
        "What is the cost of peanuts in Seoul?",
        "images/StackedBar.png"
    ],

    [
        "100%StackedBar",
        "Which country has the lowest proportion of Gold medals?",
        "images/Stacked100.png"
    ],

    [
        "PieChart",
        "What is the approximate global smartphone market share of Samsung?",
        "images/PieChart.png"
    ],

    [
        "Histogram",
        "What distance have customers traveled in the taxi the most?",
        "images/Histogram.png"
    ],

    [
        "Scatterplot",
        "True/False: There is a negative linear relationship between the height and the weight of the 85 males.",
        "images/Scatterplot.png"
    ],

    [
        "AreaChart",
        # Fixed typo in the prompt: "price of pount of" -> "price of a pound of".
        "What was the average price of a pound of coffee beans in October 2019?",
        "images/AreaChart.png"
    ],

    [
        "StackedArea",
        "What was the ratio of girls named 'Isla' to girls named 'Amelia' in 2012 in the UK?",
        "images/StackedArea.png"
    ],

    [
        "BubbleChart",
        "Which city's metro system has the largest number of stations?",
        "images/BubbleChart.png"
    ],

    [
        "Choropleth",
        "True/False: In 2020, the unemployment rate for Washington (WA) was higher than that of Wisconsin (WI).",
        "images/Choropleth_New.png"
    ],

    [
        "TreeMap",
        "True/False: eBay is nested in the Software category.",
        "images/TreeMap.png"
    ]
]
|