Spaces:

uw-insight-lab
/

Probing-Vis-Literacy-of-VLMs

Paused

App Files Files Community

AustingDong committed on Mar 12

Commit

5da9d34

1 Parent(s): 22993a5

Removed LLaVA-OneVision

Browse files

Files changed (2) hide show

app.py +31 -28
demo/model_utils.py +2 -1

app.py CHANGED Viewed

@@ -291,7 +291,7 @@ with gr.Blocks() as demo:
             activation_map_output = gr.Gallery(label="activation Map", height=300, columns=1)
         with gr.Column():
-            model_selector = gr.Dropdown(choices=["Clip", "ChartGemma-3B", "Janus-1B", "Janus-7B", "LLaVA-1.5-7B", "LLaVA-onevision-qwen2-7b-si"], value="Clip", label="model")
             response_type = gr.Dropdown(choices=["Visualization only"], value="Visualization only", label="response_type")
             focus = gr.Dropdown(choices=["Visual Encoder"], value="Visual Encoder", label="focus")
             activation_map_method = gr.Dropdown(choices=["GradCAM"], value="GradCAM", label="activation map type")
@@ -347,62 +347,65 @@ with gr.Blocks() as demo:
     examples_inpainting = gr.Examples(
         label="Multimodal Understanding examples",
         examples=[
             [
-                "What is the approximate global smartphone market share of Samsung?",
-                "images/PieChart.png"
             ],
             [
                 "What is the average internet speed in Japan?",
                 "images/BarChart.png"
             ],
             [
-                "What was the average price of coffee beans in October 2019?",
-                "images/AreaChart.png"
-            ],
-            [
-                "Which city's metro system has the largest number of stations?",
-                "images/BubbleChart.png"
             ],
             [
-                "True/False: In 2020, the unemployment rate for Washington (WA) was higher than that of Wisconsin (WI).",
-                "images/Choropleth_New.png"
             ],
             [
                 "What distance have customers traveled in the taxi the most?",
                 "images/Histogram.png"
             ],
             [
-                "What was the price of a barrel of oil in February 2020?",
-                "images/LineChart.png"
             ],
             [
-                "True/False: eBay is nested in the Software category.",
-                "images/TreeMap.png"
             ],
             [
-                "True/False: There is a negative linear relationship between the height and the weight of the 85 males.",
-                "images/Scatterplot.png"
-            ],
-            [
-                "Which country has the lowest proportion of Gold medals?",
-                "images/Stacked100.png"
             ],
             [
-                "What was the ratio of girls named 'Isla' to girls named 'Amelia' in 2012 in the UK?",
-                "images/StackedArea.png"
             ],
             [
-                "What is the cost of peanuts in Seoul?",
-                "images/StackedBar.png"
             ]
         ],

             activation_map_output = gr.Gallery(label="activation Map", height=300, columns=1)
         with gr.Column():
+            model_selector = gr.Dropdown(choices=["Clip", "ChartGemma-3B", "Janus-1B", "Janus-7B", "LLaVA-1.5-7B"], value="Clip", label="model")
             response_type = gr.Dropdown(choices=["Visualization only"], value="Visualization only", label="response_type")
             focus = gr.Dropdown(choices=["Visual Encoder"], value="Visual Encoder", label="focus")
             activation_map_method = gr.Dropdown(choices=["GradCAM"], value="GradCAM", label="activation map type")
     examples_inpainting = gr.Examples(
         label="Multimodal Understanding examples",
         examples=[
             [
+                "What was the price of a barrel of oil in February 2020?",
+                "images/LineChart.png"
             ],
             [
                 "What is the average internet speed in Japan?",
                 "images/BarChart.png"
             ],
             [
+                "What is the cost of peanuts in Seoul?",
+                "images/StackedBar.png"
             ],
             [
+                "Which country has the lowest proportion of Gold medals?",
+                "images/Stacked100.png"
             ],
+            [
+                "What is the approximate global smartphone market share of Samsung?",
+                "images/PieChart.png"
+            ],
             [
                 "What distance have customers traveled in the taxi the most?",
                 "images/Histogram.png"
             ],
             [
+                "True/False: There is a negative linear relationship between the height and the weight of the 85 males.",
+                "images/Scatterplot.png"
             ],
             [
+                "What was the average price of pount of coffee beans in October 2019?",
+                "images/AreaChart.png"
             ],
             [
+                "What was the ratio of girls named 'Isla' to girls named 'Amelia' in 2012 in the UK?",
+                "images/StackedArea.png"
             ],
             [
+                "Which city's metro system has the largest number of stations?",
+                "images/BubbleChart.png"
+            ],
+            [
+                "True/False: In 2020, the unemployment rate for Washington (WA) was higher than that of Wisconsin (WI).",
+                "images/Choropleth_New.png"
             ],
             [
+                "True/False: eBay is nested in the Software category.",
+                "images/TreeMap.png"
             ]
         ],

demo/model_utils.py CHANGED Viewed

@@ -162,8 +162,9 @@ class LLaVA_Utils(Model_Utils):
                 ],
             },
         ]
         prompt = self.processor.apply_chat_template(conversation, add_generation_prompt=True)
-        pil_images = [Image.fromarray(image)]
         prepare_inputs = self.processor(
             images=pil_images, text=prompt, return_tensors="pt"
         ).to(self.cuda_device, dtype=self.dtype)

                 ],
             },
         ]
         prompt = self.processor.apply_chat_template(conversation, add_generation_prompt=True)
+        pil_images = [Image.fromarray(image).resize((384, 384))]
         prepare_inputs = self.processor(
             images=pil_images, text=prompt, return_tensors="pt"
         ).to(self.cuda_device, dtype=self.dtype)