AustingDong committed on
Commit
2067800
·
1 Parent(s): 2bbc5f6

expand tests

Files changed (40)
  1. app-old.py +0 -501
  2. app.py +5 -82
  3. evaluate/evaluate.py +68 -11
  4. image_modifier.py +19 -0
  5. images/VLAT/AreaChart.png +0 -0
  6. images/VLAT/BarChart.png +0 -0
  7. images/VLAT/BubbleChart.png +0 -0
  8. images/VLAT/Choropleth_New.png +0 -0
  9. images/VLAT/Histogram.png +0 -0
  10. images/VLAT/LineChart.png +0 -0
  11. images/VLAT/PieChart.png +0 -0
  12. images/VLAT/Scatterplot.png +0 -0
  13. images/VLAT/Stacked100.png +0 -0
  14. images/VLAT/StackedArea.png +0 -0
  15. images/VLAT/StackedBar.png +0 -0
  16. images/VLAT/TreeMap.png +0 -0
  17. images/badge.svg +0 -1
  18. images/cat_dog.png +0 -0
  19. images/doge.png +0 -0
  20. images/equation.png +0 -0
  21. images/logo.png +0 -0
  22. images/logo.svg +0 -22
  23. images/{AreaChart.png → mini-VLAT/AreaChart.png} +0 -0
  24. images/{BarChart.png → mini-VLAT/BarChart.png} +0 -0
  25. images/{BubbleChart.png → mini-VLAT/BubbleChart.png} +0 -0
  26. images/{Choropleth_New.png → mini-VLAT/Choropleth_New.png} +0 -0
  27. images/{Histogram.png → mini-VLAT/Histogram.png} +0 -0
  28. images/{LineChart.png → mini-VLAT/LineChart.png} +0 -0
  29. images/{PieChart.png → mini-VLAT/PieChart.png} +0 -0
  30. images/{Scatterplot.png → mini-VLAT/Scatterplot.png} +0 -0
  31. images/{Stacked100.png → mini-VLAT/Stacked100.png} +0 -0
  32. images/{StackedArea.png → mini-VLAT/StackedArea.png} +0 -0
  33. images/{StackedBar.png → mini-VLAT/StackedBar.png} +0 -0
  34. images/{TreeMap.png → mini-VLAT/TreeMap.png} +0 -0
  35. images/pie_chart.png +0 -0
  36. images/ve.png +0 -0
  37. questions/VLAT.py +214 -0
  38. questions/VLAT_old.py +325 -0
  39. questions/__init__.py +0 -0
  40. questions/mini_VLAT.py +73 -0
app-old.py DELETED
@@ -1,501 +0,0 @@
-import gradio as gr
-import torch
-from transformers import AutoConfig, AutoModelForCausalLM
-from janus.models import MultiModalityCausalLM, VLChatProcessor
-from janus.utils.io import load_pil_images
-from demo.cam import generate_gradcam, AttentionGuidedCAMJanus, AttentionGuidedCAMClip, AttentionGuidedCAMChartGemma, AttentionGuidedCAMLLaVA
-from demo.model_utils import Clip_Utils, Janus_Utils, LLaVA_Utils, ChartGemma_Utils, add_title_to_image
-
-import numpy as np
-import matplotlib.pyplot as plt
-import gc
-import os
-import spaces
-from PIL import Image
-
-def set_seed(model_seed = 42):
-    torch.manual_seed(model_seed)
-    np.random.seed(model_seed)
-    torch.cuda.manual_seed(model_seed) if torch.cuda.is_available() else None
-
-set_seed()
-clip_utils = Clip_Utils()
-clip_utils.init_Clip()
-model_utils, vl_gpt, tokenizer = None, None, None
-model_name = "Clip"
-language_model_max_layer = 24
-language_model_best_layer = 8
-vision_model_best_layer = 24
-
-def clean():
-    global model_utils, vl_gpt, tokenizer, clip_utils
-    # Move models to CPU first (prevents CUDA references)
-    if 'vl_gpt' in globals() and vl_gpt is not None:
-        vl_gpt.to("cpu")
-    if 'clip_utils' in globals() and clip_utils is not None:
-        del clip_utils
-
-    # Delete all references
-    del model_utils, vl_gpt, tokenizer
-    model_utils, vl_gpt, tokenizer, clip_utils = None, None, None, None
-    gc.collect()
-
-    # Empty CUDA cache
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-        torch.cuda.ipc_collect()  # Frees inter-process CUDA memory
-
-    # Empty MacOS Metal backend (if using Apple Silicon)
-    if torch.backends.mps.is_available():
-        torch.mps.empty_cache()
-
-# Multimodal Understanding function
-@spaces.GPU(duration=120)
-def multimodal_understanding(model_type,
-                             activation_map_method,
-                             visual_pooling_method,
-                             image, question, seed, top_p, temperature, target_token_idx,
-                             visualization_layer_min, visualization_layer_max, focus, response_type, chart_type):
-    # Clear CUDA cache before generating
-    gc.collect()
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-        torch.cuda.ipc_collect()
-
-    # set seed
-    torch.manual_seed(seed)
-    np.random.seed(seed)
-    torch.cuda.manual_seed(seed) if torch.cuda.is_available() else None
-
-    input_text_decoded = ""
-    answer = ""
-    if model_name == "Clip":
-
-        inputs = clip_utils.prepare_inputs([question], image)
-
-        if activation_map_method == "GradCAM":
-            # Generate Grad-CAM
-            all_layers = [layer.layer_norm1 for layer in clip_utils.model.vision_model.encoder.layers]
-
-            if visualization_layer_min != visualization_layer_max:
-                target_layers = all_layers[visualization_layer_min-1 : visualization_layer_max-1]
-            else:
-                target_layers = [all_layers[visualization_layer_min-1]]
-            grad_cam = AttentionGuidedCAMClip(clip_utils.model, target_layers)
-            cam, outputs, grid_size = grad_cam.generate_cam(inputs, class_idx=0, visual_pooling_method=visual_pooling_method)
-            cam = cam.to("cpu")
-            cam = [generate_gradcam(cam, image, size=(224, 224))]
-            grad_cam.remove_hooks()
-            target_token_decoded = ""
-
-    else:
-
-        for param in vl_gpt.parameters():
-            param.requires_grad = True
-
-        prepare_inputs = model_utils.prepare_inputs(question, image)
-
-        if response_type == "answer + visualization":
-            if model_name.split('-')[0] == "Janus":
-                inputs_embeds = model_utils.generate_inputs_embeddings(prepare_inputs)
-                outputs = model_utils.generate_outputs(inputs_embeds, prepare_inputs, temperature, top_p)
-            else:
-                outputs = model_utils.generate_outputs(prepare_inputs, temperature, top_p)
-
-            sequences = outputs.sequences.cpu().tolist()
-            answer = tokenizer.decode(sequences[0], skip_special_tokens=True)
-            attention_raw = outputs.attentions
-            print("answer generated")
-
-        input_ids = prepare_inputs.input_ids[0].cpu().tolist()
-        input_ids_decoded = [tokenizer.decode([input_ids[i]]) for i in range(len(input_ids))]
-
-        if activation_map_method == "GradCAM":
-            # target_layers = vl_gpt.vision_model.vision_tower.blocks
-            if focus == "Visual Encoder":
-                if model_name.split('-')[0] == "Janus":
-                    all_layers = [block.norm1 for block in vl_gpt.vision_model.vision_tower.blocks]
-                else:
-                    all_layers = [block.layer_norm1 for block in vl_gpt.vision_tower.vision_model.encoder.layers]
-            else:
-                all_layers = [layer.self_attn for layer in vl_gpt.language_model.model.layers]
-
-            print("layer values:", visualization_layer_min, visualization_layer_max)
-            if visualization_layer_min != visualization_layer_max:
-                print("multi layers")
-                target_layers = all_layers[visualization_layer_min-1 : visualization_layer_max]
-            else:
-                print("single layer")
-                target_layers = [all_layers[visualization_layer_min-1]]
-
-            if model_name.split('-')[0] == "Janus":
-                gradcam = AttentionGuidedCAMJanus(vl_gpt, target_layers)
-            elif model_name.split('-')[0] == "LLaVA":
-                gradcam = AttentionGuidedCAMLLaVA(vl_gpt, target_layers)
-            elif model_name.split('-')[0] == "ChartGemma":
-                gradcam = AttentionGuidedCAMChartGemma(vl_gpt, target_layers)
-
-            start = 0
-            cam = []
-            if focus == "Visual Encoder":
-                if target_token_idx != -1:
-                    cam_tensors, grid_size, start = gradcam.generate_cam(prepare_inputs, tokenizer, temperature, top_p, target_token_idx, visual_pooling_method, focus)
-                    cam_grid = cam_tensors.reshape(grid_size, grid_size)
-                    cam_i = generate_gradcam(cam_grid, image)
-                    cam_i = add_title_to_image(cam_i, input_ids_decoded[start + target_token_idx])
-                    cam = [cam_i]
-                else:
-                    i = 0
-                    cam = []
-                    while start + i < len(input_ids_decoded):
-                        if model_name.split('-')[0] == "Janus":
-                            gradcam = AttentionGuidedCAMJanus(vl_gpt, target_layers)
-                        elif model_name.split('-')[0] == "LLaVA":
-                            gradcam = AttentionGuidedCAMLLaVA(vl_gpt, target_layers)
-                        elif model_name.split('-')[0] == "ChartGemma":
-                            gradcam = AttentionGuidedCAMChartGemma(vl_gpt, target_layers)
-                        cam_tensors, grid_size, start = gradcam.generate_cam(prepare_inputs, tokenizer, temperature, top_p, i, visual_pooling_method, focus)
-                        cam_grid = cam_tensors.reshape(grid_size, grid_size)
-                        cam_i = generate_gradcam(cam_grid, image)
-                        cam_i = add_title_to_image(cam_i, input_ids_decoded[start + i])
-                        cam.append(cam_i)
-                        gradcam.remove_hooks()
-                        i += 1
-            else:
-                cam_tensors, grid_size, start = gradcam.generate_cam(prepare_inputs, tokenizer, temperature, top_p, target_token_idx, visual_pooling_method, focus)
-                if target_token_idx != -1:
-                    input_text_decoded = input_ids_decoded[start + target_token_idx]
-                    for i, cam_tensor in enumerate(cam_tensors):
-                        if i == target_token_idx:
-                            cam_grid = cam_tensor.reshape(grid_size, grid_size)
-                            cam_i = generate_gradcam(cam_grid, image)
-                            cam = [add_title_to_image(cam_i, input_text_decoded)]
-                            break
-                else:
-                    cam = []
-                    for i, cam_tensor in enumerate(cam_tensors):
-                        cam_grid = cam_tensor.reshape(grid_size, grid_size)
-                        cam_i = generate_gradcam(cam_grid, image)
-                        cam_i = add_title_to_image(cam_i, input_ids_decoded[start + i])
-
-                        cam.append(cam_i)
-
-            gradcam.remove_hooks()
-
-        # Collect Results
-        RESULTS_ROOT = "./results"
-        FILES_ROOT = f"{RESULTS_ROOT}/{model_name}/{focus}/{chart_type}/layer{visualization_layer_min}-{visualization_layer_max}"
-        os.makedirs(FILES_ROOT, exist_ok=True)
-        if focus == "Visual Encoder":
-            cam[0].save(f"{FILES_ROOT}/{visual_pooling_method}.png")
-        else:
-            for i, cam_p in enumerate(cam):
-                cam_p.save(f"{FILES_ROOT}/{i}.png")
-
-        with open(f"{FILES_ROOT}/input_text_decoded.txt", "w") as f:
-            f.write(input_text_decoded)
-            f.close()
-
-        with open(f"{FILES_ROOT}/answer.txt", "w") as f:
-            f.write(answer)
-            f.close()
-
-    return answer, cam, input_text_decoded
-
-
-# Gradio interface
-
-def model_slider_change(model_type):
-    global model_utils, vl_gpt, tokenizer, clip_utils, model_name, language_model_max_layer, language_model_best_layer, vision_model_best_layer
-    model_name = model_type
-    if model_type == "Clip":
-        clean()
-        set_seed()
-        clip_utils = Clip_Utils()
-        clip_utils.init_Clip()
-        res = (
-            gr.Dropdown(choices=["Visualization only"], value="Visualization only", label="response_type"),
-            gr.Slider(minimum=1, maximum=12, value=12, step=1, label="visualization layers min"),
-            gr.Slider(minimum=1, maximum=12, value=12, step=1, label="visualization layers max"),
-            gr.Dropdown(choices=["Visual Encoder"], value="Visual Encoder", label="focus"),
-            gr.Dropdown(choices=["GradCAM"], value="GradCAM", label="activation map type")
-        )
-        return res
-    elif model_type.split('-')[0] == "Janus":
-        clean()
-        set_seed()
-        model_utils = Janus_Utils()
-        vl_gpt, tokenizer = model_utils.init_Janus(model_type.split('-')[-1])
-        language_model_max_layer = 24
-        language_model_best_layer = 8
-
-        res = (
-            gr.Dropdown(choices=["Visualization only", "answer + visualization"], value="answer + visualization", label="response_type"),
-            gr.Slider(minimum=1, maximum=24, value=24, step=1, label="visualization layers min"),
-            gr.Slider(minimum=1, maximum=24, value=24, step=1, label="visualization layers max"),
-            gr.Dropdown(choices=["Visual Encoder", "Language Model"], value="Visual Encoder", label="focus"),
-            gr.Dropdown(choices=["GradCAM"], value="GradCAM", label="activation map type")
-        )
-        return res
-
-    elif model_type.split('-')[0] == "LLaVA":
-        clean()
-        set_seed()
-        model_utils = LLaVA_Utils()
-        version = model_type.split('-')[1]
-        vl_gpt, tokenizer = model_utils.init_LLaVA(version=version)
-        language_model_max_layer = 32 if version == "1.5" else 28
-        language_model_best_layer = 10
-
-        res = (
-            gr.Dropdown(choices=["Visualization only", "answer + visualization"], value="answer + visualization", label="response_type"),
-            gr.Slider(minimum=1, maximum=language_model_max_layer, value=language_model_best_layer, step=1, label="visualization layers min"),
-            gr.Slider(minimum=1, maximum=language_model_max_layer, value=language_model_best_layer, step=1, label="visualization layers max"),
-            gr.Dropdown(choices=["Language Model"], value="Language Model", label="focus"),
-            gr.Dropdown(choices=["GradCAM"], value="GradCAM", label="activation map type")
-        )
-        return res
-
-    elif model_type.split('-')[0] == "ChartGemma":
-        clean()
-        set_seed()
-        model_utils = ChartGemma_Utils()
-        vl_gpt, tokenizer = model_utils.init_ChartGemma()
-        language_model_max_layer = 18
-        vision_model_best_layer = 19
-        language_model_best_layer = 15
-
-        res = (
-            gr.Dropdown(choices=["Visualization only", "answer + visualization"], value="answer + visualization", label="response_type"),
-            gr.Slider(minimum=1, maximum=language_model_best_layer, value=language_model_best_layer, step=1, label="visualization layers min"),
-            gr.Slider(minimum=1, maximum=language_model_best_layer, value=language_model_best_layer, step=1, label="visualization layers max"),
-            gr.Dropdown(choices=["Visual Encoder", "Language Model"], value="Language Model", label="focus"),
-            gr.Dropdown(choices=["GradCAM"], value="GradCAM", label="activation map type")
-        )
-        return res
-
-def focus_change(focus):
-    global model_name, language_model_max_layer
-    if model_name == "Clip":
-        res = (
-            gr.Dropdown(choices=["GradCAM"], value="GradCAM", label="activation map type"),
-            gr.Slider(minimum=1, maximum=12, value=12, step=1, label="visualization layers min"),
-            gr.Slider(minimum=1, maximum=12, value=12, step=1, label="visualization layers max")
-        )
-        return res
-
-    if focus == "Language Model":
-        if response_type.value == "answer + visualization":
-            res = (
-                gr.Dropdown(choices=["GradCAM"], value="GradCAM", label="activation map type"),
-                gr.Slider(minimum=1, maximum=language_model_max_layer, value=language_model_best_layer, step=1, label="visualization layers min"),
-                gr.Slider(minimum=1, maximum=language_model_max_layer, value=language_model_best_layer, step=1, label="visualization layers max")
-            )
-            return res
-        else:
-            res = (
-                gr.Dropdown(choices=["GradCAM"], value="GradCAM", label="activation map type"),
-                gr.Slider(minimum=1, maximum=language_model_max_layer, value=language_model_best_layer, step=1, label="visualization layers min"),
-                gr.Slider(minimum=1, maximum=language_model_max_layer, value=language_model_best_layer, step=1, label="visualization layers max")
-            )
-            return res
-
-    else:
-        if model_name.split('-')[0] == "ChartGemma":
-            res = (
-                gr.Dropdown(choices=["GradCAM"], value="GradCAM", label="activation map type"),
-                gr.Slider(minimum=1, maximum=26, value=vision_model_best_layer, step=1, label="visualization layers min"),
-                gr.Slider(minimum=1, maximum=26, value=vision_model_best_layer, step=1, label="visualization layers max")
-            )
-            return res
-        else:
-            res = (
-                gr.Dropdown(choices=["GradCAM"], value="GradCAM", label="activation map type"),
-                gr.Slider(minimum=1, maximum=24, value=24, step=1, label="visualization layers min"),
-                gr.Slider(minimum=1, maximum=24, value=24, step=1, label="visualization layers max")
-            )
-            return res
-
-with gr.Blocks() as demo:
-    gr.Markdown(value="# Multimodal Understanding")
-
-    with gr.Row():
-        image_input = gr.Image(height=500, label="Image")
-        activation_map_output = gr.Gallery(label="Visualization", height=500, columns=1, preview=True)
-
-    with gr.Row():
-        chart_type = gr.Textbox(label="Chart Type")
-        understanding_output = gr.Textbox(label="Answer")
-
-    with gr.Row():
-
-        with gr.Column():
-            model_selector = gr.Dropdown(choices=["Clip", "ChartGemma-3B", "Janus-Pro-1B", "Janus-Pro-7B", "LLaVA-1.5-7B"], value="Clip", label="model")
-            question_input = gr.Textbox(label="Input Prompt")
-            und_seed_input = gr.Number(label="Seed", precision=0, value=42)
-            top_p = gr.Slider(minimum=0, maximum=1, value=0.95, step=0.05, label="top_p")
-            temperature = gr.Slider(minimum=0, maximum=1, value=0.1, step=0.05, label="temperature")
-            target_token_idx = gr.Number(label="target_token_idx (-1 means all)", precision=0, value=-1)
-
-        with gr.Column():
-            response_type = gr.Dropdown(choices=["Visualization only"], value="Visualization only", label="response_type")
-            focus = gr.Dropdown(choices=["Visual Encoder"], value="Visual Encoder", label="focus")
-            activation_map_method = gr.Dropdown(choices=["GradCAM"], value="GradCAM", label="visualization type")
-            visual_pooling_method = gr.Dropdown(choices=["CLS", "max", "avg"], value="CLS", label="visual pooling method")
-
-            visualization_layers_min = gr.Slider(minimum=1, maximum=12, value=12, step=1, label="visualization layers min")
-            visualization_layers_max = gr.Slider(minimum=1, maximum=12, value=12, step=1, label="visualization layers max")
-
-    model_selector.change(
-        fn=model_slider_change,
-        inputs=model_selector,
-        outputs=[
-            response_type,
-            visualization_layers_min,
-            visualization_layers_max,
-            focus,
-            activation_map_method
-        ]
-    )
-
-    focus.change(
-        fn = focus_change,
-        inputs = focus,
-        outputs=[
-            activation_map_method,
-            visualization_layers_min,
-            visualization_layers_max,
-        ]
-    )
-
-    # response_type.change(
-    #     fn = response_type_change,
-    #     inputs = response_type,
-    #     outputs = [activation_map_method]
-    # )
-
-    understanding_button = gr.Button("Submit")
-
-    understanding_target_token_decoded_output = gr.Textbox(label="Target Token Decoded")
-
-    examples_inpainting = gr.Examples(
-        label="Multimodal Understanding examples",
-        examples=[
-            [
-                "LineChart",
-                "What was the price of a barrel of oil in February 2020?",
-                "images/LineChart.png"
-            ],
-            [
-                "BarChart",
-                "What is the average internet speed in Japan?",
-                "images/BarChart.png"
-            ],
-            [
-                "StackedBar",
-                "What is the cost of peanuts in Seoul?",
-                "images/StackedBar.png"
-            ],
-            [
-                "100%StackedBar",
-                "Which country has the lowest proportion of Gold medals?",
-                "images/Stacked100.png"
-            ],
-            [
-                "PieChart",
-                "What is the approximate global smartphone market share of Samsung?",
-                "images/PieChart.png"
-            ],
-            [
-                "Histogram",
-                "What distance have customers traveled in the taxi the most?",
-                "images/Histogram.png"
-            ],
-            [
-                "Scatterplot",
-                "True/False: There is a negative linear relationship between the height and the weight of the 85 males.",
-                "images/Scatterplot.png"
-            ],
-            [
-                "AreaChart",
-                "What was the average price of pount of coffee beans in October 2019?",
-                "images/AreaChart.png"
-            ],
-            [
-                "StackedArea",
-                "What was the ratio of girls named 'Isla' to girls named 'Amelia' in 2012 in the UK?",
-                "images/StackedArea.png"
-            ],
-            [
-                "BubbleChart",
-                "Which city's metro system has the largest number of stations?",
-                "images/BubbleChart.png"
-            ],
-            [
-                "Choropleth",
-                "True/False: In 2020, the unemployment rate for Washington (WA) was higher than that of Wisconsin (WI).",
-                "images/Choropleth_New.png"
-            ],
-            [
-                "TreeMap",
-                "True/False: eBay is nested in the Software category.",
-                "images/TreeMap.png"
-            ]
-        ],
-        inputs=[chart_type, question_input, image_input],
-    )
-
-    understanding_button.click(
-        multimodal_understanding,
-        inputs=[model_selector, activation_map_method, visual_pooling_method, image_input, question_input, und_seed_input, top_p, temperature, target_token_idx,
-                visualization_layers_min, visualization_layers_max, focus, response_type, chart_type],
-        outputs=[understanding_output, activation_map_output, understanding_target_token_decoded_output]
-    )
-
-demo.launch(share=True)
-# demo.queue(concurrency_count=1, max_size=10).launch(server_name="0.0.0.0", server_port=37906, root_path="/path")
app.py CHANGED
@@ -6,7 +6,9 @@ from janus.utils.io import load_pil_images
 from demo.visualization import generate_gradcam, VisualizationJanus, VisualizationClip, VisualizationChartGemma, VisualizationLLaVA
 from demo.model_utils import Clip_Utils, Janus_Utils, LLaVA_Utils, ChartGemma_Utils, add_title_to_image
 from demo.modified_attn import ModifiedLlamaAttention, ModifiedGemmaAttention
-
+from questions.mini_VLAT import mini_VLAT_questions
+from questions.VLAT_old import VLAT_old_questions
+from questions.VLAT import VLAT_questions
 import numpy as np
 import matplotlib.pyplot as plt
 import gc
@@ -411,12 +413,6 @@ with gr.Blocks() as demo:
         ]
     )
 
-    # response_type.change(
-    #     fn = response_type_change,
-    #     inputs = response_type,
-    #     outputs = [activation_map_method]
-    # )
-
 
 
     understanding_button = gr.Button("Submit")
@@ -426,81 +422,8 @@ with gr.Blocks() as demo:
 
     examples_inpainting = gr.Examples(
         label="Multimodal Understanding examples",
-        examples=[
-            [
-                "LineChart",
-                "What was the price of a barrel of oil in February 2020?",
-                "images/LineChart.png"
-            ],
-            [
-                "BarChart",
-                "What is the average internet speed in Japan?",
-                "images/BarChart.png"
-            ],
-            [
-                "StackedBar",
-                "What is the cost of peanuts in Seoul?",
-                "images/StackedBar.png"
-            ],
-            [
-                "100%StackedBar",
-                "Which country has the lowest proportion of Gold medals?",
-                "images/Stacked100.png"
-            ],
-            [
-                "PieChart",
-                "What is the approximate global smartphone market share of Samsung?",
-                "images/PieChart.png"
-            ],
-            [
-                "Histogram",
-                "What distance have customers traveled in the taxi the most?",
-                "images/Histogram.png"
-            ],
-            [
-                "Scatterplot",
-                "True/False: There is a negative linear relationship between the height and the weight of the 85 males.",
-                "images/Scatterplot.png"
-            ],
-            [
-                "AreaChart",
-                "What was the average price of pount of coffee beans in October 2019?",
-                "images/AreaChart.png"
-            ],
-            [
-                "StackedArea",
-                "What was the ratio of girls named 'Isla' to girls named 'Amelia' in 2012 in the UK?",
-                "images/StackedArea.png"
-            ],
-            [
-                "BubbleChart",
-                "Which city's metro system has the largest number of stations?",
-                "images/BubbleChart.png"
-            ],
-            [
-                "Choropleth",
-                "True/False: In 2020, the unemployment rate for Washington (WA) was higher than that of Wisconsin (WI).",
-                "images/Choropleth_New.png"
-            ],
-            [
-                "TreeMap",
-                "True/False: eBay is nested in the Software category.",
-                "images/TreeMap.png"
-            ]
-        ],
+        # examples=mini_VLAT_questions,
+        examples=VLAT_questions,
         inputs=[chart_type, question_input, image_input],
     )
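Note on the examples swap above: each entry in the imported question lists is a positional [chart_type, question, image_path] triple, matching the three components in inputs=[chart_type, question_input, image_input]. As a sketch (not part of the commit), any of the three imported sets can back the widget the same way:

    # Hypothetical illustration: swapping in another question set; names come
    # from the imports added in this diff.
    examples_inpainting = gr.Examples(
        label="Multimodal Understanding examples",
        examples=mini_VLAT_questions,  # or VLAT_questions / VLAT_old_questions
        inputs=[chart_type, question_input, image_input],
    )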
evaluate/evaluate.py CHANGED
@@ -1,7 +1,9 @@
 import os
 import torch
+import base64
 import numpy as np
 from PIL import Image
+from openai import OpenAI
 from demo.model_utils import *
 from evaluate.questions import questions
 
@@ -20,6 +22,9 @@ def clean():
     if torch.backends.mps.is_available():
         torch.mps.empty_cache()
 
+def encode_image(image_path):
+    with open(image_path, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode("utf-8")
 
 def evaluate(model_type, num_eval = 10):
     for eval_idx in range(num_eval):
@@ -39,6 +44,13 @@ def evaluate(model_type, num_eval = 10):
         elif model_type.split('-')[0] == "ChartGemma":
             model_utils = ChartGemma_Utils()
             vl_gpt, tokenizer = model_utils.init_ChartGemma()
+
+        elif model_type.split('-')[0] == "GPT":
+            client = OpenAI(api_key=os.environ["OPENAI_HCI_API_KEY"])
+
+        elif model_type.split('-')[0] == "Gemini":
+            client = OpenAI(api_key=os.environ["GEMINI_HCI_API_KEY"],
+                            base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
 
         for question in questions:
             chart_type = question[0]
@@ -46,18 +58,63 @@ def evaluate(model_type, num_eval = 10):
             img_path = question[2]
             image = np.array(Image.open(img_path).convert("RGB"))
 
-            prepare_inputs = model_utils.prepare_inputs(q, image)
-            temperature = 0.9
-            top_p = 0.1
-
-            if model_type.split('-')[0] == "Janus":
-                inputs_embeds = model_utils.generate_inputs_embeddings(prepare_inputs)
-                outputs = model_utils.generate_outputs(inputs_embeds, prepare_inputs, temperature, top_p)
+            if model_type.split('-')[0] == "GPT":
+                base64_image = encode_image(img_path)
+                completion = client.chat.completions.create(
+                    model="gpt-4o",
+                    messages=[
+                        {
+                            "role": "user",
+                            "content": [
+                                { "type": "text", "text": f"{q}" },
+                                {
+                                    "type": "image_url",
+                                    "image_url": {
+                                        "url": f"data:image/jpeg;base64,{base64_image}",
+                                    },
+                                },
+                            ],
+                        }
+                    ],
+                )
+                answer = completion.choices[0].message.content
+
+            elif model_type.split('-')[0] == "Gemini":
+                base64_image = encode_image(img_path)
+                completion = client.chat.completions.create(
+                    model="gemini-2.0-flash",
+                    messages=[
+                        {
+                            "role": "user",
+                            "content": [
+                                { "type": "text", "text": f"{q}" },
+                                {
+                                    "type": "image_url",
+                                    "image_url": {
+                                        "url": f"data:image/jpeg;base64,{base64_image}",
+                                    },
+                                },
+                            ],
+                        }
+                    ],
+                )
+                answer = completion.choices[0].message.content
+
             else:
-                outputs = model_utils.generate_outputs(prepare_inputs, temperature, top_p)
-
-            sequences = outputs.sequences.cpu().tolist()
-            answer = tokenizer.decode(sequences[0], skip_special_tokens=True)
+                prepare_inputs = model_utils.prepare_inputs(q, image)
+                temperature = 0.9
+                top_p = 0.1
+
+                if model_type.split('-')[0] == "Janus":
+                    inputs_embeds = model_utils.generate_inputs_embeddings(prepare_inputs)
+                    outputs = model_utils.generate_outputs(inputs_embeds, prepare_inputs, temperature, top_p)
+                else:
+                    outputs = model_utils.generate_outputs(prepare_inputs, temperature, top_p)
+
+                sequences = outputs.sequences.cpu().tolist()
+                answer = tokenizer.decode(sequences[0], skip_special_tokens=True)
 
             RESULTS_ROOT = "./evaluate/results"
             FILES_ROOT = f"{RESULTS_ROOT}/{model_type}/{eval_idx}"
@@ -71,9 +128,9 @@ def evaluate(model_type, num_eval = 10):
 
 if __name__ == '__main__':
 
-    # models = ["ChartGemma", "Janus-Pro-1B", "Janus-Pro-7B", "LLaVA-1.5-7B"]
+    # models = ["ChartGemma", "Janus-Pro-1B", "Janus-Pro-7B", "LLaVA-1.5-7B", "GPT-4o", "Gemini-2.0-flash"]
    # models = ["ChartGemma", "Janus-Pro-1B"]
    # models = ["Janus-Pro-7B", "LLaVA-1.5-7B"]
-    models = ["LLaVA-1.5-7B"]
+    models = ["GPT-4o", "Gemini-2.0-flash"]
     for model_type in models:
         evaluate(model_type=model_type, num_eval=10)
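For reference, a minimal sketch of driving the new API-backed branches (assuming the OPENAI_HCI_API_KEY and GEMINI_HCI_API_KEY environment variables referenced in the diff are set; the prefix before the first '-' in the model name selects the branch):

    from evaluate.evaluate import evaluate

    # "GPT-4o".split('-')[0] == "GPT" routes to the OpenAI client;
    # "Gemini-2.0-flash".split('-')[0] == "Gemini" routes to the
    # OpenAI-compatible Gemini endpoint.
    evaluate(model_type="GPT-4o", num_eval=1)
    evaluate(model_type="Gemini-2.0-flash", num_eval=1)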
image_modifier.py ADDED
@@ -0,0 +1,19 @@
+from PIL import Image
+import os
+
+def augment(img_path):
+    ROOT = "./images/VLAT"
+    NEW_ROOT = "./changed"
+    img = Image.open(f"{ROOT}/{img_path}").convert("RGBA")
+
+    # Create a white background
+    white_bg = Image.new("RGB", img.size, (255, 255, 255))
+    white_bg.paste(img, mask=img.split()[3])  # Paste using alpha channel as mask
+
+    # Save the result
+    white_bg.save(f'{NEW_ROOT}/{img_path}')  # Or save as .png if you prefer
+
+if __name__ == "__main__":
+    ROOT = "./images/VLAT"
+    for img in os.listdir(ROOT):
+        augment(img)
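One caveat on the script above: augment() writes into ./changed, but nothing creates that directory, and PIL's save() raises FileNotFoundError when the parent directory is missing. A guard along these lines (an illustrative addition, not part of the commit) makes the first run safe:

    import os

    # Assumed fix: create the output directory before any save() call.
    os.makedirs("./changed", exist_ok=True)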
images/VLAT/AreaChart.png ADDED
images/VLAT/BarChart.png ADDED
images/VLAT/BubbleChart.png ADDED
images/VLAT/Choropleth_New.png ADDED
images/VLAT/Histogram.png ADDED
images/VLAT/LineChart.png ADDED
images/VLAT/PieChart.png ADDED
images/VLAT/Scatterplot.png ADDED
images/VLAT/Stacked100.png ADDED
images/VLAT/StackedArea.png ADDED
images/VLAT/StackedBar.png ADDED
images/VLAT/TreeMap.png ADDED
images/badge.svg DELETED
images/cat_dog.png DELETED
Binary file (629 kB)
 
images/doge.png DELETED
Binary file (275 kB)
 
images/equation.png DELETED
Binary file (31.5 kB)
 
images/logo.png DELETED
Binary file (8.66 kB)
 
images/logo.svg DELETED
images/{AreaChart.png → mini-VLAT/AreaChart.png} RENAMED
File without changes
images/{BarChart.png → mini-VLAT/BarChart.png} RENAMED
File without changes
images/{BubbleChart.png → mini-VLAT/BubbleChart.png} RENAMED
File without changes
images/{Choropleth_New.png → mini-VLAT/Choropleth_New.png} RENAMED
File without changes
images/{Histogram.png → mini-VLAT/Histogram.png} RENAMED
File without changes
images/{LineChart.png → mini-VLAT/LineChart.png} RENAMED
File without changes
images/{PieChart.png → mini-VLAT/PieChart.png} RENAMED
File without changes
images/{Scatterplot.png → mini-VLAT/Scatterplot.png} RENAMED
File without changes
images/{Stacked100.png → mini-VLAT/Stacked100.png} RENAMED
File without changes
images/{StackedArea.png → mini-VLAT/StackedArea.png} RENAMED
File without changes
images/{StackedBar.png → mini-VLAT/StackedBar.png} RENAMED
File without changes
images/{TreeMap.png → mini-VLAT/TreeMap.png} RENAMED
File without changes
images/pie_chart.png DELETED
Binary file (58.1 kB)
 
images/ve.png DELETED
Binary file (275 kB)
 
questions/VLAT.py ADDED
@@ -0,0 +1,214 @@
+VLAT_questions=[
+    [
+        "StackedBar",
+        "What percentage of the room service cost in Copenhagen, comes from the sandwich?",
+        "images/mini-VLAT/StackedBar.png"
+    ],
+
+    [
+        "StackedBar",
+        "In which city is the cost of soda the highest?",
+        "images/mini-VLAT/StackedBar.png"
+    ],
+
+    [
+        "StackedBar",
+        "True/False: The cost of vodka in Helsinki is higher than that of Toronto.",
+        "images/mini-VLAT/StackedBar.png"
+    ],
+
+    [
+        "StackedBar",
+        "True/False: The ratio of the cost of Soda to the cost of Water is higher in N.Y.C than in Stockholm.",
+        "images/mini-VLAT/StackedBar.png"
+    ],
+
+    [
+        "StackedBar",
+        "What is the cost of peanuts in room service in Seoul?",
+        "images/mini-VLAT/StackedBar.png"
+    ],
+
+    [
+        "StackedArea",
+        "What was the number of girls named 'Isla' in 2010 in the UK?",
+        "images/mini-VLAT/StackedArea.png"
+    ],
+
+    [
+        "StackedArea",
+        "True/False: Over the course of years between 2009 and 2014, the number of girls named 'Isla' was always more than 'Amelia'.",
+        "images/mini-VLAT/StackedArea.png"
+    ],
+
+    [
+        "StackedArea",
+        "Over the course of years between 2009 and 2014, when was the number of girls named 'Isla' at the maximum?",
+        "images/mini-VLAT/StackedArea.png"
+    ],
+
+    [
+        "StackedArea",
+        "The number of girls named 'Olivia' was raising or falling from 2009 to 2012.",
+        "images/mini-VLAT/StackedArea.png"
+    ],
+
+    [
+        "StackedArea",
+        "True/False: In the UK, the number of girls named 'Isla' in 2014 was more than it was in 2013.",
+        "images/mini-VLAT/StackedArea.png"
+    ],
+
+    [
+        "Scatterplot",
+        "What is the weight for the person who is 161 cm tall?",
+        "images/mini-VLAT/Scatterplot.png"
+    ],
+
+    [
+        "Scatterplot",
+        "What is the height for the tallest person among the 85 individuals?",
+        "images/mini-VLAT/Scatterplot.png"
+    ],
+
+    [
+        "Scatterplot",
+        "What is the range in weight for the 85 individuals?",
+        "images/mini-VLAT/Scatterplot.png"
+    ],
+
+    [
+        "Scatterplot",
+        "True/False: There is a group of individuals centered at the height of 176 cm and the weight of 55 kg.",
+        "images/mini-VLAT/Scatterplot.png"
+    ],
+
+    [
+        "Scatterplot",
+        "True/False: There is a negative linear relationship between the height and the weight of the 85 individuals",
+        "images/mini-VLAT/Scatterplot.png"
+    ],
+
+    [
+        "Scatterplot",
+        "What is the height of the most prominent outlier in the data?",
+        "images/mini-VLAT/Scatterplot.png"
+    ],
+
+    [
+        "LineChart",
+        "What was the price of a barrel of oil in February 2020?",
+        "images/mini-VLAT/LineChart.png"
+    ],
+
+    [
+        "LineChart",
+        "In which month was the price of a barrel of oil the lowest in 2020?",
+        "images/mini-VLAT/LineChart.png"
+    ],
+
+    [
+        "LineChart",
+        "What was the price range of a barrel of oil in 2020?",
+        "images/mini-VLAT/LineChart.png"
+    ],
+
+    [
+        "LineChart",
+        "Over the course of the first quarter of 2020, the price of a barrel of oil was rising or falling?",
+        "images/mini-VLAT/LineChart.png"
+    ],
+
+    [
+        "LineChart",
+        "About how much did the price of a barrel of oil rise from April to August in 2020?",
+        "images/mini-VLAT/LineChart.png"
+    ],
+
+    [
+        "BubbleChart",
+        "What is the range of metro system lengths in the data shown in this chart?",
+        "images/mini-VLAT/BubbleChart.png"
+    ],
+
+    [
+        "BubbleChart",
+        "True or False: Tokyo has a bigger ridership than Guangzhow.",
+        "images/mini-VLAT/BubbleChart.png"
+    ],
+
+    [
+        "BubbleChart",
+        "True/False: In general, the ridership of the metro system increases as the number of stations increases.",
+        "images/mini-VLAT/BubbleChart.png"
+    ],
+
+    [
+        "BubbleChart",
+        "True/False: The average metro system in this chart has approximately 300 stations and is 200km in length.",
+        "images/mini-VLAT/BubbleChart.png"
+    ],
+
+    [
+        "BubbleChart",
+        "Which city's metro system has the largest number of stations?",
+        "images/mini-VLAT/BubbleChart.png"
+    ],
+
+
+
+
+
+
+
+
+
+    [
+        "BarChart",
+        "What is the average internet speed in Japan?",
+        "images/mini-VLAT/BarChart.png"
+    ],
+
+    [
+        "StackedBar",
+        "What is the cost of peanuts in Seoul?",
+        "images/mini-VLAT/StackedBar.png"
+    ],
+
+    [
+        "100%StackedBar",
+        "Which country has the lowest proportion of Gold medals?",
+        "images/mini-VLAT/Stacked100.png"
+    ],
+
+    [
+        "PieChart",
+        "What is the approximate global smartphone market share of Samsung?",
+        "images/mini-VLAT/PieChart.png"
+    ],
+
+    [
+        "Histogram",
+        "What distance have customers traveled in the taxi the most?",
+        "images/mini-VLAT/Histogram.png"
+    ],
+
+    [
+        "AreaChart",
+        "What was the average price of pount of coffee beans in October 2019?",
+        "images/mini-VLAT/AreaChart.png"
+    ],
+
+    [
+        "Choropleth",
+        "True/False: In 2020, the unemployment rate for Washington (WA) was higher than that of Wisconsin (WI).",
+        "images/mini-VLAT/Choropleth_New.png"
+    ],
+
+    [
+        "TreeMap",
+        "True/False: eBay is nested in the Software category.",
+        "images/mini-VLAT/TreeMap.png"
+    ]
+]
questions/VLAT_old.py ADDED
@@ -0,0 +1,325 @@
+VLAT_old_questions = [
+    [
+        "LineChart",
+        "What was the price of a barrel of oil in February 2015?",
+        "images/VLAT/LineChart.png"
+    ],
+
+    [
+        "LineChart",
+        "In which month was the price of a barrel of oil the lowest in 2015?",
+        "images/VLAT/LineChart.png"
+    ],
+
+    [
+        "LineChart",
+        "What was the price range of a barrel of oil in 2015?",
+        "images/VLAT/LineChart.png"
+    ],
+
+    [
+        "LineChart",
+        "Over the course of the second half of 2015, the price of a barrel of oil was increasing or decreasing?",
+        "images/VLAT/LineChart.png"
+    ],
+
+    [
+        "LineChart",
+        "About how much did the price of a barrel of oil fall from April to September in 2015?",
+        "images/VLAT/LineChart.png"
+    ],
+
+    [
+        "BarChart",
+        "What is the average internet speed in Japan?",
+        "images/VLAT/BarChart.png"
+    ],
+
+    [
+        "BarChart",
+        "In which country is the average internet speed the fastest in Asia?",
+        "images/VLAT/BarChart.png"
+    ],
+
+    [
+        "BarChart",
+        "What is the range of the average internet speed in Asia?",
+        "images/VLAT/BarChart.png"
+    ],
+
+    [
+        "BarChart",
+        "How many countries in Asia is the average internet speed slower than Thailand?",
+        "images/VLAT/BarChart.png"
+    ],
+
+    [
+        "StackedBar",
+        "What is the cost of peanuts in Las Vegas?",
+        "images/VLAT/StackedBar.png"
+    ],
+
+    [
+        "StackedBar",
+        "About what is the ratio of the cost of a sandwich to the total cost of room service in Seattle?",
+        "images/VLAT/StackedBar.png"
+    ],
+
+    [
+        "StackedBar",
+        "In which city is the cost of soda the highest?",
+        "images/VLAT/StackedBar.png"
+    ],
+
+    [
+        "StackedBar",
+        "True/False: The cost of vodka in Atlanta is higher than that of Honolulu.",
+        "images/VLAT/StackedBar.png"
+    ],
+
+    [
+        "StackedBar",
+        "True/False: The ratio of the cost of Soda to the cost of Water in Orlando is higher than that of Washington D.C.",
+        "images/VLAT/StackedBar.png"
+    ],
+
+    [
+        "100%StackedBar",
+        "What is the approval rating of Republicans among the people who have the education level of Postgraduate Study?",
+        "images/VLAT/Stacked100.png"
+    ],
+
+    [
+        "100%StackedBar",
+        "What is the education level of people in which the Democrats have the lowest approval rating",
+        "images/VLAT/Stacked100.png"
+    ],
+
+    [
+        "100%StackedBar",
+        "True/False: The approval rating of Republicans for the people who have the education level of Some College Degree is lower than that for the people who have the education level of Postgraduate Study.",
+        "images/VLAT/Stacked100.png"
+    ],
+
+    [
+        "PieChart",
+        "About what is the global smartphone market share of Samsung?",
+        "images/VLAT/PieChart.png"
+    ],
+
+    [
+        "PieChart",
+        "In which company is the global smartphone market share the smallest?",
+        "images/VLAT/PieChart.png"
+    ],
+
+    [
+        "PieChart",
+        "True/False: The global smartphone market share of Apple is larger than that of Huawei.",
+        "images/VLAT/PieChart.png"
+    ],
+
+    [
+        "Histogram",
+        "How many people have rated the taxi between 4.0 and 4.2?",
+        "images/VLAT/Histogram.png"
+    ],
+
+    [
+        "Histogram",
+        "What is the rating that the people have rated the taxi the most?",
+        "images/VLAT/Histogram.png"
+    ],
+
+    [
+        "Histogram",
+        "True/False: The distribution of the taxi passenger rating is generally skewed to the left.",
+        "images/VLAT/Histogram.png"
+    ],
+
+    [
+        "Histogram",
+        "True/False: More people have rated the taxi between 4.6 and 4.8 than between 4.2 and 4.4.",
+        "images/VLAT/Histogram.png"
+    ],
+
+    [
+        "Scatterplot",
+        "What is the weight for the person who is 165.1 cm tall?",
+        "images/VLAT/Scatterplot.png"
+    ],
+
+    [
+        "Scatterplot",
+        "What is the height for the tallest person among the 85 males?",
+        "images/VLAT/Scatterplot.png"
+    ],
+
+    [
+        "Scatterplot",
+        "What is the range in weight for the 85 males?",
+        "images/VLAT/Scatterplot.png"
+    ],
+
+    [
+        "Scatterplot",
+        "What is the height for a person who lies outside the others the most?",
+        "images/VLAT/Scatterplot.png"
+    ],
+
+    [
+        "Scatterplot",
+        "True/False: A group of males are gathered around the height of 176 cm and the weight of 70 kg.",
+        "images/VLAT/Scatterplot.png"
+    ],
+
+    [
+        "Scatterplot",
+        "True/False: There is a negative linear relationship between the height and the weight of the 85 males.",
+        "images/VLAT/Scatterplot.png"
+    ],
+
+    [
+        "Scatterplot",
+        "True/False: The weights for males with the height of 188 cm are all the same.",
+        "images/VLAT/Scatterplot.png"
+    ],
+
+    [
+        "AreaChart",
+        "What was the average price of a pound of coffee beans in September 2013?",
+        "images/VLAT/AreaChart.png"
+    ],
+
+    [
+        "AreaChart",
+        "When was the average price of a pound of coffee beans at minimum?",
+        "images/VLAT/AreaChart.png"
+    ],
+
+    [
+        "AreaChart",
+        "What was the range of the average price of a pound of coffee beans between January 2013 and December 2014?",
+        "images/VLAT/AreaChart.png"
+    ],
+
+    [
+        "AreaChart",
+        "Over the course of 2013, the average price of a pound of coffee beans was ____________?",
+        "images/VLAT/AreaChart.png"
+    ],
+
+    [
+        "StackedArea",
+        "What was the number of girls named 'Amelia' in 2010 in the UK?",
+        "images/VLAT/StackedArea.png"
+    ],
+
+    [
+        "StackedArea",
+        "About what was the ratio of the number of girls named 'Olivia' to those named 'Isla' in 2014 in the UK?",
+        "images/VLAT/StackedArea.png"
+    ],
+
+    [
+        "StackedArea",
+        "Over the course of years between 2009 and 2014, when was the number of girls named 'Amelia' at the maximum?",
+        "images/VLAT/StackedArea.png"
+    ],
+
+    [
+        "StackedArea",
+        "The number of girls named 'Isla' was __________ from 2009 to 2012?",
+        "images/VLAT/StackedArea.png"
+    ],
+
+    [
+        "StackedArea",
+        "True/False: In the UK, the number of girls named 'Amelia' in 2014 was more than it was in 2013",
+        "images/VLAT/StackedArea.png"
+    ],
+
+    [
+        "StackedArea",
+        "True/False: Over the course of years between 2009 and 2014, the number of girls named 'Isla' was always more than 'Olivia'",
+        "images/VLAT/StackedArea.png"
+    ],
+
+    [
+        "BubbleChart",
+        "What is the total length of the metro system in Beijing?",
+        "images/VLAT/BubbleChart.png"
+    ],
+
+    [
+        "BubbleChart",
+        "Which city's metro system has the largest number of stations?",
+        "images/VLAT/BubbleChart.png"
+    ],
+
+    [
+        "BubbleChart",
+        "What is the range of the total length of the metro systems?",
+        "images/VLAT/BubbleChart.png"
+    ],
+
+    [
+        "BubbleChart",
+        "Which city's metro system does lie outside the relationship between the total system length and the number of stations most?",
+        "images/VLAT/BubbleChart.png"
+    ],
+
+    [
+        "BubbleChart",
+        "True/False: A group of the metro systems of the world has approximately 300 stations and around a 200km system length.",
+        "images/VLAT/BubbleChart.png"
+    ],
+
+    [
+        "BubbleChart",
+        "True/False: In general, the ridership of the metro system increases as the number of stations increases.",
+        "images/VLAT/BubbleChart.png"
+    ],
+
+    [
+        "BubbleChart",
+        "True/False: The metro system in Shanghai has more ridership than the metro system in Beijing.",
+        "images/VLAT/BubbleChart.png"
+    ],
+
+    [
+        "Choropleth",
+        "What was the unemployment rate for Indiana (IN) in 2015?",
+        "images/VLAT/Choropleth_New.png"
+    ],
+
+    [
+        "Choropleth",
+        "In which state was the unemployment rate the highest in 2015?",
+        "images/VLAT/Choropleth_New.png"
+    ],
+
+    [
+        "Choropleth",
+        "True/False: In 2020, the unemployment rate for Washington (WA) was higher than that of Wisconsin (WI).",
+        "images/VLAT/Choropleth_New.png"
+    ],
+
+    [
+        "TreeMap",
+        "For which website was the number of unique visitors the largest in 2010?",
+        "images/VLAT/TreeMap.png"
+    ],
+
+    [
+        "TreeMap",
+        "True/False: The number of unique visitors for Amazon was more than that of Yahoo in 2010.",
+        "images/VLAT/TreeMap.png"
+    ],
+
+    [
+        "TreeMap",
+        "True/False: Samsung is nested in the Financial category.",
+        "images/VLAT/TreeMap.png"
+    ]
+]
questions/__init__.py ADDED
File without changes
questions/mini_VLAT.py ADDED
@@ -0,0 +1,73 @@
+mini_VLAT_questions=[
+    [
+        "LineChart",
+        "What was the price of a barrel of oil in February 2020?",
+        "images/mini-VLAT/LineChart.png"
+    ],
+
+    [
+        "BarChart",
+        "What is the average internet speed in Japan?",
+        "images/mini-VLAT/BarChart.png"
+    ],
+
+    [
+        "StackedBar",
+        "What is the cost of peanuts in Seoul?",
+        "images/mini-VLAT/StackedBar.png"
+    ],
+
+    [
+        "100%StackedBar",
+        "Which country has the lowest proportion of Gold medals?",
+        "images/mini-VLAT/Stacked100.png"
+    ],
+
+    [
+        "PieChart",
+        "What is the approximate global smartphone market share of Samsung?",
+        "images/mini-VLAT/PieChart.png"
+    ],
+
+    [
+        "Histogram",
+        "What distance have customers traveled in the taxi the most?",
+        "images/mini-VLAT/Histogram.png"
+    ],
+
+    [
+        "Scatterplot",
+        "True/False: There is a negative linear relationship between the height and the weight of the 85 males.",
+        "images/mini-VLAT/Scatterplot.png"
+    ],
+
+    [
+        "AreaChart",
+        "What was the average price of pount of coffee beans in October 2019?",
+        "images/mini-VLAT/AreaChart.png"
+    ],
+
+    [
+        "StackedArea",
+        "What was the ratio of girls named 'Isla' to girls named 'Amelia' in 2012 in the UK?",
+        "images/mini-VLAT/StackedArea.png"
+    ],
+
+    [
+        "BubbleChart",
+        "Which city's metro system has the largest number of stations?",
+        "images/mini-VLAT/BubbleChart.png"
+    ],
+
+    [
+        "Choropleth",
+        "True/False: In 2020, the unemployment rate for Washington (WA) was higher than that of Wisconsin (WI).",
+        "images/mini-VLAT/Choropleth_New.png"
+    ],
+
+    [
+        "TreeMap",
+        "True/False: eBay is nested in the Software category.",
+        "images/mini-VLAT/TreeMap.png"
+    ]
+]