AustingDong
committed on
Commit
·
5da9d34
1
Parent(s):
22993a5
removed llava onevision
Browse files
- app.py +31 -28
- demo/model_utils.py +2 -1
app.py
CHANGED
@@ -291,7 +291,7 @@ with gr.Blocks() as demo:
|
|
291 |
activation_map_output = gr.Gallery(label="activation Map", height=300, columns=1)
|
292 |
|
293 |
with gr.Column():
|
294 |
-
model_selector = gr.Dropdown(choices=["Clip", "ChartGemma-3B", "Janus-1B", "Janus-7B", "LLaVA-1.5-7B"
|
295 |
response_type = gr.Dropdown(choices=["Visualization only"], value="Visualization only", label="response_type")
|
296 |
focus = gr.Dropdown(choices=["Visual Encoder"], value="Visual Encoder", label="focus")
|
297 |
activation_map_method = gr.Dropdown(choices=["GradCAM"], value="GradCAM", label="activation map type")
|
@@ -347,62 +347,65 @@ with gr.Blocks() as demo:
|
|
347 |
examples_inpainting = gr.Examples(
|
348 |
label="Multimodal Understanding examples",
|
349 |
examples=[
|
350 |
-
|
351 |
[
|
352 |
-
"What
|
353 |
-
"images/
|
354 |
],
|
|
|
355 |
[
|
356 |
"What is the average internet speed in Japan?",
|
357 |
"images/BarChart.png"
|
358 |
],
|
|
|
359 |
[
|
360 |
-
"What
|
361 |
-
"images/
|
362 |
-
],
|
363 |
-
[
|
364 |
-
"Which city's metro system has the largest number of stations?",
|
365 |
-
"images/BubbleChart.png"
|
366 |
],
|
367 |
|
368 |
[
|
369 |
-
"
|
370 |
-
"images/
|
371 |
],
|
372 |
-
|
|
|
|
|
|
|
|
|
|
|
373 |
[
|
374 |
"What distance have customers traveled in the taxi the most?",
|
375 |
"images/Histogram.png"
|
376 |
],
|
377 |
|
378 |
[
|
379 |
-
"
|
380 |
-
"images/
|
381 |
],
|
382 |
|
383 |
[
|
384 |
-
"
|
385 |
-
"images/
|
386 |
],
|
387 |
|
388 |
[
|
389 |
-
"
|
390 |
-
"images/
|
391 |
-
],
|
392 |
-
|
393 |
-
[
|
394 |
-
"Which country has the lowest proportion of Gold medals?",
|
395 |
-
"images/Stacked100.png"
|
396 |
],
|
397 |
|
398 |
[
|
399 |
-
"
|
400 |
-
"images/
|
|
|
|
|
|
|
|
|
|
|
401 |
],
|
402 |
|
403 |
[
|
404 |
-
"
|
405 |
-
"images/
|
406 |
]
|
407 |
|
408 |
],
|
|
|
291 |
activation_map_output = gr.Gallery(label="activation Map", height=300, columns=1)
|
292 |
|
293 |
with gr.Column():
|
294 |
+
model_selector = gr.Dropdown(choices=["Clip", "ChartGemma-3B", "Janus-1B", "Janus-7B", "LLaVA-1.5-7B"], value="Clip", label="model")
|
295 |
response_type = gr.Dropdown(choices=["Visualization only"], value="Visualization only", label="response_type")
|
296 |
focus = gr.Dropdown(choices=["Visual Encoder"], value="Visual Encoder", label="focus")
|
297 |
activation_map_method = gr.Dropdown(choices=["GradCAM"], value="GradCAM", label="activation map type")
|
|
|
347 |
examples_inpainting = gr.Examples(
|
348 |
label="Multimodal Understanding examples",
|
349 |
examples=[
|
350 |
+
|
351 |
[
|
352 |
+
"What was the price of a barrel of oil in February 2020?",
|
353 |
+
"images/LineChart.png"
|
354 |
],
|
355 |
+
|
356 |
[
|
357 |
"What is the average internet speed in Japan?",
|
358 |
"images/BarChart.png"
|
359 |
],
|
360 |
+
|
361 |
[
|
362 |
+
"What is the cost of peanuts in Seoul?",
|
363 |
+
"images/StackedBar.png"
|
|
|
|
|
|
|
|
|
364 |
],
|
365 |
|
366 |
[
|
367 |
+
"Which country has the lowest proportion of Gold medals?",
|
368 |
+
"images/Stacked100.png"
|
369 |
],
|
370 |
+
|
371 |
+
[
|
372 |
+
"What is the approximate global smartphone market share of Samsung?",
|
373 |
+
"images/PieChart.png"
|
374 |
+
],
|
375 |
+
|
376 |
[
|
377 |
"What distance have customers traveled in the taxi the most?",
|
378 |
"images/Histogram.png"
|
379 |
],
|
380 |
|
381 |
[
|
382 |
+
"True/False: There is a negative linear relationship between the height and the weight of the 85 males.",
|
383 |
+
"images/Scatterplot.png"
|
384 |
],
|
385 |
|
386 |
[
|
387 |
+
"What was the average price of pount of coffee beans in October 2019?",
|
388 |
+
"images/AreaChart.png"
|
389 |
],
|
390 |
|
391 |
[
|
392 |
+
"What was the ratio of girls named 'Isla' to girls named 'Amelia' in 2012 in the UK?",
|
393 |
+
"images/StackedArea.png"
|
|
|
|
|
|
|
|
|
|
|
394 |
],
|
395 |
|
396 |
[
|
397 |
+
"Which city's metro system has the largest number of stations?",
|
398 |
+
"images/BubbleChart.png"
|
399 |
+
],
|
400 |
+
|
401 |
+
[
|
402 |
+
"True/False: In 2020, the unemployment rate for Washington (WA) was higher than that of Wisconsin (WI).",
|
403 |
+
"images/Choropleth_New.png"
|
404 |
],
|
405 |
|
406 |
[
|
407 |
+
"True/False: eBay is nested in the Software category.",
|
408 |
+
"images/TreeMap.png"
|
409 |
]
|
410 |
|
411 |
],
|
demo/model_utils.py
CHANGED
@@ -162,8 +162,9 @@ class LLaVA_Utils(Model_Utils):
|
|
162 |
],
|
163 |
},
|
164 |
]
|
|
|
165 |
prompt = self.processor.apply_chat_template(conversation, add_generation_prompt=True)
|
166 |
-
pil_images = [Image.fromarray(image)]
|
167 |
prepare_inputs = self.processor(
|
168 |
images=pil_images, text=prompt, return_tensors="pt"
|
169 |
).to(self.cuda_device, dtype=self.dtype)
|
|
|
162 |
],
|
163 |
},
|
164 |
]
|
165 |
+
|
166 |
prompt = self.processor.apply_chat_template(conversation, add_generation_prompt=True)
|
167 |
+
pil_images = [Image.fromarray(image).resize((384, 384))]
|
168 |
prepare_inputs = self.processor(
|
169 |
images=pil_images, text=prompt, return_tensors="pt"
|
170 |
).to(self.cuda_device, dtype=self.dtype)
|