AustingDong committed
Commit 9a42c30 · 1 Parent(s): 9d76fc2

utilize llava 1.5

Files changed (2)
  1. app.py +3 -3
  2. demo/model_utils.py +2 -2
app.py CHANGED
@@ -114,8 +114,8 @@ def multimodal_understanding(model_type,
         start = 620
     elif model_name.split('-')[0] == "ChartGemma":
         start = 1024
-    else:
-        start = 512
+    elif model_name.split('-')[0] == "LLaVA":
+        start = 581

     if activation_map_method == "GradCAM":
         # target_layers = vl_gpt.vision_model.vision_tower.blocks
@@ -286,7 +286,7 @@ with gr.Blocks() as demo:
             activation_map_output = gr.Gallery(label="activation Map", height=300, columns=1)

         with gr.Column():
-            model_selector = gr.Dropdown(choices=["Clip", "ChartGemma-3B", "Janus-1B", "Janus-7B", "LLaVA-v1.6-7B"], value="Clip", label="model")
+            model_selector = gr.Dropdown(choices=["Clip", "ChartGemma-3B", "Janus-1B", "Janus-7B", "LLaVA-1.5-7B"], value="Clip", label="model")
             response_type = gr.Dropdown(choices=["Visualization only"], value="Visualization only", label="response_type")
             focus = gr.Dropdown(choices=["Visual Encoder"], value="Visual Encoder", label="focus")
             activation_map_method = gr.Dropdown(choices=["GradCAM"], value="GradCAM", label="activation map type")
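Context on the start offsets above (an observation, not part of the commit): the new elif branch replaces the old catch-all default of start = 512 with an explicit per-model value, 581 for LLaVA. A plausible reading, sketched below under assumptions the diff itself does not confirm, is that start locates the block of image patch tokens in the multimodal sequence: LLaVA-1.5 encodes images with CLIP ViT-L/14 at 336x336, which produces 576 patch tokens, so 581 would be those 576 tokens plus a handful of surrounding prompt tokens.

# Minimal sketch (not repo code): how many visual tokens a square ViT emits.
def num_patch_tokens(image_size: int, patch_size: int) -> int:
    """Patch tokens from a square image, CLS token excluded."""
    return (image_size // patch_size) ** 2

print(num_patch_tokens(336, 14))  # 576 for LLaVA-1.5's CLIP ViT-L/14 at 336px
print(num_patch_tokens(224, 14))  # 256 for a plain 224px CLIP ViT-L/14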
demo/model_utils.py CHANGED
@@ -119,8 +119,8 @@ class LLaVA_Utils(Model_Utils):

     def init_LLaVA(self):

-        # model_path = "llava-hf/llava-1.5-7b-hf"
-        model_path = "llava-hf/llava-v1.6-vicuna-7b-hf"
+        model_path = "llava-hf/llava-1.5-7b-hf"
+        # model_path = "llava-hf/llava-v1.6-vicuna-7b-hf"
         config = AutoConfig.from_pretrained(model_path)

         self.vl_gpt = LlavaNextForConditionalGeneration.from_pretrained(model_path,
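One thing worth flagging (an observation, not a change the commit makes): the model path now points at the LLaVA-1.5 checkpoint, but the loader still uses LlavaNextForConditionalGeneration, the Transformers class for the v1.6/NeXT checkpoints. In Transformers, llava-hf/llava-1.5-7b-hf is normally paired with LlavaForConditionalGeneration instead. A minimal sketch of that usual pairing, assuming a single fp16-capable GPU:

# Hedged sketch, not the repo's code: the standard Transformers pairing for
# the LLaVA-1.5 checkpoints. The torch_dtype choice is an assumption.
import torch
from transformers import AutoProcessor, LlavaForConditionalGeneration

model_path = "llava-hf/llava-1.5-7b-hf"
processor = AutoProcessor.from_pretrained(model_path)
model = LlavaForConditionalGeneration.from_pretrained(
    model_path,
    torch_dtype=torch.float16,  # half precision; assumes a CUDA GPU is available
)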