AustingDong committed · Commit a25a8bd · 1 Parent(s): 789cbe1

change llava model

Browse files:
- app.py +1 -1
- demo/model_utils.py +5 -6
app.py
CHANGED
@@ -286,7 +286,7 @@ with gr.Blocks() as demo:
             activation_map_output = gr.Gallery(label="activation Map", height=300, columns=1)
 
         with gr.Column():
-            model_selector = gr.Dropdown(choices=["Clip", "ChartGemma-3B", "Janus-1B", "Janus-7B", "LLaVA-v1.6-
+            model_selector = gr.Dropdown(choices=["Clip", "ChartGemma-3B", "Janus-1B", "Janus-7B", "LLaVA-v1.6-7B"], value="Clip", label="model")
             response_type = gr.Dropdown(choices=["Visualization only"], value="Visualization only", label="response_type")
             focus = gr.Dropdown(choices=["Visual Encoder"], value="Visual Encoder", label="focus")
             activation_map_method = gr.Dropdown(choices=["GradCAM"], value="GradCAM", label="activation map type")
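For context, a minimal, self-contained sketch of how a dropdown like this is typically wired inside Gradio Blocks. Only the model_selector choices come from the diff above; the on_model_change handler and status textbox are hypothetical additions for illustration, not part of this commit.

import gradio as gr

# Hypothetical handler for illustration; the repository's actual wiring is not shown in this diff.
def on_model_change(model_name):
    return f"Selected model: {model_name}"

with gr.Blocks() as demo:
    with gr.Column():
        model_selector = gr.Dropdown(
            choices=["Clip", "ChartGemma-3B", "Janus-1B", "Janus-7B", "LLaVA-v1.6-7B"],
            value="Clip",
            label="model",
        )
        status = gr.Textbox(label="status")
        # Dropdown.change fires whenever the user picks a new entry.
        model_selector.change(on_model_change, inputs=model_selector, outputs=status)

demo.launch()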
demo/model_utils.py
CHANGED
@@ -120,7 +120,7 @@ class LLaVA_Utils(Model_Utils):
     def init_LLaVA(self):
 
         # model_path = "llava-hf/llava-1.5-7b-hf"
-        model_path = "llava-hf/llava-v1.6-
+        model_path = "llava-hf/llava-v1.6-vicuna-7b-hf"
         config = AutoConfig.from_pretrained(model_path)
 
         self.vl_gpt = LlavaNextForConditionalGeneration.from_pretrained(model_path,
@@ -138,11 +138,10 @@ class LLaVA_Utils(Model_Utils):
     def prepare_inputs(self, question, image):
         conversation = [
             {
-
-
-
-
-                {"type": "image"},
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": question},
+                    {"type": "image"},
             ],
         },
     ]
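For reference, a minimal sketch of how the new checkpoint and the chat-style conversation built by prepare_inputs are consumed with the Hugging Face transformers LLaVA-NeXT API. This mirrors the diff above but is not part of the commit; the image path, question text, and generation settings are placeholder assumptions.

import torch
from PIL import Image
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration

model_path = "llava-hf/llava-v1.6-vicuna-7b-hf"  # checkpoint introduced by this commit
processor = LlavaNextProcessor.from_pretrained(model_path)
model = LlavaNextForConditionalGeneration.from_pretrained(
    model_path, torch_dtype=torch.float16, device_map="auto"
)

# Placeholder inputs for illustration.
image = Image.open("chart.png")
question = "What is shown in this chart?"

# Same conversation structure as prepare_inputs in the diff above.
conversation = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": question},
            {"type": "image"},
        ],
    },
]

# apply_chat_template renders the conversation into the model's prompt format,
# inserting the image placeholder token where {"type": "image"} appears.
prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
inputs = processor(images=image, text=prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=64)
print(processor.decode(output[0], skip_special_tokens=True))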