AustingDong committed · Commit 6a0d13c · 1 Parent(s): 035a152

modified font, corrected model name

Browse files:
- app.py (+1 -1)
- demo/cam.py (+7 -6)
- demo/model_utils.py (+2 -2)
app.py CHANGED

@@ -286,7 +286,7 @@ with gr.Blocks() as demo:
             saliency_map_output = gr.Gallery(label="Saliency Map", height=300, columns=1)
 
         with gr.Column():
-            model_selector = gr.Dropdown(choices=["Clip", "ChartGemma-2B", "Janus-1B", "Janus-7B", "LLaVA-
+            model_selector = gr.Dropdown(choices=["Clip", "ChartGemma-2B", "Janus-1B", "Janus-7B", "LLaVA-v1.6-Mistral-7B"], value="Clip", label="model")
             response_type = gr.Dropdown(choices=["Visualization only"], value="Visualization only", label="response_type")
             focus = gr.Dropdown(choices=["Visual Encoder"], value="Visual Encoder", label="focus")
             saliency_map_method = gr.Dropdown(choices=["GradCAM"], value="GradCAM", label="saliency map type")
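The corrected choice string only matters if the handler that loads models dispatches on exactly that string. A minimal sketch of that wiring, with a hypothetical `run` callback standing in for the app's real handler:

```python
import gradio as gr

# Hypothetical handler: the real app dispatches the dropdown string to a
# model loader, so a misspelled choice would silently never match a branch.
def run(model_name):
    return f"would load: {model_name}"

with gr.Blocks() as demo:
    model_selector = gr.Dropdown(
        choices=["Clip", "ChartGemma-2B", "Janus-1B", "Janus-7B",
                 "LLaVA-v1.6-Mistral-7B"],
        value="Clip", label="model")
    status = gr.Textbox(label="status")
    model_selector.change(run, inputs=model_selector, outputs=status)
```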
demo/cam.py CHANGED

@@ -11,12 +11,13 @@ from demo.modify_llama import *
 
 
 class AttentionGuidedCAM:
-    def __init__(self, model):
+    def __init__(self, model, register=True):
         self.model = model
         self.gradients = []
         self.activations = []
         self.hooks = []
-
+        if register:
+            self._register_hooks()
 
     def _register_hooks(self):
         """ Registers hooks to extract activations and gradients from ALL attention layers. """

@@ -309,7 +310,7 @@ class AttentionGuidedCAMJanus(AttentionGuidedCAM):
 class AttentionGuidedCAMLLaVA(AttentionGuidedCAM):
     def __init__(self, model, target_layers):
         self.target_layers = target_layers
-        super().__init__(model)
+        super().__init__(model, register=False)
         self._modify_layers()
         self._register_hooks_activations()
 

@@ -439,7 +440,7 @@ class AttentionGuidedCAMLLaVA(AttentionGuidedCAM):
 class AttentionGuidedCAMChartGemma(AttentionGuidedCAM):
     def __init__(self, model, target_layers):
         self.target_layers = target_layers
-        super().__init__(model)
+        super().__init__(model, register=False)
         self._modify_layers()
         self._register_hooks_activations()
 

@@ -473,7 +474,7 @@ class AttentionGuidedCAMChartGemma(AttentionGuidedCAM):
         outputs_raw = self.model(**inputs)
 
         self.model.zero_grad()
-        print(outputs_raw)
+        # print(outputs_raw)
         # loss = self.target_layers[-1].attention_map.sum()
         loss = outputs_raw.logits.max(dim=-1).values.sum()
         loss.backward()

@@ -616,7 +617,7 @@ def generate_gradcam(
     Returns:
         PIL.Image: The image overlaid with the Grad-CAM heatmap.
     """
-    print("Generating Grad-CAM with shape:", cam.shape)
+    # print("Generating Grad-CAM with shape:", cam.shape)
 
     if normalize:
         cam_min, cam_max = cam.min(), cam.max()
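The main change here moves hook registration into the base constructor behind a `register` flag, so subclasses that install their own activation hooks can opt out instead of registering twice. A minimal sketch of the pattern under that reading; the class and method names follow the diff, but the hook target (`nn.MultiheadAttention`) is an assumption, since the real layer selection lives elsewhere in the file:

```python
import torch.nn as nn

class AttentionGuidedCAM:
    def __init__(self, model, register=True):
        self.model = model
        self.gradients = []
        self.activations = []
        self.hooks = []
        if register:                # subclasses pass register=False
            self._register_hooks()  # to avoid double registration

    def _register_hooks(self):
        """Registers hooks on every attention module (assumed structure)."""
        for module in self.model.modules():
            if isinstance(module, nn.MultiheadAttention):  # assumption
                self.hooks.append(module.register_forward_hook(
                    lambda m, inp, out: self.activations.append(out)))

    def remove_hooks(self):
        for h in self.hooks:
            h.remove()

class AttentionGuidedCAMChartGemma(AttentionGuidedCAM):
    def __init__(self, model, target_layers):
        self.target_layers = target_layers
        super().__init__(model, register=False)  # hooks added separately
```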
demo/model_utils.py CHANGED

@@ -45,7 +45,7 @@ class Clip_Utils(Model_Utils):
     @spaces.GPU(duration=120)
     def prepare_inputs(self, question_lst, image):
         image = Image.fromarray(image)
-        print("image_size: ", image.size)
+        # print("image_size: ", image.size)
         inputs = self.processor(text=question_lst, images=image, return_tensors="pt", padding=True)
         return inputs
 

@@ -228,7 +228,7 @@ class ChartGemma_Utils(Model_Utils):
 
 
 
-def add_title_to_image(image, title, font_size=
+def add_title_to_image(image, title, font_size=50):
     """Adds a title above an image using PIL and textbbox()."""
     img_width, img_height = image.size
 
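The second hunk bumps the default `font_size` to 50. A minimal self-contained sketch of what `add_title_to_image` plausibly does with `textbbox()`; the font loading and padding values are assumptions (the diff only shows the signature and docstring), and `ImageFont.load_default(size=...)` needs Pillow >= 10.1:

```python
from PIL import Image, ImageDraw, ImageFont

def add_title_to_image(image, title, font_size=50):
    """Adds a title above an image using PIL and textbbox()."""
    img_width, img_height = image.size
    # Assumption: stand-in for whatever .ttf the real code loads.
    font = ImageFont.load_default(size=font_size)
    # Measure the rendered title so the strip is tall enough.
    probe = ImageDraw.Draw(image)
    left, top, right, bottom = probe.textbbox((0, 0), title, font=font)
    text_w, text_h = right - left, bottom - top
    strip_h = text_h + 20  # assumed padding above/below the title
    canvas = Image.new("RGB", (img_width, img_height + strip_h), "white")
    draw = ImageDraw.Draw(canvas)
    draw.text(((img_width - text_w) // 2, 10), title, fill="black", font=font)
    canvas.paste(image, (0, strip_h))
    return canvas

# Usage: titled = add_title_to_image(Image.open("chart.png"), "GradCAM, layer 12")
```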