AustingDong committed on
Commit
6a0d13c
·
1 Parent(s): 035a152

modified font, corrected model name

Browse files
Files changed (3) hide show
  1. app.py +1 -1
  2. demo/cam.py +7 -6
  3. demo/model_utils.py +2 -2
app.py CHANGED
@@ -286,7 +286,7 @@ with gr.Blocks() as demo:
286
  saliency_map_output = gr.Gallery(label="Saliency Map", height=300, columns=1)
287
 
288
  with gr.Column():
289
- model_selector = gr.Dropdown(choices=["Clip", "ChartGemma-2B", "Janus-1B", "Janus-7B", "LLaVA-1.5-7B"], value="Clip", label="model")
290
  response_type = gr.Dropdown(choices=["Visualization only"], value="Visualization only", label="response_type")
291
  focus = gr.Dropdown(choices=["Visual Encoder"], value="Visual Encoder", label="focus")
292
  saliency_map_method = gr.Dropdown(choices=["GradCAM"], value="GradCAM", label="saliency map type")
 
286
  saliency_map_output = gr.Gallery(label="Saliency Map", height=300, columns=1)
287
 
288
  with gr.Column():
289
+ model_selector = gr.Dropdown(choices=["Clip", "ChartGemma-2B", "Janus-1B", "Janus-7B", "LLaVA-v1.6-Mistral-7B"], value="Clip", label="model")
290
  response_type = gr.Dropdown(choices=["Visualization only"], value="Visualization only", label="response_type")
291
  focus = gr.Dropdown(choices=["Visual Encoder"], value="Visual Encoder", label="focus")
292
  saliency_map_method = gr.Dropdown(choices=["GradCAM"], value="GradCAM", label="saliency map type")
demo/cam.py CHANGED
@@ -11,12 +11,13 @@ from demo.modify_llama import *
11
 
12
 
13
  class AttentionGuidedCAM:
14
- def __init__(self, model):
15
  self.model = model
16
  self.gradients = []
17
  self.activations = []
18
  self.hooks = []
19
- self._register_hooks()
 
20
 
21
  def _register_hooks(self):
22
  """ Registers hooks to extract activations and gradients from ALL attention layers. """
@@ -309,7 +310,7 @@ class AttentionGuidedCAMJanus(AttentionGuidedCAM):
309
  class AttentionGuidedCAMLLaVA(AttentionGuidedCAM):
310
  def __init__(self, model, target_layers):
311
  self.target_layers = target_layers
312
- super().__init__(model)
313
  self._modify_layers()
314
  self._register_hooks_activations()
315
 
@@ -439,7 +440,7 @@ class AttentionGuidedCAMLLaVA(AttentionGuidedCAM):
439
  class AttentionGuidedCAMChartGemma(AttentionGuidedCAM):
440
  def __init__(self, model, target_layers):
441
  self.target_layers = target_layers
442
- super().__init__(model)
443
  self._modify_layers()
444
  self._register_hooks_activations()
445
 
@@ -473,7 +474,7 @@ class AttentionGuidedCAMChartGemma(AttentionGuidedCAM):
473
  outputs_raw = self.model(**inputs)
474
 
475
  self.model.zero_grad()
476
- print(outputs_raw)
477
  # loss = self.target_layers[-1].attention_map.sum()
478
  loss = outputs_raw.logits.max(dim=-1).values.sum()
479
  loss.backward()
@@ -616,7 +617,7 @@ def generate_gradcam(
616
  Returns:
617
  PIL.Image: The image overlaid with the Grad-CAM heatmap.
618
  """
619
- print("Generating Grad-CAM with shape:", cam.shape)
620
 
621
  if normalize:
622
  cam_min, cam_max = cam.min(), cam.max()
 
11
 
12
 
13
  class AttentionGuidedCAM:
14
+ def __init__(self, model, register=True):
15
  self.model = model
16
  self.gradients = []
17
  self.activations = []
18
  self.hooks = []
19
+ if register:
20
+ self._register_hooks()
21
 
22
  def _register_hooks(self):
23
  """ Registers hooks to extract activations and gradients from ALL attention layers. """
 
310
  class AttentionGuidedCAMLLaVA(AttentionGuidedCAM):
311
  def __init__(self, model, target_layers):
312
  self.target_layers = target_layers
313
+ super().__init__(model, register=False)
314
  self._modify_layers()
315
  self._register_hooks_activations()
316
 
 
440
  class AttentionGuidedCAMChartGemma(AttentionGuidedCAM):
441
  def __init__(self, model, target_layers):
442
  self.target_layers = target_layers
443
+ super().__init__(model, register=False)
444
  self._modify_layers()
445
  self._register_hooks_activations()
446
 
 
474
  outputs_raw = self.model(**inputs)
475
 
476
  self.model.zero_grad()
477
+ # print(outputs_raw)
478
  # loss = self.target_layers[-1].attention_map.sum()
479
  loss = outputs_raw.logits.max(dim=-1).values.sum()
480
  loss.backward()
 
617
  Returns:
618
  PIL.Image: The image overlaid with the Grad-CAM heatmap.
619
  """
620
+ # print("Generating Grad-CAM with shape:", cam.shape)
621
 
622
  if normalize:
623
  cam_min, cam_max = cam.min(), cam.max()
demo/model_utils.py CHANGED
@@ -45,7 +45,7 @@ class Clip_Utils(Model_Utils):
45
  @spaces.GPU(duration=120)
46
  def prepare_inputs(self, question_lst, image):
47
  image = Image.fromarray(image)
48
- print("image_size: ", image.size)
49
  inputs = self.processor(text=question_lst, images=image, return_tensors="pt", padding=True)
50
  return inputs
51
 
@@ -228,7 +228,7 @@ class ChartGemma_Utils(Model_Utils):
228
 
229
 
230
 
231
- def add_title_to_image(image, title, font_size=20):
232
  """Adds a title above an image using PIL and textbbox()."""
233
  img_width, img_height = image.size
234
 
 
45
  @spaces.GPU(duration=120)
46
  def prepare_inputs(self, question_lst, image):
47
  image = Image.fromarray(image)
48
+ # print("image_size: ", image.size)
49
  inputs = self.processor(text=question_lst, images=image, return_tensors="pt", padding=True)
50
  return inputs
51
 
 
228
 
229
 
230
 
231
+ def add_title_to_image(image, title, font_size=50):
232
  """Adds a title above an image using PIL and textbbox()."""
233
  img_width, img_height = image.size
234