AustingDong committed
Commit 035a152 · 1 Parent(s): b9b9d9b

fixed llava

Files changed (2)
  1. app.py +4 -4
  2. demo/model_utils.py +3 -3
app.py CHANGED
@@ -208,13 +208,13 @@ def model_slider_change(model_type):
         set_seed()
         model_utils = LLaVA_Utils()
         vl_gpt, tokenizer = model_utils.init_LLaVA()
-        language_model_max_layer = 24
-        language_model_best_layer = 8
+        language_model_max_layer = 32
+        language_model_best_layer = 24
 
         res = (
             gr.Dropdown(choices=["Visualization only", "answer + visualization"], value="Visualization only", label="response_type"),
-            gr.Slider(minimum=1, maximum=24, value=24, step=1, label="visualization layers min"),
-            gr.Slider(minimum=1, maximum=24, value=24, step=1, label="visualization layers max"),
+            gr.Slider(minimum=1, maximum=32, value=24, step=1, label="visualization layers min"),
+            gr.Slider(minimum=1, maximum=32, value=24, step=1, label="visualization layers max"),
             gr.Dropdown(choices=["Language Model"], value="Language Model", label="focus"),
             gr.Dropdown(choices=["GradCAM"], value="GradCAM", label="saliency map type")
         )
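The widened bounds track the underlying language model: llava-v1.6-mistral-7b-hf wraps Mistral-7B, which has 32 hidden layers, so the previous cap of 24 hid the top third of the network from the visualization sliders. A minimal sketch, not part of the commit, of deriving the bound from the checkpoint config instead of hardcoding it:

from transformers import AutoConfig

# Sketch only: read the layer count from the checkpoint config rather than
# hardcoding it. For llava-v1.6-mistral-7b-hf, text_config is the Mistral-7B
# config and num_hidden_layers is 32 -- the value the commit now hardcodes.
config = AutoConfig.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf")
language_model_max_layer = config.text_config.num_hidden_layers  # 32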
demo/model_utils.py CHANGED
@@ -2,7 +2,7 @@ import torch
 import numpy as np
 import spaces
 from PIL import Image, ImageDraw, ImageFont
-from transformers import AutoConfig, AutoModelForCausalLM, LlavaForConditionalGeneration, AutoProcessor, PaliGemmaForConditionalGeneration
+from transformers import AutoConfig, AutoModelForCausalLM, LlavaForConditionalGeneration, LlavaNextForConditionalGeneration, LlavaNextProcessor, AutoProcessor, PaliGemmaForConditionalGeneration
 from transformers import CLIPProcessor, CLIPModel
 from janus.models import MultiModalityCausalLM, VLChatProcessor
 
@@ -123,13 +123,13 @@ class LLaVA_Utils(Model_Utils):
         model_path = "llava-hf/llava-v1.6-mistral-7b-hf"
         config = AutoConfig.from_pretrained(model_path)
 
-        self.vl_gpt = LlavaForConditionalGeneration.from_pretrained(model_path,
+        self.vl_gpt = LlavaNextForConditionalGeneration.from_pretrained(model_path,
                                                                     low_cpu_mem_usage=True,
                                                                     attn_implementation='eager',
                                                                     output_attentions=True
                                                                     )
         self.vl_gpt, self.dtype, self.cuda_device = set_dtype_device(self.vl_gpt)
-        self.processor = AutoProcessor.from_pretrained(model_path)
+        self.processor = LlavaNextProcessor.from_pretrained(model_path)
         self.tokenizer = self.processor.tokenizer
 
         return self.vl_gpt, self.tokenizer
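Why this fixes LLaVA: llava-v1.6-* checkpoints are LLaVA-NeXT models, so loading them through the LLaVA-1.5 era pair (LlavaForConditionalGeneration plus AutoProcessor) does not match the checkpoint's architecture or its image preprocessing. A self-contained sketch of the corrected loading path mirroring the hunk above; the explicit torch_dtype is an assumption standing in for whatever set_dtype_device selects in this repo:

import torch
from transformers import LlavaNextForConditionalGeneration, LlavaNextProcessor

model_path = "llava-hf/llava-v1.6-mistral-7b-hf"

# LLaVA-NeXT classes match the v1.6 checkpoint; eager attention keeps
# output_attentions usable for GradCAM-style saliency maps.
model = LlavaNextForConditionalGeneration.from_pretrained(
    model_path,
    torch_dtype=torch.float16,   # assumption: the repo picks dtype via set_dtype_device
    low_cpu_mem_usage=True,
    attn_implementation="eager",
    output_attentions=True,
)
processor = LlavaNextProcessor.from_pretrained(model_path)
tokenizer = processor.tokenizer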