MaykaGR committed on
Commit
e60395e
verified
1 Parent(s): 46b6beb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -7
app.py CHANGED
@@ -15,6 +15,10 @@ from torch.nn.utils.parametrizations import weight_norm
15
  login(token=os.environ["HF_TOKEN"])
16
 
17
  device = torch.device("cpu")
 
 
 
 
18
 
19
  #img_url = 'https://www.caracteristicass.de/wp-content/uploads/2023/02/imagenes-artisticas.jpg'
20
 
@@ -32,8 +36,6 @@ with gr.Blocks(theme=gr.themes.Ocean(primary_hue="pink", neutral_hue="indigo", f
32
  output2 = gr.Audio(label="Audio")
33
 
34
  def describir(url):
35
- processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
36
- model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large").to("cpu")
37
  raw_image = Image.open(requests.get(url, stream=True).raw).convert('RGB')
38
  inputs = processor(raw_image, return_tensors="pt").to("cpu")
39
  out = model.generate(**inputs)
@@ -41,8 +43,6 @@ with gr.Blocks(theme=gr.themes.Ocean(primary_hue="pink", neutral_hue="indigo", f
41
  return processor.decode(out[0], skip_special_tokens=True)
42
 
43
  def leer(texto):
44
- pipe = StableAudioPipeline.from_pretrained("stabilityai/stable-audio-open-1.0")
45
- pipe = pipe.to("cpu")
46
  prompt = texto
47
  negative_prompt = "Low quality."
48
 
@@ -59,10 +59,11 @@ with gr.Blocks(theme=gr.themes.Ocean(primary_hue="pink", neutral_hue="indigo", f
59
  generator=generator,
60
  ).audios
61
 
62
- #sf.write("demo.wav", salida, pipe.vae.sampling_rate)
63
- return audio[0].T.float().cpu().numpy()
 
64
 
65
 
66
- button.click(describir, [textbox], output)
67
 
68
  demo.launch(debug=True)
 
15
  login(token=os.environ["HF_TOKEN"])
16
 
17
  device = torch.device("cpu")
18
+ processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
19
+ model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large").to("cpu")
20
+ pipe = StableAudioPipeline.from_pretrained("stabilityai/stable-audio-open-1.0")
21
+ pipe = pipe.to("cpu")
22
 
23
  #img_url = 'https://www.caracteristicass.de/wp-content/uploads/2023/02/imagenes-artisticas.jpg'
24
 
 
36
  output2 = gr.Audio(label="Audio")
37
 
38
  def describir(url):
 
 
39
  raw_image = Image.open(requests.get(url, stream=True).raw).convert('RGB')
40
  inputs = processor(raw_image, return_tensors="pt").to("cpu")
41
  out = model.generate(**inputs)
 
43
  return processor.decode(out[0], skip_special_tokens=True)
44
 
45
  def leer(texto):
 
 
46
  prompt = texto
47
  negative_prompt = "Low quality."
48
 
 
59
  generator=generator,
60
  ).audios
61
 
62
+ salida = audio[0].T.float().cpu().numpy()
63
+ sf.write("demo.wav", salida, pipe.vae.sampling_rate)
64
+ return sf.read("demo.wav")
65
 
66
 
67
+ button.click(describir, [textbox], output, leer, [output], output2)
68
 
69
  demo.launch(debug=True)