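"""Gradio demo: captions an image fetched from a URL with BLIP
(Salesforce/blip-image-captioning-large) and voices the caption through an
external text-to-speech Hugging Face Space."""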
import base64
import tempfile

import gradio as gr
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

# Load the BLIP captioning model once at startup; inference runs on CPU.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large").to("cpu")
# Placeholder class (currently unused by the app).
class Aspecto:
    pass

screen = Aspecto()

with gr.Blocks(
    theme=gr.themes.Ocean(
        primary_hue="pink",
        neutral_hue="indigo",
        font=[gr.themes.GoogleFont("Montserrat"), "Playwrite England SemiJoine", "Quicksand"],
    )
) as demo:
    textbox = gr.Textbox(label="Url")
    with gr.Row():
        button = gr.Button("Describir", variant="primary")
        clear = gr.Button("Borrar")
    output = gr.Textbox(label="Resumen")
    with gr.Row():
        button2 = gr.Button("Leer", variant="primary")
        clear2 = gr.Button("Borrar")
    output2 = gr.Audio(label="Audio")
    def describir(url):
        # Download the image, caption it with BLIP, and return the decoded text.
        raw_image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
        inputs = processor(raw_image, return_tensors="pt").to("cpu")
        out = model.generate(**inputs)
        return processor.decode(out[0], skip_special_tokens=True)
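    # Illustrative only (hypothetical URL and output): describir("https://example.com/cat.jpg")
    # might return something like "a cat sitting on a couch".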
    def leer(texto):
        # Send the caption to an external text-to-speech Space.
        response = requests.post(
            "https://charly-text-to-speech.hf.space/run/predict",
            json={"data": [texto]},
        ).json()
        data = response["data"][0]
        # Extract the base64 payload (drop the 'data:audio/flac;base64,' prefix).
        audio_base64 = data.split(",")[1]
        # Decode the base64 string into raw audio bytes.
        audio_data = base64.b64decode(audio_base64)
        # Write the audio to a temporary file so gr.Audio can play it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".flac") as temp_audio_file:
            temp_audio_file.write(audio_data)
            temp_audio_path = temp_audio_file.name
        return temp_audio_path
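    # Assumption: the remote Space returns {"data": ["data:audio/flac;base64,<payload>"]};
    # if its API changes, the split/decode in leer() will break.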
    # Caption the image at the given URL, then voice the resulting caption.
    button.click(describir, [textbox], output)
    button2.click(leer, [output], output2)
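    # Assumed intent: the "Borrar" buttons reset the URL input and the summary.
    clear.click(lambda: "", None, textbox)
    clear2.click(lambda: "", None, output)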

demo.launch(debug=True)