rmayormartins's picture
Subindo arquivos2
9cfa877
raw
history blame
2.44 kB
import functools
import tempfile

import cv2
import gradio as gr
import numpy as np
import torch
from gtts import gTTS
from PIL import Image
from skimage.feature import greycomatrix, greycoprops
from transformers import BlipProcessor, BlipForConditionalGeneration, MarianMTModel, MarianTokenizer
# YOLOv5 object detector: load the 'yolov5s' entry point of the
# 'ultralytics/yolov5' repository through torch.hub. This runs at import time;
# process_image() calls this module-level `model` for object detection.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
# Texture analysis using a GLCM (gray-level co-occurrence matrix)
def analyze_texture(image):
    """Return the GLCM contrast of ``image``.

    Args:
        image: A PIL image or any array-like accepted by ``np.array``.
            Colour input is interpreted in RGB(A) channel order, which is
            what PIL produces; 2-D input is treated as already grayscale.

    Returns:
        The ``'contrast'`` property of the co-occurrence matrix computed with
        distance 5 and angle 0 over 256 gray levels.
    """
    pixels = np.array(image)
    if pixels.ndim == 2:
        # Single-channel input: cvtColor would reject it, and no conversion
        # is needed anyway.
        gray_image = pixels
    else:
        # PIL delivers channels as R, G, B; using the BGR code here would swap
        # the luma weights of red and blue.
        gray_image = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)
    glcm = greycomatrix(
        gray_image,
        distances=[5],
        angles=[0],
        levels=256,
        symmetric=True,
        normed=True,
    )
    return greycoprops(glcm, 'contrast')[0, 0]
# Describe the image using BLIP
@functools.lru_cache(maxsize=1)
def _load_blip_captioner():
    """Load the BLIP processor and captioning model once and reuse them."""
    checkpoint = "Salesforce/blip-image-captioning-base"
    processor = BlipProcessor.from_pretrained(checkpoint)
    captioner = BlipForConditionalGeneration.from_pretrained(checkpoint)
    return processor, captioner


def describe_image(image):
    """Generate a caption for ``image`` with the BLIP captioning model.

    Args:
        image: The image to caption, in a form accepted by ``BlipProcessor``.

    Returns:
        The decoded caption string with special tokens removed.
    """
    # The processor and weights are cached after the first call instead of
    # being re-instantiated from the checkpoint on every request.
    processor, captioner = _load_blip_captioner()
    inputs = processor(image, return_tensors="pt")
    out = captioner.generate(**inputs)
    return processor.decode(out[0], skip_special_tokens=True)
# Translate the description into Portuguese
@functools.lru_cache(maxsize=1)
def _load_en_pt_translator():
    """Load the Marian tokenizer and translation model once and reuse them."""
    model_name = 'Helsinki-NLP/opus-mt-en-pt'
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    translator = MarianMTModel.from_pretrained(model_name)
    return tokenizer, translator


def translate_description(description):
    """Translate ``description`` with the 'Helsinki-NLP/opus-mt-en-pt' model.

    Args:
        description: The text to translate.

    Returns:
        The decoded translation string with special tokens removed.
    """
    # The tokenizer and weights are cached after the first call instead of
    # being re-instantiated from the checkpoint on every request.
    tokenizer, translator = _load_en_pt_translator()
    batch = tokenizer(description, return_tensors="pt", padding=True)
    translated = translator.generate(**batch)
    return tokenizer.decode(translated[0], skip_special_tokens=True)
# Process the image and generate the voice output
def process_image(image):
    """Detect objects in ``image``, caption it in Portuguese and synthesize speech.

    Args:
        image: The input image, passed to the module-level detector and to
            ``describe_image``.

    Returns:
        A tuple ``(annotated_image, caption, audio_path)``: the first rendered
        detection result as a PIL image, the translated caption, and the path
        of the MP3 file containing the spoken caption.
    """
    # Object detection
    results = model(image)
    detected_image = results.render()[0]

    # Image description. Colour and texture statistics are not part of the
    # returned values, so they are not computed here.
    description = describe_image(image)
    translated_description = translate_description(description)

    # Text to speech. Each call writes to its own file so that overlapping
    # requests cannot overwrite each other's audio. delete=False keeps the file
    # on disk after this function returns, since the caller reads it later.
    tts = gTTS(text=translated_description, lang='pt')
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
        audio_path = audio_file.name
    tts.save(audio_path)

    # Output
    return Image.fromarray(detected_image), translated_description, audio_path
# Example input offered in the UI. The image is opened at import time from a
# hard-coded absolute path, so the script cannot start if the file is missing.
example_image = Image.open("/mnt/data/example1.JPG")
# Gradio interface: one PIL image in; annotated image, translated caption and
# audio file out, all produced by process_image().
# NOTE(review): gr.inputs / gr.outputs look like the legacy component
# namespaces; confirm the pinned Gradio version still provides them.
# NOTE(review): `examples` receives a PIL image object rather than a file
# path; confirm the Gradio version in use accepts that.
iface = gr.Interface(
fn=process_image,
inputs=gr.inputs.Image(type="pil"),
outputs=[gr.outputs.Image(type="pil"), gr.outputs.Textbox(), gr.outputs.Audio(type="file")],
examples=[example_image]
)
# Launch the interface (runs unconditionally at module level).
iface.launch()