"""Gradio demo helpers: object detection, captioning, translation and speech.

An input image is annotated with YOLOv5 detections, captioned in English with
BLIP, the caption is translated to Portuguese with a MarianMT model, and the
translated caption is synthesised to an MP3 file with gTTS.
"""

import logging
import tempfile
from functools import lru_cache

import cv2
import gradio as gr
import numpy as np
import torch
from gtts import gTTS
from PIL import Image
from transformers import (
    BlipForConditionalGeneration,
    BlipProcessor,
    MarianMTModel,
    MarianTokenizer,
)

try:
    # Current scikit-image releases only ship the "gray" spelling.
    from skimage.feature import graycomatrix, graycoprops
except ImportError:
    # Older scikit-image releases only ship the "grey" spelling.
    from skimage.feature import greycomatrix as graycomatrix
    from skimage.feature import greycoprops as graycoprops

logger = logging.getLogger(__name__)

BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
TRANSLATION_CHECKPOINT = 'Helsinki-NLP/opus-mt-en-pt'

# YOLOv5 object detector, loaded once at import time.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')


def analyze_texture(image):
    """Return the GLCM contrast of *image* as a texture measure.

    The image is reduced to a single grey channel, a grey-level co-occurrence
    matrix is built for pixel pairs 5 px apart along angle 0, and the
    ``contrast`` property of that matrix is returned.

    Args:
        image: A PIL image or array-like. Colour data is interpreted in
            RGB(A) channel order, which is what PIL produces.

    Returns:
        The scalar contrast value taken from the co-occurrence matrix.
    """
    pixels = np.asarray(image)
    if pixels.ndim == 2:
        # Already single-channel; cvtColor would reject it.
        gray_image = pixels
    elif pixels.shape[2] == 4:
        gray_image = cv2.cvtColor(pixels, cv2.COLOR_RGBA2GRAY)
    else:
        # PIL yields RGB, not OpenCV's BGR; the BGR conversion code would
        # swap the red and blue luminance weights.
        gray_image = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)

    glcm = graycomatrix(
        gray_image,
        distances=[5],
        angles=[0],
        levels=256,
        symmetric=True,
        normed=True,
    )
    return graycoprops(glcm, 'contrast')[0, 0]


@lru_cache(maxsize=1)
def _load_captioner():
    """Load the BLIP processor and captioning model once per process."""
    processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
    captioner = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)
    return processor, captioner


@lru_cache(maxsize=1)
def _load_translator():
    """Load the English-to-Portuguese tokenizer and model once per process."""
    tokenizer = MarianTokenizer.from_pretrained(TRANSLATION_CHECKPOINT)
    translator = MarianMTModel.from_pretrained(TRANSLATION_CHECKPOINT)
    return tokenizer, translator


def describe_image(image):
    """Generate an English caption for *image* using BLIP.

    Args:
        image: The image to caption, in a form the BLIP processor accepts.

    Returns:
        The decoded caption with special tokens stripped.
    """
    # Reuse the cached checkpoint instead of reloading it on every request.
    processor, captioner = _load_captioner()
    inputs = processor(image, return_tensors="pt")
    out = captioner.generate(**inputs)
    return processor.decode(out[0], skip_special_tokens=True)


def translate_description(description):
    """Translate an English *description* into Portuguese.

    Args:
        description: English text to translate.

    Returns:
        The decoded translation with special tokens stripped.
    """
    tokenizer, translator = _load_translator()
    encoded = tokenizer(description, return_tensors="pt", padding=True)
    translated = translator.generate(**encoded)
    return tokenizer.decode(translated[0], skip_special_tokens=True)


def _synthesize_speech(text):
    """Write *text* as Portuguese speech to a fresh MP3 file; return its path.

    A unique temporary file is used so that overlapping requests do not
    overwrite each other's audio. The file is not deleted here because the
    caller hands the path on to the UI.
    """
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as handle:
        audio_path = handle.name
    # Saved after the handle is closed so the path can be reopened for writing.
    gTTS(text=text, lang='pt').save(audio_path)
    return audio_path


def process_image(image):
    """Run the full pipeline on *image* and return the UI outputs.

    Args:
        image: The input image (a PIL image when called from the interface),
            or ``None`` when nothing was supplied.

    Returns:
        A ``(annotated_image, translated_caption, audio_path)`` tuple.
        ``annotated_image`` is a PIL image with the detections drawn on it and
        ``audio_path`` is the MP3 file holding the spoken caption. When
        *image* is ``None`` the tuple is ``(None, "", None)``; when the
        caption is blank ``audio_path`` is ``None``.
    """
    if image is None:
        return None, "", None

    # Object detection
    results = model(image)
    detected_image = results.render()[0]

    # Colour analysis (per-channel mean) and texture analysis. These values
    # are not part of the returned outputs; they are only logged.
    mean_rgb = np.mean(np.array(image), axis=(0, 1))
    texture_contrast = analyze_texture(image)
    logger.debug("mean colour=%s, GLCM contrast=%s", mean_rgb, texture_contrast)

    # Image description, translated to Portuguese
    description = describe_image(image)
    translated_description = translate_description(description)

    # Text to speech; gTTS rejects empty text, so skip it for blank captions.
    audio_path = None
    if translated_description.strip():
        audio_path = _synthesize_speech(translated_description)

    return Image.fromarray(detected_image), translated_description, audio_path
# Sample image offered as a clickable example in the UI.
EXAMPLE_IMAGE_PATH = "/mnt/data/example1.JPG"


def build_interface():
    """Build the Gradio interface around ``process_image``.

    The interface takes one PIL image and shows three outputs: an image, a
    text box and an audio player fed from a file path. The sample image is
    listed as an example only when the file is present, so a missing sample
    does not prevent the app from starting.

    Returns:
        The configured ``gr.Interface`` instance.
    """
    import os

    # Examples are given as file paths, one inner list per example row.
    examples = None
    if os.path.isfile(EXAMPLE_IMAGE_PATH):
        examples = [[EXAMPLE_IMAGE_PATH]]

    # Top-level components replace the legacy gr.inputs / gr.outputs
    # namespaces, which recent Gradio releases no longer provide.
    return gr.Interface(
        fn=process_image,
        inputs=gr.Image(type="pil"),
        outputs=[
            gr.Image(type="pil"),
            gr.Textbox(),
            gr.Audio(type="filepath"),
        ],
        examples=examples,
    )


# Gradio
iface = build_interface()

if __name__ == "__main__":
    # Start the web server only when run as a script, not on import.
    iface.launch()