Commit 2a13288 · Parent(s): 9cfa877
Uploading files 3

- app.py +9 -9
- requirements.txt +1 -1
app.py CHANGED
@@ -7,17 +7,17 @@ import cv2
 from skimage.feature import greycomatrix, greycoprops
 from transformers import BlipProcessor, BlipForConditionalGeneration, MarianMTModel, MarianTokenizer
 
-#
+# Load the YOLOv5 model
 model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
 
-#texture analysis using GLCM
+# Function for texture analysis using GLCM
 def analyze_texture(image):
     gray_image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2GRAY)
     glcm = greycomatrix(gray_image, distances=[5], angles=[0], levels=256, symmetric=True, normed=True)
     contrast = greycoprops(glcm, 'contrast')[0, 0]
     return contrast
 
-#
+# Function to describe an image using BLIP
 def describe_image(image):
     processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
     model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
@@ -26,7 +26,7 @@ def describe_image(image):
     description = processor.decode(out[0], skip_special_tokens=True)
     return description
 
-#
+# Function to translate the description into Portuguese
 def translate_description(description):
     model_name = 'Helsinki-NLP/opus-mt-en-pt'
     tokenizer = MarianTokenizer.from_pretrained(model_name)
@@ -35,9 +35,9 @@ def translate_description(description):
     translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
     return translated_text
 
-#
+# Main function: process the image and generate voice output
 def process_image(image):
-    # Detection of
+    # Object detection
     results = model(image)
     detected_image = results.render()[0]
 
@@ -55,13 +55,13 @@ def process_image(image):
     tts = gTTS(text=translated_description, lang='pt')
     tts.save("output.mp3")
 
-    #
+    # Return the image with detections, the description, and the audio
     return Image.fromarray(detected_image), translated_description, "output.mp3"
 
-#
+# Load example image
 example_image = Image.open("/mnt/data/example1.JPG")
 
-#Gradio
+# Gradio interface
 iface = gr.Interface(
     fn=process_image,
     inputs=gr.inputs.Image(type="pil"),
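A compatibility note on two APIs this file relies on: `skimage.feature.greycomatrix`/`greycoprops` are the legacy spellings; scikit-image 0.19 renamed them to `graycomatrix`/`graycoprops`, deprecated the old names, and later releases drop them entirely. A minimal fallback import, not part of this commit, that would keep analyze_texture working across versions:

# Compatibility sketch (assumption: both old and new scikit-image should be
# supported; this shim is not in the committed code). scikit-image 0.19
# renamed greycomatrix/greycoprops to graycomatrix/graycoprops and
# deprecated the old spellings, which later releases remove.
try:
    from skimage.feature import graycomatrix, graycoprops
except ImportError:
    # Older scikit-image (< 0.19) only ships the "grey" spellings.
    from skimage.feature import greycomatrix as graycomatrix
    from skimage.feature import greycoprops as graycoprops

With this shim, analyze_texture would call `graycomatrix(...)`/`graycoprops(...)` instead. Likewise, `gr.inputs.Image(type="pil")` is legacy Gradio syntax; current Gradio releases expect `gr.Image(type="pil")` passed directly to `inputs=`.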
requirements.txt CHANGED
@@ -3,7 +3,7 @@ torch
 Pillow
 numpy
 opencv-python
-scikit-image
+scikit-image>=0.18.3
 transformers
 gtts
 
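One caveat on the new pin: `scikit-image>=0.18.3` only sets a lower bound, while app.py still imports the `grey*` names that newer scikit-image releases no longer provide. If the code keeps the old spellings, a bounded pin is the safer hedge; this is a suggested alternative, not what the commit ships:

# Hypothetical stricter pin while the grey* spellings are still imported;
# drop the upper bound after migrating to graycomatrix/graycoprops.
scikit-image>=0.18.3,<0.19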