"""Streamlit multimodal chatbot backed by the Hugging Face Inference API.

The page lets the user upload an image and/or type a message. An uploaded
image is captioned by an image-to-text model; the caption is stored in
``st.session_state.image_caption`` and prepended to later chat prompts so the
text-only chat model has some context about the picture.

Streamlit re-executes this script from top to bottom on every interaction, so
anything that must survive a rerun lives in ``st.session_state``.
"""

import os
import time
from typing import Any, Optional, Tuple

import requests
import streamlit as st

# Load Hugging Face API key
HF_API_KEY = os.getenv("HF_API_KEY")

# Define API URLs
IMG2TEXT_API = "https://api-inference.huggingface.co/models/nlpconnect/vit-gpt2-image-captioning"
CHAT_API = "https://api-inference.huggingface.co/models/facebook/blenderbot-3B"
# Only send an Authorization header when a key is configured; otherwise the
# header would carry the literal text "Bearer None".
HEADERS = {"Authorization": f"Bearer {HF_API_KEY}"} if HF_API_KEY else {}

MAX_RETRIES = 3
RETRY_DELAY_SECONDS = 5
REQUEST_TIMEOUT_SECONDS = 60
ALLOWED_IMAGE_TYPES = ("image/jpeg", "image/png")

WELCOME_MESSAGE = (
    "Hello! I'm a chatbot. You can upload an image or ask me anything to get started!"
)
IMAGE_FALLBACK_REPLY = "I received your image. What would you like to ask about it?"


def _extract_generated_text(body: Any) -> Optional[str]:
    """Return the non-empty ``generated_text`` of an inference response.

    The response body is either a dict or a list whose first element is a
    dict. Any other shape, or a missing/empty ``generated_text``, yields
    ``None``.
    """
    if isinstance(body, list) and body:
        body = body[0]
    if isinstance(body, dict):
        text = body.get("generated_text")
        if isinstance(text, str) and text:
            return text
    return None


def _post_with_retries(
    url: str, *, max_retries: int = MAX_RETRIES, **request_kwargs: Any
) -> Tuple[Optional[requests.Response], Optional[str]]:
    """POST to *url*, retrying while the API answers 503.

    Returns ``(response, None)`` for the first non-503 answer (which may
    still be an error status the caller must check), or ``(None, message)``
    when the request failed at the network level or every attempt got 503.
    """
    for attempt in range(1, max_retries + 1):
        try:
            response = requests.post(
                url, timeout=REQUEST_TIMEOUT_SECONDS, **request_kwargs
            )
        except requests.RequestException as exc:
            return None, f"⚠️ Network error while contacting the API: {exc}"

        if response.status_code != 503:
            return response, None

        # Only announce and wait when another attempt will actually follow.
        if attempt < max_retries:
            st.warning(
                f"⏳ Model warming up... Retrying in {RETRY_DELAY_SECONDS} seconds. "
                f"Attempt {attempt}/{max_retries}"
            )
            time.sleep(RETRY_DELAY_SECONDS)

    return None, "⚠️ The model is still warming up. Please try again in a moment."


def _ask_chatbot(
    prompt: str, *, max_retries: int = MAX_RETRIES, show_errors: bool = False
) -> Tuple[Optional[str], Optional[str]]:
    """Send *prompt* to the chat model.

    Returns ``(reply, None)`` on success or ``(None, error_message)`` on any
    failure, so the caller always has something to display. With
    ``show_errors`` set, an unexpected response shape is additionally
    reported through ``st.error``.
    """
    response, error = _post_with_retries(
        CHAT_API, max_retries=max_retries, headers=HEADERS, json={"inputs": prompt}
    )
    if response is None:
        return None, error
    if response.status_code != 200:
        return None, f"⚠️ Chatbot Error {response.status_code}: {response.text}"

    try:
        reply = _extract_generated_text(response.json())
    except ValueError:
        # The body was not valid JSON.
        return None, "⚠️ Error: Unable to generate response."

    if reply is None:
        if show_errors:
            st.error("⚠️ Unexpected response format from chatbot API.")
        return None, "⚠️ Unable to generate a response."
    return reply, None


def _say(role: str, content: str) -> None:
    """Append a message to the chat history and render it immediately."""
    st.session_state.messages.append({"role": role, "content": content})
    with st.chat_message(role):
        st.write(content)


def _process_image(img_bytes: bytes) -> None:
    """Caption the uploaded image and post the chatbot's reaction to it."""
    st.session_state.messages.append({"role": "user", "content": "[Image Uploaded]"})
    with st.chat_message("user"):
        st.image(img_bytes, caption="Uploaded Image", use_column_width=True)
    # TODO: surface the generated caption to the user once it is available.

    # Send raw image data to the image-to-text API.
    response, error = _post_with_retries(
        IMG2TEXT_API,
        headers={**HEADERS, "Content-Type": "application/octet-stream"},
        data=img_bytes,
    )
    if response is None:
        st.error(error)
        return
    if response.status_code != 200:
        st.error(f"⚠️ Image API Error: {response.status_code} - {response.text}")
        return

    try:
        image_caption = _extract_generated_text(response.json())
    except ValueError as exc:
        st.error(f"⚠️ Error: Unable to generate caption. Details: {exc}")
        return
    if image_caption is None:
        st.error("⚠️ Unexpected response format from image captioning API.")
        return

    # Remember the caption so later text questions can refer to the image.
    st.session_state.image_caption = image_caption

    bot_context = (
        f"Consider this image: {image_caption}. "
        "Please provide a relevant and engaging response to the image."
    )
    # Single attempt: if the chat model is unavailable, fall back to a canned
    # reply instead of making the user wait through retries.
    reply, _ = _ask_chatbot(bot_context, max_retries=1)
    _say("assistant", reply if reply is not None else IMAGE_FALLBACK_REPLY)


# App Title
st.title("Multimodal Chatbot")

if not HF_API_KEY:
    st.warning(
        "⚠️ HF_API_KEY is not set; requests to the Hugging Face API may be "
        "rejected or rate-limited."
    )

# Initialize chat history with the bot's welcome message
if "messages" not in st.session_state:
    st.session_state.messages = [{"role": "assistant", "content": WELCOME_MESSAGE}]

# Display chat history
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.write(msg["content"])

# Image upload
uploaded_file = st.file_uploader("Upload an image...", type=["jpg", "png", "jpeg"])

# User input
user_input = st.chat_input("Ask about this image or anything...")

# Process image if uploaded
if uploaded_file is None:
    # The uploader is empty, so the next upload (even of the same file)
    # should be processed again.
    st.session_state.pop("processed_upload", None)
elif uploaded_file.type not in ALLOWED_IMAGE_TYPES:
    st.error("⚠️ Please upload a valid JPG or PNG image.")
else:
    # The uploader keeps returning the same file on every rerun; process each
    # upload only once so it is not re-captioned and re-appended to the
    # history on every interaction. The upload is marked as handled before
    # the API calls so a failing request is not retried on every rerun.
    upload_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("processed_upload") != upload_key:
        st.session_state.processed_upload = upload_key
        _process_image(uploaded_file.getvalue())

# Process user input if provided
if user_input:
    # Merge image caption with user query if an image was uploaded
    stored_caption = st.session_state.get("image_caption")
    if stored_caption:
        combined_input = f"Image context: {stored_caption}. {user_input}"
    else:
        combined_input = user_input

    # Show the user's own words, not the caption-augmented prompt.
    _say("user", user_input)

    # Send combined input to chatbot with retries; on failure the error text
    # is shown as the assistant's reply so the turn is never left unanswered.
    reply, error = _ask_chatbot(combined_input, show_errors=True)
    _say("assistant", reply if reply is not None else error)

# Clear button to reset chat
if st.button("Clear Chat"):
    st.session_state.messages = []
    # Prefer st.rerun when this Streamlit version provides it; fall back to
    # the older experimental name otherwise.
    rerun = getattr(st, "rerun", None) or st.experimental_rerun
    rerun()