# Hugging Face Space app (albertmartinez) — commit 3ce1088 "Upgrade gradio".
import multiprocessing
import threading
import gradio as gr
from mining import mining
from sts import sts
from utils import getDataFrame, save_to_csv, delete_folder_periodically
import logging
# Configure logging once at import time; other modules pick up this setup
# via logging.getLogger(__name__).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Cap on simultaneous executions of each upload event handler
# (passed as concurrency_limit to the Gradio .upload() wiring below).
CONCURRENCY_LIMIT = 5

# Sentence-embedding models offered in the UI dropdowns.
AVAILABLE_MODELS = [
    "Lajavaness/bilingual-embedding-large",
    "sentence-transformers/all-mpnet-base-v2",
    "intfloat/multilingual-e5-large-instruct"
]

# Markdown blurb shown beneath the model dropdown for each choice;
# keys must match AVAILABLE_MODELS entries exactly.
MODEL_DESCRIPTIONS = {
    "Lajavaness/bilingual-embedding-large": "Multilingual model optimized for multiple languages. [More info](https://huggingface.co/Lajavaness/bilingual-embedding-large)",
    "sentence-transformers/all-mpnet-base-v2": "High-quality general-purpose model. [More info](https://huggingface.co/sentence-transformers/all-mpnet-base-v2)",
    "intfloat/multilingual-e5-large-instruct": "Multilingual model with instructions. [More info](https://huggingface.co/intfloat/multilingual-e5-large-instruct)"
}
def create_interface():
    """Build and return the Gradio Blocks UI.

    Two tabs:
      * Paraphrase Mining — find near-duplicate sentences within one corpus.
      * Semantic Textual Similarity (STS) — compare two sentence sets.

    Returns:
        gr.Blocks: the assembled (not yet launched) interface.
    """

    def update_model_description(model_name):
        # Swap the markdown blurb when the dropdown selection changes.
        return MODEL_DESCRIPTIONS[model_name]

    def notify_processing(result):
        # BUGFIX: gr.Error is an exception type — it must be *raised* for
        # Gradio to display it; returning it (as the old lambda did) is a no-op.
        if result is None:
            raise gr.Error("Error processing data. Please check the logs for details.")
        gr.Info("Processing completed successfully!")

    def notify_saved(saved_file):
        # Same raise-vs-return fix as notify_processing, for the save step.
        if saved_file is None:
            raise gr.Error("Error saving results. Please check the logs for details.")
        gr.Info("Results saved successfully!")

    def build_upload_column(section_title, button_label, table_label):
        # One CSV upload button plus a read-only preview table, wired so an
        # upload refreshes the preview. Returns the button (used as a file input).
        gr.Markdown(section_title)
        upload_button = gr.UploadButton(
            label=button_label,
            file_types=['.csv'],
            file_count="single",
            variant="primary"
        )
        preview = gr.Dataframe(
            headers=["_id", "text"],
            col_count=2,
            label=table_label,
            interactive=False
        )
        upload_button.upload(
            fn=getDataFrame,
            inputs=upload_button,
            outputs=preview,
            concurrency_limit=CONCURRENCY_LIMIT
        )
        return upload_button

    def build_model_controls():
        # Model dropdown with a live description, similarity-threshold slider,
        # and the submit button. Returns (dropdown, slider, button).
        model = gr.Dropdown(
            choices=AVAILABLE_MODELS,
            label="Select Model",
            value=AVAILABLE_MODELS[0],
            interactive=True
        )
        description = gr.Markdown(MODEL_DESCRIPTIONS[AVAILABLE_MODELS[0]])
        model.change(
            fn=update_model_description,
            inputs=model,
            outputs=description
        )
        score = gr.Slider(
            minimum=0.0,
            maximum=1.0,
            value=0.96,
            step=0.01,
            label="Similarity Threshold",
            interactive=True
        )
        submit = gr.Button("Process", variant="primary")
        return model, score, submit

    def build_download_section(results_df):
        # "Download as CSV" button + file widget; saving is followed by a toast.
        download_button = gr.Button("Download Results as CSV", variant="secondary")
        download_file = gr.File(label="Downloadable File")
        download_button.click(
            fn=save_to_csv,
            inputs=results_df,
            outputs=download_file
        ).then(
            fn=notify_saved,
            inputs=[download_file],
            outputs=[]
        )

    with gr.Blocks(title="Sentence Transformers Demo") as demo:
        gr.Markdown("# Sentence Transformers Demo")
        gr.Markdown("This application provides two main functionalities: Paraphrase Mining and Semantic Textual Similarity (STS).")

        with gr.Tab("Paraphrase Mining"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown(
                        "### Paraphrase Mining\n"
                        "Find paraphrases (texts with identical/similar meaning) in a large corpus of sentences.\n"
                        "Upload a CSV file containing your sentences and select a model to begin."
                    )
            with gr.Row():
                with gr.Column():
                    upload_button_sentences = build_upload_column(
                        "#### Input Sentences", "Upload Sentences CSV", "Sentences Data"
                    )
            with gr.Row():
                with gr.Column():
                    model_mining, score_mining, submit_button_mining = build_model_controls()
            with gr.Row():
                with gr.Column():
                    output_mining = gr.Dataframe(
                        headers=["score", "sentence_1", "sentence_2"],
                        type="polars",
                        label="Mining Results"
                    )
                    submit_button_mining.click(
                        fn=mining,
                        inputs=[model_mining, upload_button_sentences, score_mining],
                        outputs=output_mining
                    ).then(
                        fn=notify_processing,
                        inputs=[output_mining],
                        outputs=[]
                    )
                    build_download_section(output_mining)

        with gr.Tab("Semantic Textual Similarity"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown(
                        "### Semantic Textual Similarity (STS)\n"
                        "Calculate semantic similarity between two sets of sentences.\n"
                        "Upload two CSV files containing your sentences and select a model to begin."
                    )
            with gr.Row():
                with gr.Column():
                    upload_button_sentences1 = build_upload_column(
                        "#### First Set of Sentences", "Upload First Set CSV", "First Set Data"
                    )
                with gr.Column():
                    upload_button_sentences2 = build_upload_column(
                        "#### Second Set of Sentences", "Upload Second Set CSV", "Second Set Data"
                    )
            with gr.Row():
                with gr.Column():
                    model_sts, score_sts, submit_button_sts = build_model_controls()
            with gr.Row():
                with gr.Column():
                    output_sts = gr.Dataframe(
                        headers=["score", "sentences1", "sentences2"],
                        type="polars",
                        label="Similarity Results"
                    )
                    submit_button_sts.click(
                        fn=sts,
                        inputs=[model_sts, upload_button_sentences1, upload_button_sentences2, score_sts],
                        outputs=output_sts
                    ).then(
                        fn=notify_processing,
                        inputs=[output_sts],
                        outputs=[]
                    )
                    build_download_section(output_sts)

    return demo
if __name__ == "__main__":
    try:
        # "spawn" gives each worker a fresh interpreter; safer than "fork"
        # with native-extension model libraries. NOTE(review): raises
        # RuntimeError if a start method was already set — confirm this runs
        # before any other multiprocessing use.
        multiprocessing.set_start_method("spawn")

        # Background janitor: wipe the data folder every 30 minutes.
        # daemon=True so the thread dies with the main process.
        folder_path = "data"
        cleanup_thread = threading.Thread(
            target=delete_folder_periodically,
            args=(folder_path, 1800),
            daemon=True
        )
        cleanup_thread.start()

        # Build the UI and serve it on all interfaces, port 7860.
        demo = create_interface()
        demo.launch(
            share=False,
            server_name="0.0.0.0",
            server_port=7860,
            show_error=True,
            show_api=False
        )
    except Exception:
        # logger.exception records the full traceback, which the old
        # logger.error(f"... {str(e)}") discarded; re-raise so the process
        # exits with a nonzero status.
        logger.exception("Error starting application")
        raise