Spaces:
Sleeping
Sleeping
File size: 4,075 Bytes
8b092c8 377de90 8b092c8 377de90 8b092c8 377de90 8b092c8 377de90 8b092c8 377de90 8b092c8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
import streamlit as st
import pandas as pd
from streamlit_pandas_profiling import st_profile_report
from pathlib import Path
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
# ---- PAGE SETUP & INTRO ----
# Browser-tab title, icon and wide layout for the whole page.
st.set_page_config(
    page_title="Francesco Daimon Fernicola",
    page_icon=":milky_way:",
    layout="wide",
)

# Markdown links to the author's public profiles.
profile_links = """
[Github](https://github.com/FrancescoFernicola)
[Unibo](https://www.unibo.it/sitoweb/francesco.fernicola2)
[LinkedIn](https://www.linkedin.com/in/francesco-fernicola-69a0771b7/?locale=en_US)
[Twitter](https://twitter.com/FrancescoDaimon)
"""

# Greeting, headline, short bio and profile links, grouped in one container.
intro = st.container()
intro.subheader("Hello, and welcome to my official webpage! I am Daimon :alien:")
intro.title("PhD Candidate in Machine Translation / Translator / Mountain enthusiast")
intro.write("I am passionate about finding new ways to effectively use and understand Machine Translation and effectively evaluating its quality.")
intro.write(profile_links)
# ---- DATA UPLOAD ----
# Lets the visitor upload a TSV/CSV file and shows the parsed table together
# with its summary statistics.
st.title('Upload your data')
st.subheader('Input TSV/CSV')
uploaded_file = st.file_uploader("Choose a file")

if uploaded_file is None:
    # Nothing uploaded yet: show the hint and skip the spinner entirely.
    st.info("☝️ Upload a TSV/CSV file")
else:
    with st.spinner("Loading..."):
        # Compare the extension case-insensitively so "DATA.TSV" is still
        # parsed as tab-separated; anything else is read as comma-separated.
        is_tsv = uploaded_file.name.lower().endswith('.tsv')
        try:
            data = pd.read_csv(uploaded_file, sep="\t" if is_tsv else ",")
        except (
            pd.errors.EmptyDataError,
            pd.errors.ParserError,
            UnicodeDecodeError,
        ) as exc:
            # Report unreadable uploads in the page instead of letting the
            # script abort with a traceback.
            data = None
            st.error(f"Could not read '{uploaded_file.name}' as a TSV/CSV file: {exc}")

        if data is not None:
            st.subheader("DataFrame")
            st.write(data)
            st.write(data.describe())
# ---- MBART-50 TRANSLATOR: SETUP ----
st.subheader("MBART-50 Translator")

# Default text for the source box, and an empty placeholder for the result.
source = "In the beginning the Universe was created. This has made a lot of people very angry and been widely regarded as a bad move."
target = ""

MBART_CHECKPOINT = "facebook/mbart-large-50-many-to-many-mmt"

# Streamlit re-executes this script on every widget interaction, so loading
# the model inline would re-instantiate it on each rerun. Use whichever
# "cache a global resource" decorator the installed Streamlit release offers:
# st.cache_resource (current), st.experimental_singleton (older), or the
# legacy st.cache as a last resort.
_cache_resource = getattr(st, "cache_resource", None) or getattr(
    st, "experimental_singleton", None
)
if _cache_resource is None:
    _cache_resource = st.cache(allow_output_mutation=True)


@_cache_resource
def _load_mbart_model(checkpoint):
    """Return the MBART model for *checkpoint*, reused across script reruns."""
    return MBartForConditionalGeneration.from_pretrained(checkpoint)


model = _load_mbart_model(MBART_CHECKPOINT)
# The tokenizer is intentionally NOT cached: get_translation() assigns
# tokenizer.src_lang, and a shared cached instance would let that mutation
# leak between concurrent sessions.
tokenizer = MBart50TokenizerFast.from_pretrained(MBART_CHECKPOINT)
def get_translation(src_code, trg_code, src):
    """Translate *src* from language *src_code* into language *trg_code*.

    Relies on the module-level ``tokenizer`` and ``model``. The tokenizer's
    ``src_lang`` attribute is set to *src_code* as a side effect.

    Args:
        src_code: Source language code, assigned to ``tokenizer.src_lang``.
        trg_code: Target language code; must be a key of
            ``tokenizer.lang_code_to_id``.
        src: Text to translate.

    Returns:
        The result of ``tokenizer.batch_decode`` on the generated tokens,
        with special tokens skipped.

    Raises:
        KeyError: If *trg_code* is not in ``tokenizer.lang_code_to_id``.
    """
    tokenizer.src_lang = src_code
    model_inputs = tokenizer(src, return_tensors="pt")

    # Force the first generated token to be the target-language code.
    target_bos_id = tokenizer.lang_code_to_id[trg_code]
    output_ids = model.generate(**model_inputs, forced_bos_token_id=target_bos_id)

    return tokenizer.batch_decode(output_ids, skip_special_tokens=True)
# ---- MBART-50 TRANSLATOR: FORM ----
# Language codes the form accepts for both source and target.
valid_languages = ['en_XX', 'fr_XX', 'de_DE', 'it_IT', 'es_XX']

with st.form("my_form"):
    left_c, right_c = st.columns(2)
    with left_c:
        # Same options, same order as valid_languages, so reuse the list.
        src_lang = st.selectbox('Source language', valid_languages)
    with right_c:
        # Kept as its own tuple: the different order makes 'fr_XX' the
        # default target while 'en_XX' stays the default source.
        trg_lang = st.selectbox(
            'Target language',
            ('fr_XX', 'en_XX', 'de_DE', 'it_IT', 'es_XX'),
        )
    source = st.text_area("Source", value=source, height=130, placeholder="Enter the source text...")
    submitted = st.form_submit_button("Translate")

    if submitted:
        # Whitespace-only input counts as empty: there is nothing to translate.
        has_text = bool(source.strip())
        if has_text and src_lang in valid_languages and trg_lang in valid_languages:
            with st.spinner("Translating..."):
                try:
                    # Only the translation call is guarded; rendering the
                    # result happens in the else branch.
                    target = get_translation(src_lang, trg_lang, source)[0]
                except Exception as exc:
                    # `except Exception` rather than a bare `except:` so that
                    # BaseException subclasses (KeyboardInterrupt, SystemExit,
                    # ...) are not swallowed, and the cause is shown instead
                    # of being silently discarded.
                    st.subheader("Translation failed :sad:")
                    st.error(f"{type(exc).__name__}: {exc}")
                else:
                    st.subheader("Translation done!")
                    target = st.text_area("Target", value=target, height=130)
        else:
            st.write("Please enter the source text, source language and target language.")
# ---- CONTACT ----
# Raw HTML contact form that POSTs name, email and message to formsubmit.co.
contact_form = """
<form action="https://formsubmit.co/daimon.f@outlook.com" method="POST">
<input type="hidden" name="_captcha" value="false">
<input type="text" name="name" placeholder="Your name" required>
<input type="email" name="email" placeholder="Your email" required>
<textarea name="message" placeholder="Your message here" required></textarea>
<button type="submit">Send</button>
</form>
"""

with st.container():
    # Divider, heading and a blank "##" line before the form.
    st.write("---")
    st.header("Get in Touch With Me!")
    st.write("##")

    # Two columns: the form goes on the left, the right one only holds an
    # empty placeholder.
    left_column, right_column = st.columns(2)
    # unsafe_allow_html is required for the <form> markup to be rendered.
    left_column.markdown(contact_form, unsafe_allow_html=True)
    right_column.empty()