File size: 4,075 Bytes
8b092c8
 
 
 
377de90
8b092c8
 
 
 
377de90
8b092c8
 
 
 
 
 
 
 
 
377de90
8b092c8
 
 
377de90
 
 
 
 
 
 
 
 
 
 
8b092c8
377de90
8b092c8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import streamlit as st
import pandas as pd
from streamlit_pandas_profiling import st_profile_report
from pathlib import Path
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast

# Page-wide settings; Streamlit expects this to be the first st.* call.
st.set_page_config(
    page_title="Francesco Daimon Fernicola",
    page_icon=":milky_way:",
    layout="wide",
)

# Landing section: greeting, headline, one-line bio and profile links,
# all rendered into a single container in this order.
_intro = st.container()
_intro.subheader("Hello, and welcome to my official webpage! I am Daimon :alien:")
_intro.title("PhD Candidate in Machine Translation / Translator / Mountain enthusiast")
_intro.write("I am passionate about finding new ways to effectively use and understand Machine Translation and effectively evaluating its quality.")
_intro.write("""
    [Github](https://github.com/FrancescoFernicola)
    [Unibo](https://www.unibo.it/sitoweb/francesco.fernicola2)
    [LinkedIn](https://www.linkedin.com/in/francesco-fernicola-69a0771b7/?locale=en_US)
    [Twitter](https://twitter.com/FrancescoDaimon)
    """)

# ---- DATA UPLOAD ----
# Lets the visitor upload a delimited text file and shows it as a DataFrame
# together with its summary statistics.
st.title('Upload your data')

st.subheader('Input TSV/CSV')
uploaded_file = st.file_uploader("Choose a file")
with st.spinner("Loading..."):
    if uploaded_file is None:
        st.info("☝️ Upload a TSV/CSV file")
    else:
        # Compare the extension case-insensitively so e.g. "DATA.TSV" is still
        # read as tab-separated; anything else is read as comma-separated.
        separator = "\t" if uploaded_file.name.lower().endswith(".tsv") else ","
        try:
            data = pd.read_csv(uploaded_file, sep=separator)
        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
            # Report unreadable uploads in the page instead of aborting the
            # whole script run with a traceback.
            st.error(f"Could not read '{uploaded_file.name}' as TSV/CSV: {exc}")
        else:
            st.subheader("DataFrame")
            st.write(data)
            st.write(data.describe())
    

st.subheader("MBART-50 Translator")

# Default text shown in the "Source" box, and the initial (empty) translation.
source = "In the beginning the Universe was created. This has made a lot of people very angry and been widely regarded as a bad move."
target = ""

_MBART_CHECKPOINT = "facebook/mbart-large-50-many-to-many-mmt"


def _resource_cache():
    """Return Streamlit's resource-caching decorator if this version has one.

    Newer releases expose ``st.cache_resource``; older ones expose
    ``st.experimental_singleton``. If neither exists, an identity decorator is
    returned, which keeps the original load-on-every-run behaviour.
    """
    for attr in ("cache_resource", "experimental_singleton"):
        decorator = getattr(st, attr, None)
        if decorator is not None:
            return decorator
    return lambda func: func


@_resource_cache()
def _load_translator():
    """Load the MBART-50 model and its tokenizer from the shared checkpoint."""
    loaded_model = MBartForConditionalGeneration.from_pretrained(_MBART_CHECKPOINT)
    loaded_tokenizer = MBart50TokenizerFast.from_pretrained(_MBART_CHECKPOINT)
    return loaded_model, loaded_tokenizer


# Streamlit re-executes this script on every widget interaction; going through
# the cache avoids re-loading the checkpoint each time.
# NOTE(review): cached resources are shared by all sessions, and the tokenizer's
# ``src_lang`` attribute is mutated at translation time — confirm this is
# acceptable for the expected number of concurrent users.
model, tokenizer = _load_translator()


def get_translation(src_code, trg_code, src):
    """Translate ``src`` using the module-level ``model`` and ``tokenizer``.

    Args:
        src_code: Language code assigned to ``tokenizer.src_lang``
            (e.g. ``'en_XX'``).
        trg_code: Language code whose id is forced as the first generated
            token; it must be a key of ``tokenizer.lang_code_to_id``.
        src: Text handed to the tokenizer.

    Returns:
        Whatever ``tokenizer.batch_decode`` returns for the generated tokens,
        with special tokens skipped. The caller in this file uses element
        ``[0]`` as the translation.

    Raises:
        KeyError: If ``trg_code`` is not in ``tokenizer.lang_code_to_id``.
    """
    # The source language is set on the shared tokenizer before encoding.
    tokenizer.src_lang = src_code
    model_inputs = tokenizer(src, return_tensors="pt")

    # Forcing the first generated token to the target-language id is how the
    # output language is selected here.
    target_bos_id = tokenizer.lang_code_to_id[trg_code]
    output_ids = model.generate(**model_inputs, forced_bos_token_id=target_bos_id)

    return tokenizer.batch_decode(output_ids, skip_special_tokens=True)

# Language codes the translation form accepts.
valid_languages = ['en_XX', 'fr_XX', 'de_DE', 'it_IT', 'es_XX']

# ---- TRANSLATION FORM ----
# Widgets inside the form are only submitted when "Translate" is pressed.
with st.form("my_form"):
    left_c, right_c = st.columns(2)
    with left_c:
        src_lang = st.selectbox(
            'Source language',
            ('en_XX', 'fr_XX', 'de_DE', 'it_IT', 'es_XX'),
        )
    with right_c:
        # Same codes as the source list, but with French first.
        trg_lang = st.selectbox(
            'Target language',
            ('fr_XX', 'en_XX', 'de_DE', 'it_IT', 'es_XX'),
        )
    source = st.text_area("Source", value=source, height=130, placeholder="Enter the source text...")

    submitted = st.form_submit_button("Translate")
    if submitted:
        # Whitespace-only input is treated as empty rather than sent to the model.
        has_text = bool(source.strip())
        if has_text and src_lang in valid_languages and trg_lang in valid_languages:
            with st.spinner("Translating..."):
                # Only the translation call is guarded. `Exception` (not a bare
                # `except`) leaves BaseException-derived control-flow exceptions
                # alone, and the failure reason is shown to the user.
                try:
                    target = get_translation(src_lang, trg_lang, source)[0]
                except Exception as exc:
                    st.subheader("Translation failed :sad:")
                    st.error(f"{type(exc).__name__}: {exc}")
                else:
                    st.subheader("Translation done!")
                    target = st.text_area("Target", value=target, height=130)
        else:
            st.write("Please enter the source text, source language and target language.")



# ---- CONTACT ----
# Raw HTML form that POSTs name, email and message to formsubmit.co; the hidden
# `_captcha` field is sent with the value "false".
contact_form = """
    <form action="https://formsubmit.co/daimon.f@outlook.com" method="POST">
        <input type="hidden" name="_captcha" value="false">
        <input type="text" name="name" placeholder="Your name" required>
        <input type="email" name="email" placeholder="Your email" required>
        <textarea name="message" placeholder="Your message here" required></textarea>
        <button type="submit">Send</button>
    </form>
    """

_contact = st.container()
_contact.write("---")
_contact.header("Get in Touch With Me!")
_contact.write("##")

# Two columns: the form goes in the left one, the right one holds an empty
# placeholder.
left_column, right_column = _contact.columns(2)
left_column.markdown(contact_form, unsafe_allow_html=True)
right_column.empty()