# mT5 Deployment OtherLanguage2ID
#
# Gradio demo: summarizes the text typed by the user and generates the
# summary with the Indonesian language token of the mT5 CrossSum checkpoint.

# library import
import functools

import gradio as gr
import tensorflow as tf  # NOTE(review): not referenced in this script; tensors are requested as PyTorch ("pt").
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# mT5 Transformer checkpoint used for summarization.
MODEL_NAME = "csebuetnlp/mT5_m2m_crossSum_enhanced"

# Default target language; must be a key of the model config's "langid_map".
TARGET_LANG = "indonesian"

# Token budget used both for truncating/padding the input and for capping the
# generated output.
MAX_TOKENS = 512


@functools.lru_cache(maxsize=1)
def _load_model():
    """Load the tokenizer and model once and reuse them for every request."""
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
    model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
    return tokenizer, model


# function to run
def run_model(input_text, min_length):
    """Summarize ``input_text`` with the mT5 CrossSum model.

    Args:
        input_text: Text to summarize. It is converted with ``str()`` and all
            runs of whitespace are collapsed to single spaces.
        min_length: Minimum output length forwarded to ``model.generate``.
            Coerced to ``int`` because UI sliders may deliver a float.

    Returns:
        The decoded model output with its first space-separated word removed,
        or an empty string when the input contains no non-whitespace text.
    """
    # preprocessing text input: remove redundant whitespace and line breaks
    text = " ".join(str(input_text).split())
    if not text:
        # Nothing to summarize; skip the expensive generation call.
        return ""

    tokenizer, model = _load_model()

    # get id of the token that selects the output language
    lang_token = model.config.task_specific_params["langid_map"][TARGET_LANG][1]
    decoder_start_token_id = tokenizer._convert_token_to_id(lang_token)

    # encode input to vector
    input_ids = tokenizer(
        text,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=MAX_TOKENS,
    )["input_ids"]

    # generate output (beam search; first returned sequence only)
    output_ids = model.generate(
        input_ids=input_ids,
        decoder_start_token_id=decoder_start_token_id,
        min_length=int(min_length),
        max_length=MAX_TOKENS,
        no_repeat_ngram_size=2,
        repetition_penalty=1.5,
        # NOTE(review): do_sample is not set, so temperature presumably has no
        # effect on beam search -- confirm before relying on it.
        # Earlier idea: 0.8 for a "Creative" mode, 0.2 for a "Better" mode.
        temperature=0.5,
        early_stopping=True,
        num_beams=4,
    )[0]

    # decode output to text
    summary = tokenizer.decode(
        output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )

    # Drop the first space-separated word of the decoded text.
    # NOTE(review): presumably this strips the leading language token -- confirm.
    return " ".join(summary.split(" ")[1:])


# example
# contoh = [["TAMPAN"]]

# title
title = "Text Summarization ID"

# description
description = "Demo for Text Summarization ID. Models are mT5"

# footer
# article = ""

# run gradio
gr.Interface(
    fn=run_model,
    # input text
    inputs=[
        gr.inputs.Textbox(
            lines=3,
            placeholder="Ketik disini...",
            label="Text",
        ),
        # The slider value is passed to run_model as ``min_length``, so the
        # label describes a minimum rather than a maximum.
        gr.inputs.Slider(
            minimum=100,
            maximum=400,
            step=10,
            default=150,
            label="Min Length (Minimum Summary Length)",
        ),
    ],
    # output text
    outputs=gr.outputs.Textbox(
        label="Output text",
    ),
    title=title,
    description=description,
    # article=article,
    # examples=contoh
).launch(debug=True)