# NOTE(review): the three lines below are page-scrape residue, not program text.
# Spaces:
# Runtime error
# Runtime error
from transformers import T5Tokenizer, T5Model, T5ForConditionalGeneration, MarianMTModel, MarianTokenizer, pipeline | |
import nltk.data | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
nltk.download('punkt') | |
import gradio as gr | |
from gradio.mix import Parallel | |
# --- Summarization: T5 checkpoint (tokenizer + seq2seq model) ---------------
tokenizer_t5 = T5Tokenizer.from_pretrained(
    "panggi/t5-base-indonesian-summarization-cased"
)
model_t5 = T5ForConditionalGeneration.from_pretrained(
    "panggi/t5-base-indonesian-summarization-cased"
)

# --- Checkpoint names reused for both model and tokenizer of each pipeline --
pretrained_sentiment = "ProsusAI/finbert"
pretrained_ner = "51la5/roberta-large-NER"

# --- Sentence splitting (NLTK punkt) and Marian translation -----------------
sentence_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
tokenizer_translate = MarianTokenizer.from_pretrained(
    "Helsinki-NLP/opus-mt-id-en"
)
model_translate = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-id-en")
# Disabled alternative translation checkpoint, kept for reference:
#   finetuned_model = MarianMTModel.from_pretrained(
#       "wolfrage89/annual_report_translation_id_en")

# --- Inference pipelines -----------------------------------------------------
# return_all_scores=True makes the pipeline yield one score per label;
# sentiment_analysis() indexes the result as output[0] and relies on that.
sentiment_pipeline = pipeline(
    task="sentiment-analysis",
    model=pretrained_sentiment,
    tokenizer=pretrained_sentiment,
    return_all_scores=True,
)
# grouped_entities=True yields entries carrying an 'entity_group' key,
# which ner() reads.
ner_pipeline = pipeline(
    task="ner",
    model=pretrained_ner,
    tokenizer=pretrained_ner,
    grouped_entities=True,
)
# Sample inputs offered in the UI via gr.Examples.
examples = [
    "Perusahaan industri e-commerce Indonesia, Bukalapak telah memberhentikan puluhan karyawan dari beberapa function; Berlawanan dengan PHK sebelumnya, perusahaan mengontrak jajaran pekerja kantornya, harian Kompas melaporkan.",
    "Dengan pabrik produksi baru, perusahaan akan meningkatkan kapasitasnya untuk memenuhi peningkatan permintaan yang diharapkan dan akan meningkatkan penggunaan bahan baku dan oleh karena itu meningkatkan profitabilitas produksi.",
    "Lifetree didirikan pada tahun 2000, dan pendapatannya meningkat rata-rata 40% dengan margin di akhir 30-an.",
]
def get_translation(text):
    """Translate ``text`` with the module-level Marian tokenizer/model.

    The text is tokenized as a one-element batch (truncated to 104 tokens),
    passed to ``model_translate.generate``, and the first generated sequence
    is decoded with special tokens removed.

    Returns:
        The decoded translation as a string.
    """
    encoded = tokenizer_translate(
        [text],
        return_tensors='pt',
        max_length=104,
        truncation=True,
    )
    generated = model_translate.generate(**encoded)
    # Only one input was submitted, so only the first sequence is decoded.
    first_sequence = generated[0]
    return tokenizer_translate.decode(first_sequence, skip_special_tokens=True)
def summ_t5(text):
    """Summarize ``text`` with the module-level T5 tokenizer/model.

    Returns:
        The first generated sequence, decoded with special tokens removed.
    """
    # Generation settings, collected in one place for readability.
    generation_options = dict(
        max_length=100,
        num_beams=2,
        repetition_penalty=2.5,
        length_penalty=1.0,
        early_stopping=True,
        no_repeat_ngram_size=2,
        use_cache=True,
    )
    encoded = tokenizer_t5.encode(text, return_tensors='pt')
    candidates = model_t5.generate(encoded, **generation_options)
    return tokenizer_t5.decode(candidates[0], skip_special_tokens=True)
def sentiment_analysis(text):
    """Run ``sentiment_pipeline`` on ``text`` and return ``{label: score}``.

    The first element of the pipeline output is iterated; each entry's
    ``"label"`` becomes a key and its ``"score"`` the value.
    """
    per_label = sentiment_pipeline(text)[0]
    scores = {}
    for entry in per_label:
        scores[entry["label"]] = entry["score"]
    return scores
def ner(text):
    """Run ``ner_pipeline`` on ``text`` and package the result for display.

    Each entity dict returned by the pipeline gets an ``'entity'`` key that
    copies its ``'entity_group'`` value (the dicts are modified in place).

    Returns:
        A dict with the original ``text`` and the list of ``entities``.
    """
    entities = ner_pipeline(text)
    for item in entities:
        group = item['entity_group']
        item['entity'] = group
    return dict(text=text, entities=entities)
def sentiment_df(text):
    """Build a per-sentence sentiment table for ``text``.

    The text is split with ``sentence_tokenizer``, every sentence is
    translated with ``get_translation``, and every translation is scored with
    ``sentiment_analysis``. For each sentence the highest-scoring label and
    its score (rounded to 3 decimals) are kept.

    Returns:
        A pandas DataFrame with columns ``Text``, ``Eng``, ``Label``, ``Score``.
    """
    sentences = sentence_tokenizer.tokenize(text)
    # Translate everything first, then score, in the same order as the input.
    translations = list(map(get_translation, sentences))
    predictions = list(map(sentiment_analysis, translations))

    top_labels = []
    top_scores = []
    for prediction in predictions:
        # On ties, max() keeps the first label encountered.
        best = max(prediction, key=prediction.get)
        top_labels.append(best)
        top_scores.append(round(prediction[best], 3))

    table = pd.DataFrame(columns=['Text', 'Eng', 'Label', 'Score'])
    columns = (
        ('Text', sentences),
        ('Eng', translations),
        ('Label', top_labels),
        ('Score', top_scores),
    )
    for column_name, values in columns:
        table[column_name] = values
    return table
def run(text):
    """Run the full analysis for ``text``; this is the UI click handler.

    Returns:
        A 4-tuple ``(summary, summary_sentiment, summary_entities, table)``:
        the summary from ``summ_t5``, the sentiment of the translated summary,
        the NER result for the (untranslated) summary, and the per-sentence
        sentiment DataFrame of the original text.
    """
    summary = summ_t5(text)
    # Sentiment is computed on the translated summary.
    summary_sentiment = sentiment_analysis(get_translation(summary))
    summary_entities = ner(summary)
    sentence_table = sentiment_df(text)
    return summary, summary_sentiment, summary_entities, sentence_table
if __name__ == "__main__":
    # Build the Gradio Blocks UI: input column on the left, results on the
    # right, and a button wired to run().
    with gr.Blocks() as demo:
        gr.Markdown("""<h1 style="text-align:center">Financial Statement Analysis - Indonesia</h1>""")
        # The text is kept flush-left on purpose: Markdown renders lines
        # indented by four or more spaces as a code block.
        gr.Markdown(
            """
Creator: Wira Indra Kusuma
"""
        )
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(label="Input Text")
                # The caption of a Button is its `value` (first positional
                # argument); `label=` does not set the visible text.
                analyze_button = gr.Button("Analyze")
                examples_bar = gr.Examples(examples=examples, inputs=input_text)
            with gr.Column():
                summ_output = gr.Textbox(label="Article Summary")
                ner_output = gr.HighlightedText(label="NER of Summary")
                sent_output = gr.Label(label="Sentiment of Summary")
                # `max_rows` takes an int; the (count, "fixed") tuple form
                # belongs to `row_count`, not `max_rows`.
                dataframe_component = gr.DataFrame(
                    type="pandas",
                    label="Dataframe",
                    max_rows=20,
                    overflow_row_behaviour='paginate',
                    wrap=True,
                )
        # Output order must match the tuple returned by run():
        # (summary, sentiment, NER, dataframe).
        analyze_button.click(
            run,
            inputs=input_text,
            outputs=[summ_output, sent_output, ner_output, dataframe_component],
        )
    demo.launch()