import os

import gradio as gr
import torch

from pipeline import KeywordExtractorPipeline

# Absolute directory that contains this script (symlinks resolved), so that
# files shipped next to it can be located regardless of the working directory.
DIR_PATH = os.path.dirname(os.path.realpath(__file__))


def extract_keyword(title, text, top_n, ngram_low_range, ngram_high_range, min_freq, diversify_result):
    """Run the keyword pipeline on one document and format the result as text.

    Args:
        title: Document title, forwarded to the pipeline under the "title" key.
        text: Document body, forwarded to the pipeline under the "text" key.
        top_n: Forwarded as ``top_n``; integral floats are converted to ``int``.
        ngram_low_range: First element of the ``ngram_n`` tuple; integral
            floats are converted to ``int``.
        ngram_high_range: Second element of the ``ngram_n`` tuple; integral
            floats are converted to ``int``.
        min_freq: Forwarded unchanged as ``min_freq``.
        diversify_result: Forwarded unchanged as ``diversify_result``.

    Returns:
        One ``"<keyword>: <score>"`` line per pair yielded by the pipeline,
        each terminated by a newline; an empty string when nothing is yielded.
    """

    def _whole(value):
        # Numeric UI widgets may hand over 10.0 instead of 10 (this depends on
        # the Gradio version and the widget's precision). Only floats that are
        # exactly integral are converted; None, ints and fractional values are
        # passed through untouched so existing behaviour is never narrowed.
        if isinstance(value, float) and value.is_integer():
            return int(value)
        return value

    # `kw_pipeline` is a module-level object created elsewhere in this file.
    keyword_ls = kw_pipeline(
        inputs={"text": text, "title": title},
        min_freq=min_freq,
        ngram_n=(_whole(ngram_low_range), _whole(ngram_high_range)),
        top_n=_whole(top_n),
        diversify_result=diversify_result,
    )

    # Build the output in one pass instead of repeated string concatenation.
    return ''.join(f'{kw}: {score}\n' for kw, score in keyword_ls)


# Code guarded by gr.NO_RELOAD is not re-executed when Gradio's reload mode
# re-runs this file, so the model loading below happens only once per process.
if gr.NO_RELOAD:
    # NOTE(review): torch.load unpickles the file, so these checkpoints must
    # come from a trusted source. The result is used directly as an object with
    # .eval(), i.e. the files hold whole pickled objects rather than state
    # dicts; recent PyTorch releases default to weights_only=True, which may
    # reject such files -- confirm against the installed version.
    print("Loading PhoBERT model")
    phobert = torch.load(f'{DIR_PATH}/pretrained-models/phobert.pt')
    phobert.eval()

    print("Loading NER model")
    ner_model = torch.load(f'{DIR_PATH}/pretrained-models/ner-vietnamese-electra-base.pt')
    ner_model.eval()

    # Shared pipeline instance used by extract_keyword().
    kw_pipeline = KeywordExtractorPipeline(phobert, ner_model)

if __name__ == "__main__":
    # Build the web form: one input widget per extract_keyword() parameter,
    # listed in the same order as the function signature.
    demo = gr.Interface(
        fn=extract_keyword,
        inputs=[
            # NOTE(review): `value` pre-fills the box with real content (it is
            # not a placeholder), so it is submitted unless the user edits it.
            gr.Text(
                label="Title",
                lines=1,
                value="Enter title here",
            ),
            gr.Textbox(
                label="Text",
                lines=5,
                value="Enter text here",
            ),
            # precision=0 makes Gradio round the entry and pass it as an int,
            # which is what a keyword count and n-gram sizes need to be.
            gr.Number(
                label="Top N keywords",
                info="Number of keywords retrieved",
                value=10,
                precision=0,
            ),
            gr.Number(
                label="Ngram low range",
                value=1,
                precision=0,
            ),
            gr.Number(
                label="Ngram high range",
                value=3,
                precision=0,
            ),
            # Left without a precision: whether the pipeline requires an
            # integer here is not visible from this file.
            gr.Number(
                label="Ngram minimum frequency",
                value=1,
            ),
            gr.Checkbox(
                label="Diversify result",
            ),
        ],
        outputs=gr.Textbox(
            label="Keywords Extracted",
        ),
    )

    # share=True asks Gradio to also expose the app through a public share link.
    demo.launch(share=True)