|
import gradio as gr |
|
import model_wrapper |
|
|
|
|
|
# Build the prediction model once at import time; `predict` below reuses this
# single module-level instance for every request instead of re-creating it.
model = model_wrapper.PredictionModel()
|
|
|
|
|
def pretty_print_opinion(opinion_dict, align=False):
    """Render one opinion as a block of ``key: value`` lines.

    Args:
        opinion_dict: Mapping from field name to field value. The
            ``'Polarity'`` entry is printed as is. For every other entry
            the first element of the value (expected to be a list of
            strings, e.g. ``[['svart', 'kaffe'], ['9:14', '15:20']]``) is
            joined with spaces and wrapped in single quotes.
        align: When true, left-justify each ``key:`` label to the length of
            the longest key plus two, so that the values line up in a
            monospaced font. Defaults to false, which emits the labels
            without any padding.

    Returns:
        One line per entry, in mapping order, joined with newlines and
        terminated by a trailing newline. An empty mapping yields a single
        newline.
    """
    # The label width is only needed when alignment is requested;
    # ``default=0`` keeps an empty mapping from raising inside ``max``.
    width = (max(map(len, opinion_dict), default=0) + 2) if align else 0

    lines = []
    for key, value in opinion_dict.items():
        # ``ljust(0)`` is a no-op, so the unaligned form is just "key:".
        label = f'{key}:'.ljust(width)
        if key == 'Polarity':
            lines.append(f'{label} {value}')
        else:
            # Only the first element of the value is shown; anything after
            # it is not part of the printed output.
            tokens = ' '.join(value[0])
            lines.append(f"{label} '{tokens}'")
    return '\n'.join(lines) + '\n'
|
|
|
|
|
def predict(text):
    """Run the module-level ``model`` on one message and format its opinions.

    The message is wrapped in a one-element list, passed to ``model``, and
    the first prediction is read back. Progress and failures are reported
    with ``print``.

    Args:
        text: The input message to analyse.

    Returns:
        ``'No opinions detected'`` when the prediction's ``'opinions'``
        entry is empty; otherwise the output of ``pretty_print_opinion``
        for each opinion, joined with newlines.

    Raises:
        Exception: Any error raised while predicting or formatting is
            printed and then re-raised to the caller.
    """
    print(f'Input message "{text}"')
    try:
        # The model is called with a batch of one, so only item 0 matters.
        first_prediction = model([text])[0]
        opinions = first_prediction['opinions']
        if not opinions:
            return 'No opinions detected'

        results = [pretty_print_opinion(opinion) for opinion in opinions]
        print(f'Successfully predicted SA for input message "{text}": {results}')
        return '\n'.join(results)
    except Exception as err:
        print(f'Error for input message "{text}": {err}')
        raise err
|
|
|
|
|
|
|
markdown_text = ''' |
|
<br> |
|
<br> |
|
This space provides a gradio demo and an easy-to-run wrapper of the pre-trained model for structured sentiment analysis in the Norwegian language, pre-trained on the [NoReC dataset](https://huggingface.co/datasets/norec). |
|
This space contains an implementation of the method described in "Direct parsing to sentiment graphs" (Samuel _et al._, ACL 2022). The main repository, which also contains the scripts for training the model, can be found on the project's [GitHub](https://github.com/jerbarnes/direct_parsing_to_sent_graph). |
|
|
|
The sentiment graph model is based on an underlying masked language model – [NorBERT 2](https://huggingface.co/ltg/norbert2). |
|
The proposed method suggests three different ways to encode the sentiment graph: "node-centric", "labeled-edge", and "opinion-tuple". |
|
The current model |
|
- uses "labeled-edge" graph encoding |
|
- does not use character-level embedding |
|
- all other hyperparameters are set to [default values](https://github.com/jerbarnes/direct_parsing_to_sent_graph/blob/main/perin/config/edge_norec.yaml) |
|
, and it achieves the following results on the held-out set of the NoReC dataset: |
|
|
|
| Unlabeled sentiment tuple F1 | Target F1 | Relative polarity precision | |
|
|:----------------------------:|:----------:|:---------------------------:| |
|
| 0.434 | 0.541 | 0.926 | |
|
|
|
|
|
In "Word Substitution with Masked Language Models as Data Augmentation for Sentiment Analysis", we analyzed data augmentation strategies for improving the performance of the model. Using masked-language modeling (MLM), we augmented the sentences with MLM-substituted words inside, outside, or inside+outside the actual sentiment tuples. The results below show that augmentation may improve the model's performance. This space, however, runs the original model trained without augmentation. |
|
|
|
| | Augmentation rate | Unlabeled sentiment tuple F1 | Target F1 | Relative polarity precision | |
|
|----------------|-------------------|------------------------------|-----------|-----------------------------| |
|
| Baseline | 0% | 43.39 | 54.13 | 92.59 | |
|
| Outside | 59% | **45.08** | 56.18 | 92.95 | |
|
| Inside | 9% | 43.38 | 55.62 | 92.49 | |
|
| Inside+Outside | 27% | 44.12 | **56.44** | **93.19** | |
|
|
|
|
|
|
|
The model can be easily used for predicting sentiment tuples as follows: |
|
|
|
```python |
|
>>> import model_wrapper |
|
>>> model = model_wrapper.PredictionModel() |
|
>>> model.predict(['vi liker svart kaffe']) |
|
[{'sent_id': '0', |
|
'text': 'vi liker svart kaffe', |
|
'opinions': [{'Source': [['vi'], ['0:2']], |
|
'Target': [['svart', 'kaffe'], ['9:14', '15:20']], |
|
'Polar_expression': [['liker'], ['3:8']], |
|
'Polarity': 'Positive'}]}] |
|
``` |
|
''' |
|
|
|
|
|
|
|
# Assemble the demo page: an input/output row, a submit row, and the
# Markdown description underneath. Everything is created inside the
# `gr.Blocks()` context so that it becomes part of `demo`.
with gr.Blocks() as demo:
    # First row: the text to analyse and the box that receives the result.
    with gr.Row() as row:
        text_input = gr.Textbox(label="input")
        text_output = gr.Textbox(label="output")
    # Second row: the button that triggers a prediction.
    with gr.Row() as row:
        text_button = gr.Button("submit")

    # On click, pass the input textbox to `predict` and route its return
    # value to the output textbox.
    text_button.click(fn=predict, inputs=text_input, outputs=text_output)

    # Project description, results tables and usage example.
    gr.Markdown(markdown_text)


# Start serving the app; this runs at module top level (no __main__ guard).
demo.launch()
|
|