|
import streamlit as st |
|
import json |
|
import torch |
|
from transformers import AutoTokenizer |
|
from modelling_cnn import CNNForNER, SentimentCNNModel |
|
|
|
|
|
# --- NER model setup --------------------------------------------------------
# Path of the local checkpoint file whose state dict is loaded into the NER
# model below.
ner_model_name = "./my_model/pytorch_model.bin"

# Identifier handed to AutoTokenizer.from_pretrained (looks like a Hugging Face
# Hub repo id -- confirm).
model_ner = "Testys/cnn_yor_ner"
ner_tokenizer = AutoTokenizer.from_pretrained(model_ner)

# JSON files are UTF-8 by specification; pin the encoding so the platform's
# locale default cannot mis-decode non-ASCII content in the config.
with open("./my_model/config.json", "r", encoding="utf-8") as f:
    ner_config = json.load(f)

# The config supplies the constructor arguments for the NER network.
ner_model = CNNForNER(
    pretrained_model_name=ner_config["pretrained_model_name"],
    num_classes=ner_config["num_classes"],
)

# NOTE(review): torch.load unpickles the checkpoint; only load files that come
# from a trusted source. Tensors are mapped onto the CPU.
ner_model.load_state_dict(
    torch.load(ner_model_name, map_location=torch.device("cpu"))
)
# Switch to evaluation mode before the model is used for prediction.
ner_model.eval()
|
|
|
|
|
# --- Sentiment model setup --------------------------------------------------
# Path of the local checkpoint file whose state dict is loaded into the
# sentiment model below.
sentiment_model_name = "./sent_model/sent_pytorch_model.bin"

# Identifier handed to AutoTokenizer.from_pretrained (looks like a Hugging Face
# Hub repo id -- confirm).
model_sent = "Testys/cnn_sent_yor"
sentiment_tokenizer = AutoTokenizer.from_pretrained(model_sent)

# JSON files are UTF-8 by specification; pin the encoding so the platform's
# locale default cannot mis-decode non-ASCII content in the config.
with open("./sent_model/config.json", "r", encoding="utf-8") as f:
    sentiment_config = json.load(f)

# The config supplies the constructor arguments for the sentiment network.
sentiment_model = SentimentCNNModel(
    transformer_model_name=sentiment_config["pretrained_model_name"],
    num_classes=sentiment_config["num_classes"],
)

# NOTE(review): torch.load unpickles the checkpoint; only load files that come
# from a trusted source. Tensors are mapped onto the CPU.
sentiment_model.load_state_dict(
    torch.load(sentiment_model_name, map_location=torch.device("cpu"))
)
# Switch to evaluation mode before the model is used for prediction.
sentiment_model.eval()
|
|
|
|
|
def analyze_text(text):
    """Run NER tagging and sentiment classification on ``text``.

    The text is tokenized once with ``ner_tokenizer`` and once with
    ``sentiment_tokenizer``, then passed through the module-level
    ``ner_model`` and ``sentiment_model`` with gradient tracking disabled.

    Args:
        text: The input string to analyze.

    Returns:
        A ``(ner_labels, sentiment)`` tuple. ``ner_labels`` is a list of
        ``"token: label"`` strings pairing each tokenizer token with its
        predicted label; ``sentiment`` is the entry of
        ``sentiment_config["id2label"]`` for the highest-scoring class.
    """
    # ---- Named-entity recognition ----
    ner_inputs = ner_tokenizer(text, return_tensors="pt")

    # Index the single batch row rather than calling squeeze(): squeeze would
    # also collapse a length-1 sequence to a 0-d tensor, whose tolist() is a
    # bare int that cannot be iterated.
    token_ids = ner_inputs["input_ids"][0].tolist()
    tokens = ner_tokenizer.convert_ids_to_tokens(token_ids)

    with torch.no_grad():
        ner_outputs = ner_model(**ner_inputs)

    # Highest-scoring class along the last dimension, first batch row.
    # assumes ner_outputs is (batch, seq_len, num_classes) -- TODO confirm
    predicted_ids = torch.argmax(ner_outputs, dim=-1)[0].tolist()

    # NOTE(review): the NER config is read through "id2labels" while the
    # sentiment config uses "id2label"; confirm both keys match their files.
    id_to_label = ner_config["id2labels"]
    predicted_labels = [id_to_label[str(label_id)] for label_id in predicted_ids]
    ner_labels = [
        f"{token}: {label}" for token, label in zip(tokens, predicted_labels)
    ]

    # ---- Sentiment classification ----
    sentiment_inputs = sentiment_tokenizer(text, return_tensors="pt")

    with torch.no_grad():
        sentiment_outputs = sentiment_model(**sentiment_inputs)

    # argmax over dim 1 yields one class index per batch row; only one row
    # was submitted, so take the first.
    sentiment_id = torch.argmax(sentiment_outputs, dim=1).tolist()[0]
    sentiment = sentiment_config["id2label"][str(sentiment_id)]

    return ner_labels, sentiment
|
|
|
def main():
    """Render the Streamlit page and display analysis results on request.

    Shows a title and a text area. When the "Analyze" button is pressed the
    entered text is passed to ``analyze_text`` and the NER labels and the
    sentiment label are written to the page. If the input is empty or only
    whitespace, a warning is shown instead of running the models.
    """
    st.title("YorubaCNN Models for NER and Sentiment Analysis")

    text = st.text_area("Enter Yoruba text", "")

    # Nothing to do until the user presses the button.
    if not st.button("Analyze"):
        return

    # Give explicit feedback instead of silently ignoring an empty submission.
    if not text.strip():
        st.warning("Please enter some text to analyze.")
        return

    ner_labels, sentiment = analyze_text(text)

    st.subheader("Named Entities")
    st.write(ner_labels)

    st.subheader("Sentiment Analysis")
    st.write(f"Sentiment: {sentiment}")
|
|
|
# Build the page only when this file is executed as the main script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|
|