File size: 3,131 Bytes
b8cf6ae 16ba103 b8cf6ae 16ba103 6303acb 16ba103 b8cf6ae 6303acb b8cf6ae 16ba103 b8cf6ae 16ba103 db225d0 b8cf6ae 16ba103 b8cf6ae db225d0 b8cf6ae 16ba103 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import streamlit as st
import json
import torch
from transformers import AutoTokenizer
from modelling_cnn import CNNForNER, SentimentCNNModel
# Load the Yoruba NER model
ner_model_name = "./my_model/pytorch_model.bin"  # local path to the saved weights
model_ner = "Testys/cnn_yor_ner"  # identifier handed to AutoTokenizer.from_pretrained


@st.cache_resource
def _load_ner_resources():
    """Build the NER tokenizer, config and model, cached across reruns.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the loader avoids re-reading the tokenizer and the
    weights each time. The returned objects are shared between reruns, so
    callers must treat them as read-only.

    Returns:
        A ``(tokenizer, config, model)`` tuple. ``config`` is the parsed
        ``./my_model/config.json`` and ``model`` has been put in eval mode.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_ner)
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open("./my_model/config.json", "r", encoding="utf-8") as f:
        config = json.load(f)
    model = CNNForNER(
        pretrained_model_name=config["pretrained_model_name"],
        num_classes=config["num_classes"],
    )
    # map_location forces the weights onto the CPU regardless of where they
    # were saved from.
    state_dict = torch.load(ner_model_name, map_location=torch.device("cpu"))
    model.load_state_dict(state_dict)
    model.eval()
    return tokenizer, config, model


ner_tokenizer, ner_config, ner_model = _load_ner_resources()
# Load the Yoruba sentiment analysis model
sentiment_model_name = "./sent_model/sent_pytorch_model.bin"  # local path to the saved weights
model_sent = "Testys/cnn_sent_yor"  # identifier handed to AutoTokenizer.from_pretrained


@st.cache_resource
def _load_sentiment_resources():
    """Build the sentiment tokenizer, config and model, cached across reruns.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the loader avoids re-reading the tokenizer and the
    weights each time. The returned objects are shared between reruns, so
    callers must treat them as read-only.

    Returns:
        A ``(tokenizer, config, model)`` tuple. ``config`` is the parsed
        ``./sent_model/config.json`` and ``model`` has been put in eval mode.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_sent)
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open("./sent_model/config.json", "r", encoding="utf-8") as f:
        config = json.load(f)
    model = SentimentCNNModel(
        transformer_model_name=config["pretrained_model_name"],
        num_classes=config["num_classes"],
    )
    # map_location forces the weights onto the CPU regardless of where they
    # were saved from.
    state_dict = torch.load(sentiment_model_name, map_location=torch.device("cpu"))
    model.load_state_dict(state_dict)
    model.eval()
    return tokenizer, config, model


sentiment_tokenizer, sentiment_config, sentiment_model = _load_sentiment_resources()
def analyze_text(text):
    """Run named-entity recognition and sentiment analysis on ``text``.

    Args:
        text: The raw input string to analyse.

    Returns:
        A tuple ``(ner_labels, sentiment)``. ``ner_labels`` is a list of
        ``"token: label"`` strings, one per token produced by the NER
        tokenizer. ``sentiment`` is the entry of the sentiment config's
        ``id2label`` mapping for the predicted class.
    """
    # --- Named entity recognition -------------------------------------
    # truncation=True asks the tokenizer to cap the sequence at its
    # configured maximum length so long input does not overflow the model.
    ner_inputs = ner_tokenizer(text, return_tensors="pt", truncation=True)

    # Index the single batch row instead of squeeze(): squeeze() would also
    # drop the sequence axis for a one-token input and yield a bare int.
    token_ids = ner_inputs["input_ids"][0].tolist()
    tokens = ner_tokenizer.convert_ids_to_tokens(token_ids)

    with torch.no_grad():
        ner_outputs = ner_model(**ner_inputs)
    # NOTE(review): assumes the model returns a tensor of per-token class
    # scores with the batch axis first — confirm against CNNForNER.
    label_ids = torch.argmax(ner_outputs, dim=-1)[0].tolist()

    # Config keys are strings because the mapping was read from JSON.
    # NOTE(review): this config is read via "id2labels" while the sentiment
    # config below is read via "id2label" — confirm both match the files.
    id_to_tag = ner_config["id2labels"]
    ner_labels = [
        f"{token}: {id_to_tag[str(label_id)]}"
        for token, label_id in zip(tokens, label_ids)
    ]

    # --- Sentiment analysis -------------------------------------------
    sentiment_inputs = sentiment_tokenizer(
        text, return_tensors="pt", truncation=True
    )
    with torch.no_grad():
        sentiment_outputs = sentiment_model(**sentiment_inputs)
    # argmax yields one predicted class id per batch row; only one row exists.
    sentiment_id = torch.argmax(sentiment_outputs, dim=1).tolist()[0]
    sentiment = sentiment_config["id2label"][str(sentiment_id)]

    return ner_labels, sentiment
def main():
    """Render the Streamlit page: input box, Analyze button and results.

    Nothing is computed until the Analyze button is pressed. If the input is
    empty or only whitespace, a warning is shown instead of running the
    models.
    """
    st.title("YorubaCNN Models for NER and Sentiment Analysis")

    # Input text
    text = st.text_area("Enter Yoruba text", "")

    if not st.button("Analyze"):
        return

    # Give feedback rather than silently ignoring an empty submission.
    if not text.strip():
        st.warning("Please enter some text to analyze.")
        return

    ner_labels, sentiment = analyze_text(text)

    # Display Named Entities
    st.subheader("Named Entities")
    st.write(ner_labels)

    # Display Sentiment Analysis
    st.subheader("Sentiment Analysis")
    st.write(f"Sentiment: {sentiment}")


if __name__ == "__main__":
    main()
|