# Importing the necessary libraries
import gradio as gr
import numpy as np
import pandas as pd
import re
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
# Defining the labels the model can predict
labels = [ "business", "science", "health", "world", "sport", "politics", "entertainment", "technology", "education", "environment", "travel", "lifestyle", "crime", "opinion", "weather", "culture", "art", "food", "automotive", "finance", "international" ]
# Defining the model and tokenizer
model_name = "valurank/finetuned-distilbert-news-article-categorization"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
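# Note (addition, not part of the original Space): putting the model in eval mode
# disables dropout so inference is deterministic; the pretrained weights are unchanged.
model.eval()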
"""
#Reading in the text file
def read_in_text(url):
with open(url, 'r') as file:
article = file.read()
return article
"""
def clean_text(raw_text):
    # remove non-ASCII characters
    text = raw_text.encode("ascii", errors="ignore").decode("ascii")
    # replace newlines and tabs with spaces
    text = re.sub(r"\n", " ", text)
    text = re.sub(r"\t", " ", text)
    # collapse multiple spaces into one and trim the ends
    text = re.sub(r" +", " ", text).strip()
    # remove date stamps (e.g. "Date 12/31/2023") and time stamps (e.g. "10:30 AM EST")
    text = re.sub(r"Date\s\d{1,2}\/\d{1,2}\/\d{4}", "", text)
    text = re.sub(r"\d{1,2}:\d{2}\s[A-Z]+\s[A-Z]+", "", text)
    return text
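# Illustrative example (not part of the original script): a raw snippet such as
# "Breaking\tnews:\n\nmarkets  rally  Date 12/31/2023" comes back from clean_text()
# as roughly "Breaking news: markets rally", with tabs, newlines, repeated spaces,
# and date/time stamps stripped.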
# Defining a function to get the category of a news article
def get_category(text):
    text = clean_text(text)
    # tokenize the article and run it through the model
    input_tensor = tokenizer.encode(text, return_tensors="pt", truncation=True)
    logits = model(input_tensor).logits
    # convert the logits to probabilities and pick the most likely label
    softmax = torch.nn.Softmax(dim=1)
    probs = softmax(logits)[0]
    probs = probs.cpu().detach().numpy()
    max_index = np.argmax(probs)
    category = labels[max_index]
    return category
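# Illustrative example (not part of the original script): calling
# get_category("The central bank raised interest rates to curb inflation")
# returns one of the labels defined above, e.g. "business" or "finance";
# the exact label depends on the fine-tuned model's prediction.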
# Creating the interface for the Gradio app
demo = gr.Interface(
    get_category,
    inputs=gr.Textbox(label="Drop your articles here"),
    outputs="text",
    title="News Article Categorization",
)
# Launching the Gradio app
if __name__ == "__main__":
    demo.launch(debug=True)