abdulmatinomotoso committed on
Commit
4ee9c74
·
1 Parent(s): 9d55375

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -6
app.py CHANGED
@@ -14,16 +14,17 @@ model_name = "valurank/finetuned-distilbert-news-article-categorization"
14
  model = AutoModelForSequenceClassification.from_pretrained(model_name)
15
  tokenizer = AutoTokenizer.from_pretrained(model_name)
16
 
 
17
  #Reading in the text file
18
  def read_in_text(url):
19
  with open(url, 'r') as file:
20
  article = file.read()
21
 
22
  return article
23
-
24
- def clean_text(url):
25
- text = url
26
- text = text.encode("ascii", errors="ignore").decode(
27
  "ascii"
28
  ) # remove all non-ASCII characters (e.g. Chinese characters)
29
 
@@ -41,8 +42,8 @@ def clean_text(url):
41
  return text
42
 
43
  # Define a function to get the category of the news article
44
- def get_category(file):
45
- text = clean_text(file)
46
 
47
  input_tensor = tokenizer.encode(text, return_tensors="pt", truncation=True)
48
  logits = model(input_tensor).logits
 
14
  model = AutoModelForSequenceClassification.from_pretrained(model_name)
15
  tokenizer = AutoTokenizer.from_pretrained(model_name)
16
 
17
+ """
18
  #Reading in the text file
19
  def read_in_text(url):
20
  with open(url, 'r') as file:
21
  article = file.read()
22
 
23
  return article
24
+ """
25
+
26
+ def clean_text(raw_text):
27
+ text = raw_text.encode("ascii", errors="ignore").decode(
28
  "ascii"
29
  ) # remove all non-ASCII characters (e.g. Chinese characters)
30
 
 
42
  return text
43
 
44
  # Define a function to get the category of the news article
45
+ def get_category(text):
46
+ text = clean_text(text)
47
 
48
  input_tensor = tokenizer.encode(text, return_tensors="pt", truncation=True)
49
  logits = model(input_tensor).logits