# Evaluate the fine-tuned BERT toxic-comment classifier on a random sample of the training set.
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

# Global configuration
TEST_SIZE = 2000                                        # number of comments sampled for evaluation
FINE_TUNED_MODEL = "andyqin18/finetuned-bert-uncased"   # Hugging Face Hub model id
def analyze(text: str, threshold: float = 0.5) -> np.ndarray:
    """Run multi-label toxicity inference on a single text.

    Args:
        text: Comment string to classify.
        threshold: Probability cutoff for flagging a label (default 0.5).

    Returns:
        1-D numpy array of 0/1 flags, one entry per label, where
        sigmoid(logit) >= threshold marks the label as present.
    """
    # `tokenizer` and `model` are module-level globals loaded further down
    # in this script. truncation=True guards against comments longer than
    # the model's maximum sequence length.
    encoding = tokenizer(text, return_tensors="pt", truncation=True)
    encoding = {k: v.to(model.device) for k, v in encoding.items()}
    outputs = model(**encoding)
    logits = outputs.logits
    # Independent per-label probabilities (multi-label task, not softmax).
    probs = torch.sigmoid(logits.squeeze().cpu())
    predictions = np.zeros(probs.shape)
    predictions[np.where(probs >= threshold)] = 1
    return predictions
# Read dataset and randomly select testing texts with their matching labels.
df = pd.read_csv("milestone3/comp/train.csv")
labels = df.columns[2:]          # label columns follow id and comment_text
num_label = len(labels)
train_texts = df["comment_text"].values
train_labels = df[labels].values

# Draw ONE set of row indices so texts and labels stay aligned by
# construction. (The original drew texts and label rows in two separate
# np.random.choice calls and relied on re-seeding to keep them in sync;
# with replace=False both call forms permute the same-sized population
# identically, so this single draw selects the exact same sample.)
np.random.seed(1)
sample_idx = np.random.choice(train_labels.shape[0], size=TEST_SIZE, replace=False)
small_test_texts = train_texts[sample_idx]
small_test_labels = train_labels[sample_idx, :]
# Load model and tokenizer, then evaluate on the sampled comments.
model = AutoModelForSequenceClassification.from_pretrained(FINE_TUNED_MODEL)
tokenizer = AutoTokenizer.from_pretrained(FINE_TUNED_MODEL)

total_true = 0      # correctly predicted individual labels (across all samples)
total_success = 0   # samples where the entire label vector was predicted correctly
TP, FP, TN, FN = 0, 0, 0, 0

# Analysis loop
for comment_idx in tqdm(range(TEST_SIZE), desc="Analyzing..."):
    comment = small_test_texts[comment_idx]
    target = small_test_labels[comment_idx]
    # Truncate very long comments before tokenization (crude character-level
    # guard; the model can only attend to a bounded number of tokens anyway).
    result = analyze(comment[:500])

    # Per-label confusion counts.
    for i in range(num_label):
        if result[i] == target[i]:
            if result[i] == 1:
                TP += 1
            else:
                TN += 1
        else:
            if result[i] == 1:
                FP += 1
            else:
                FN += 1

    # Count 1) per-label hits and 2) fully correct label vectors.
    num_true = (result == target).sum()
    if num_true == len(labels):
        total_success += 1
    total_true += num_true

# Aggregate performance. Guard precision/recall against division by zero,
# which occurs when the model predicts no positives at all (precision) or
# the sampled data contains no positive labels (recall).
performance = {}
performance["label_accuracy"] = total_true / (len(labels) * TEST_SIZE)    # per-label accuracy
performance["prediction_accuracy"] = total_success / TEST_SIZE            # exact-match accuracy
performance["precision"] = TP / (TP + FP) if (TP + FP) else 0.0
performance["recall"] = TP / (TP + FN) if (TP + FN) else 0.0
print(performance)