"""All app-specific data and disk-IO related functionality implemented here"""

import subprocess

import joblib
import pandas as pd
import neattext.functions as nfx
import nltk
import spacy
import streamlit as st


@st.cache_resource
def load_lang_model(model):
    """Downloads and then instantiates the language model"""
    # spacy.load() raises OSError if the model package is not installed, so fetch
    # it first via the standard `python -m spacy download` CLI (this assumes a
    # `python` executable is available on PATH).
    subprocess.run(["python", "-m", "spacy", "download", model], check=True)
    nlp = spacy.load(model)
    return nlp


@st.cache_resource
def load_nltk_punkt():
    """Downloads the NLTK punkt tokenizer models"""
    nltk.download("punkt")


@st.cache_resource
def load_emotions_data(data_file_path):
    """Reads a given data-file and returns a DataFrame"""
    return pd.read_csv(data_file_path)


def preprocess_data(df):
    """Cleans the Text column and stores the result in a new Clean_Text column"""
    df["Clean_Text"] = df["Text"].apply(nfx.remove_userhandles)
    df["Clean_Text"] = df["Clean_Text"].apply(nfx.remove_stopwords)
    df["Clean_Text"] = df["Clean_Text"].apply(nfx.remove_urls)
    df["Clean_Text"] = df["Clean_Text"].apply(nfx.remove_punctuations)
    return df


def preprocess_pred_data(input_data):
    """Applies the same cleaning steps to a single input string and wraps it in a list"""
    input_data = nfx.remove_userhandles(input_data)
    input_data = nfx.remove_stopwords(input_data)
    input_data = nfx.remove_urls(input_data)
    input_data = nfx.remove_punctuations(input_data)
    return [input_data]


def save_model(model_obj, model_file_path):
    """Saves the given model object to disk with joblib"""
    joblib.dump(value=model_obj, filename=model_file_path)


@st.cache_resource
def load_model(model_file_path):
    """Loads a previously saved model object from disk"""
    return joblib.load(model_file_path)
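

if __name__ == "__main__":
    # Illustrative, standalone sketch of the text-cleaning helpers above. The
    # sample sentence and the tiny in-memory DataFrame are made up for this
    # demo and are not app data; the "Text" column name matches what
    # preprocess_data() expects.
    sample = "I love this! @someone https://example.com"
    demo_df = pd.DataFrame({"Text": [sample]})
    print(preprocess_data(demo_df)["Clean_Text"].tolist())
    print(preprocess_pred_data(sample))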