Sunil Surendra Singh
First commit
769af1a
"""All app-specific data and disk-IO related functionality implemented here"""
import subprocess
import joblib
import pandas as pd
import neattext.functions as nfx
import nltk
import spacy
import streamlit as st
@st.cache_resource
def load_lang_model(model):
"""Download and then instantiate then language model"""
# subprocess.run(["python", "-m", "spacy", "download", model])
nlp = spacy.load(model)
return nlp
@st.cache_resource
def load_nltk_punkt():
"""Downloads NLTK tokenizers"""
nltk.download("punkt")
@st.cache_resource
def load_emotions_data(data_file_path):
"""Reads a given data-file and returns a DataFrame"""
return pd.read_csv(data_file_path)
def preprocess_data(df):
"""Cleans and transforms data"""
df["Clean_Text"] = df["Text"].apply(nfx.remove_userhandles)
df["Clean_Text"] = df["Clean_Text"].apply(nfx.remove_stopwords)
df["Clean_Text"] = df["Clean_Text"].apply(nfx.remove_urls)
df["Clean_Text"] = df["Clean_Text"].apply(nfx.remove_punctuations)
return df
def preprocess_pred_data(input_data):
input_data = nfx.remove_userhandles(input_data)
input_data = nfx.remove_stopwords(input_data)
input_data = nfx.remove_urls(input_data)
input_data = nfx.remove_punctuations(input_data)
return [input_data]
def save_model(model_obj, model_file_path):
joblib.dump(value=model_obj, filename=model_file_path)
@st.cache_resource
def load_model(model_file_path):
return joblib.load(model_file_path)