File size: 1,504 Bytes
769af1a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
"""All app-specific data and disk-IO related functionality implemented here"""

import subprocess
import joblib
import pandas as pd
import neattext.functions as nfx
import nltk
import spacy
import streamlit as st


@st.cache_resource
def load_lang_model(model):
    """Instantiate the spaCy language pipeline identified by ``model``.

    No download is attempted here: ``model`` is handed straight to
    ``spacy.load``, so the pipeline has to be loadable in the current
    environment already. The ``st.cache_resource`` decorator lets Streamlit
    reuse the loaded pipeline instead of loading it again on every rerun.

    Args:
        model: Name or path accepted by ``spacy.load``.

    Returns:
        The pipeline object produced by ``spacy.load``.
    """
    return spacy.load(model)


@st.cache_resource
def load_nltk_punkt():
    """Fetch the NLTK ``punkt`` tokenizer data through ``nltk.download``.

    The function returns nothing; it exists for its download side effect.
    It is wrapped in ``st.cache_resource`` so Streamlit does not repeat the
    call on every script rerun.
    """
    tokenizer_package = "punkt"
    nltk.download(tokenizer_package)


@st.cache_data
def load_emotions_data(data_file_path):
    """Read the CSV at ``data_file_path`` and return it as a DataFrame.

    The result is cached with ``st.cache_data`` rather than
    ``st.cache_resource``: ``cache_resource`` hands every caller the very
    same object, so code that adds or rewrites columns in place would
    silently modify the cached DataFrame for every later caller and
    session. ``cache_data`` returns a fresh copy on each call, which keeps
    the cached value pristine.

    Args:
        data_file_path: Location of the CSV file, in any form accepted by
            ``pandas.read_csv``.

    Returns:
        A ``pandas.DataFrame`` with the file's contents.
    """
    return pd.read_csv(data_file_path)


def preprocess_data(df):
    """Add a ``Clean_Text`` column derived from the ``Text`` column.

    The cleaning steps run in a fixed order: user handles, stopwords, URLs,
    then punctuation are stripped with the corresponding ``neattext``
    helpers. ``df`` is modified in place and also returned.

    Args:
        df: DataFrame that contains a ``Text`` column.

    Returns:
        The same DataFrame object, now carrying ``Clean_Text``.
    """
    cleaning_steps = (
        nfx.remove_userhandles,
        nfx.remove_stopwords,
        nfx.remove_urls,
        nfx.remove_punctuations,
    )
    # The first step reads the raw text; each later step refines the column
    # written by the step before it.
    source_column = "Text"
    for clean in cleaning_steps:
        df["Clean_Text"] = df[source_column].apply(clean)
        source_column = "Clean_Text"
    return df


def preprocess_pred_data(input_data):
    """Clean a single piece of text and wrap it in a one-element list.

    The text goes through the ``neattext`` helpers in this order: user
    handles, stopwords, URLs, then punctuation are removed.

    Args:
        input_data: The raw text to clean.

    Returns:
        A list whose only element is the cleaned text.
    """
    cleaning_steps = (
        nfx.remove_userhandles,
        nfx.remove_stopwords,
        nfx.remove_urls,
        nfx.remove_punctuations,
    )
    cleaned_text = input_data
    for clean in cleaning_steps:
        cleaned_text = clean(cleaned_text)
    return [cleaned_text]


def save_model(model_obj, model_file_path):
    """Serialize ``model_obj`` to ``model_file_path`` using ``joblib.dump``.

    Args:
        model_obj: The object to persist.
        model_file_path: Destination passed to ``joblib.dump`` as the
            filename.
    """
    joblib.dump(model_obj, model_file_path)


@st.cache_resource
def load_model(model_file_path):
    """Load an object previously stored with joblib from ``model_file_path``.

    Wrapped in ``st.cache_resource`` so Streamlit reuses the loaded object
    instead of reading the file again on every rerun.

    NOTE(review): ``joblib.load`` is pickle-based, so this should only be
    pointed at files from a trusted source.

    Args:
        model_file_path: Location of the joblib file to read.

    Returns:
        Whatever object ``joblib.load`` reconstructs from the file.
    """
    loaded_model = joblib.load(model_file_path)
    return loaded_model