|
import pickle |
|
import streamlit as st |
|
|
|
# Load the trained model once at start-up; the context manager guarantees the
# file handle is closed as soon as unpickling finishes.
# NOTE(security): pickle.load can execute arbitrary code while unpickling, so
# 'gib_model.pki' must only ever come from a trusted source.
with open('gib_model.pki', 'rb') as model_file:
    model_data = pickle.load(model_file)
|
|
|
|
|
import math |
|
import pickle |
|
|
|
# Alphabet the model works with: the lowercase ASCII letters plus the space.
accepted_chars = 'abcdefghijklmnopqrstuvwxyz '

# Maps every accepted character to its index within accepted_chars.
pos = {char: idx for idx, char in enumerate(accepted_chars)}
|
|
|
|
|
def normalize(line):
    """Return the characters of line that belong to accepted_chars.

    Each character is lowercased before the membership check, and the
    lowercased form is what ends up in the returned list.  Dropping
    punctuation, rare symbols and similar keeps the model relatively small.
    """
    kept = []
    for raw_char in line:
        lowered = raw_char.lower()
        if lowered in accepted_chars:
            kept.append(lowered)
    return kept
|
|
|
|
|
def ngram(n, l):
    """Yield every run of n consecutive characters of l, after normalizing.

    The input is first passed through normalize(); each yielded value is a
    string of n characters taken from a sliding window over that result.
    Nothing is yielded when the normalized text is shorter than n.
    """
    chars = normalize(l)
    window_count = len(chars) - n + 1
    yield from (
        ''.join(chars[begin:begin + n]) for begin in range(window_count)
    )
|
|
|
|
|
def get_lines():
    """Yield, one at a time, the lines of every dataset file.

    The only dataset currently listed is 'big.txt', opened relative to the
    current working directory.  Lines are yielded exactly as the file
    iterator produces them (line endings included).
    """
    for dataset_path in ('big.txt',):
        with open(dataset_path) as dataset:
            yield from dataset
|
|
|
|
|
def avg_transition_prob(l, log_prob_mat):
    """Return the average transition prob from l through log_prob_mat.

    Every character trigram (a, b, c) of the normalized text contributes
    log_prob_mat[pos[a]][pos[b]][pos[c]]; the mean of those values is
    exponentiated and returned.

    Args:
        l: Text to score.
        log_prob_mat: Three-level nested table indexed by character
            positions (see pos).  Its entries are summed and exponentiated,
            i.e. treated as log-probabilities.

    Returns:
        math.exp of the mean looked-up value, as a float.  Text that yields
        no trigram (fewer than three accepted characters) returns 1.0.
    """
    log_prob = 0.0
    transition_ct = 0
    for a, b, c in ngram(3, l):
        log_prob += log_prob_mat[pos[a]][pos[b]][pos[c]]
        transition_ct += 1
    # `or 1` avoids a division by zero when the text produced no trigram.
    return math.exp(log_prob / (transition_ct or 1))
|
|
|
# Streamlit re-executes this script from top to bottom on every widget
# interaction, so the page must be described exactly once per run.  An
# explicit `while True` loop would never let the run finish and would create
# the same text area repeatedly, which Streamlit rejects as a duplicate widget.
model_mat = model_data['mat']
threshold = model_data['thresh']

message = st.text_area('enter a prospection message')

# Displays True when the average transition probability is above the model's
# threshold, False otherwise.
st.write(avg_transition_prob(message, model_mat) > threshold)
|
|