ierhon committed
Commit edd38af
1 Parent(s): 68135cd

Create app.py

Files changed (1)
  1. app.py +103 -0
app.py ADDED
@@ -0,0 +1,103 @@
+ import numpy as np
+ from numba import njit
+ import math
+ import random
+ import pickle
+ import gradio as gr
+ import threeletterai  # fallback responder used in generate() below; it was referenced but never imported in the original file and is assumed to ship alongside app.py
+
+ def text_to_arr(text: str):
+     # Lower-case the text and map every character to its Unicode code point.
+     return np.array([ord(x) for x in text.lower()])
+
+ @njit
+ def longest_common_substring(s1, s2):
+     # Compares the two arrays position by position and returns the longest
+     # run of equal elements (an aligned common substring).
+     current_match_start = -1
+     current_match_end = -1
+
+     best_match_start = current_match_start
+     best_match_end = current_match_end
+
+     min_len = min(len(s1), len(s2))
+     for i in range(min_len):
+         if s1[i] == s2[i]:
+             current_match_start = current_match_end = i
+             j = 0
+             # Check the bound before indexing so the loop never reads past the arrays.
+             while i+j < min_len and s1[i+j] == s2[i+j]:
+                 j += 1
+                 current_match_end = current_match_start + j
+
+             if current_match_end - current_match_start > best_match_end - best_match_start:
+                 best_match_start = current_match_start
+                 best_match_end = current_match_end
+
+     return s1[best_match_start:best_match_end]
+
+ def not_found_in(q, data):
+     # Returns False as soon as q occurs as a consecutive run inside one of
+     # the arrays in data, True otherwise.
+     for l in data:
+         count = 0
+         lq = len(q)-1
+         for v in l:
+             if v == q[count]:
+                 count += 1
+             else:
+                 count = 0
+             if count == lq:
+                 return False
+     return True
+
+ class Layer:
+     # Mines short substrings that recur across the inputs it has seen and, at
+     # inference time, reports which of those substrings occur in the input.
+     def __init__(self, mem_len: int = 100, max_size: int = 6):
+         self.mem_len = mem_len
+         self.common_strings = []
+         self.previously_seen = []
+         self.max_size = max_size+1
+
+     def __call__(self, input_arr, training: bool = True):
+         o = []
+         li = len(input_arr)
+         for i in range(li):
+             for y, common_substring in enumerate(self.common_strings):
+                 if (i+common_substring.shape[0]) <= li and (input_arr[i:i+common_substring.shape[0]] == common_substring).all():
+                     o.append(y)
+         if training:
+             current_max_len = 0
+             n = None
+             for i, line in enumerate(self.previously_seen):
+                 t = longest_common_substring(input_arr, line)
+                 l = len(t)
+                 if l > current_max_len and l < self.max_size:
+                     current_max_len = l
+                     n = i
+                     result = t
+             if self.previously_seen != []:
+                 if n is not None and len(result) > 1:
+                     self.previously_seen.pop(n)
+                     if not_found_in(result, self.common_strings):
+                         self.common_strings.append(result)
+             self.previously_seen = self.previously_seen[-self.mem_len:]
+             self.previously_seen.append(input_arr)
+         return o
+
+ with open("l1_large.pckl", "rb") as f: layer = pickle.load(f)
+ with open("l2_large.pckl", "rb") as f: layer2 = pickle.load(f)
+ with open("w1_large.pckl", "rb") as f: w = pickle.load(f)
+ with open("w2_large.pckl", "rb") as f: w2 = pickle.load(f)
+
+ # NOTE: `lines` (the list of candidate replies indexed through w/w2) is never
+ # defined or loaded in this file; it has to be provided for generate() to run.
+
+ def generate(msg):
+     if len(msg) < 4:
+         # Very short messages are handled by the threeletterai fallback.
+         return threeletterai.getresp(msg)
+     processed = layer(text_to_arr(msg), training=False)
+     processed = np.array(processed)
+     processed2 = layer2(processed, training=False)
+     # print(processed)
+     # print(processed2)
+     # Each recognized substring votes for the reply it is mapped to.
+     o = np.zeros(len(lines), dtype=np.int16)
+     for a in processed:
+         if a in w:
+             o[w[a]] += 1
+     for a in processed2:
+         if a in w2:
+             o[w2[a]] += 1
+     return lines[np.argmax(o)]
+
+ app = gr.Interface(fn=generate, inputs="text", outputs="text")
+ app.launch()
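
For reference, a minimal usage sketch (not part of this commit) showing how the pieces above fit together without the pickled artifacts. The names toy_lines, toy_w and toy_generate are illustrative assumptions only; the real app instead loads layer, layer2, w, w2 and a lines list from the .pckl files.

# Usage sketch: assumes the definitions from app.py above; all toy_* names are hypothetical.
toy_lines = ["hello there", "i am fine", "goodbye"]  # candidate replies

l1 = Layer(mem_len=10, max_size=6)

# Show the layer a couple of prompts so it mines the recurring run "hello".
for prompt in ["hello", "hello bot"]:
    l1(text_to_arr(prompt), training=True)

# Map each mined substring index to a reply index; the real app loads such a
# mapping from w1_large.pckl / w2_large.pckl instead of building it here.
toy_w = {idx: 0 for idx in l1(text_to_arr("hello"), training=False)}

def toy_generate(msg):
    votes = np.zeros(len(toy_lines), dtype=np.int16)
    for idx in l1(text_to_arr(msg), training=False):
        if idx in toy_w:
            votes[toy_w[idx]] += 1
    return toy_lines[int(np.argmax(votes))]

print(toy_generate("say hello"))  # -> "hello there"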