# scn1/main.py

import numpy as np
from numba import njit
from tqdm import tqdm
import math
import random
from matplotlib import pyplot as plt
import pickle
# whitelist = "ёйцукенгшщзхъфывапролджэячсмитьбю "

def text_to_arr(text: str):
    # Lowercase the text and map each character to its Unicode code point.
    return np.array([ord(x) for x in text.lower()])

@njit
def longest_common_substring(s1, s2):
    # Longest run of positions at which s1 and s2 hold the same value
    # (an aligned match), not a general longest-common-substring search.
    current_match_start = -1
    current_match_end = -1
    best_match_start = current_match_start
    best_match_end = current_match_end
    min_len = min(len(s1), len(s2))
    for i in range(min_len):
        if s1[i] == s2[i]:
            current_match_start = current_match_end = i
            j = 0
            # The bounds check must come before the comparison, otherwise
            # i+j can index past the end of the arrays.
            while i + j < min_len and s1[i+j] == s2[i+j]:
                j += 1
            current_match_end = current_match_start + j
            if current_match_end - current_match_start > best_match_end - best_match_start:
                best_match_start = current_match_start
                best_match_end = current_match_end
    return s1[best_match_start:best_match_end]
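
# Quick sanity check of the matcher (a sketch with made-up inputs, not dataset
# text): "hello" and "help!" agree at positions 0-2, so the aligned match is "hel".
assert ''.join(chr(c) for c in longest_common_substring(
    text_to_arr("hello"), text_to_arr("help!"))) == "hel"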

def not_found_in(q, data):
    # True if q does not occur as a contiguous run in any sequence in data.
    lq = len(q)
    for l in data:
        count = 0
        for v in l:
            if v == q[count]:
                count += 1
            else:
                # On a mismatch, restart and re-check v against the first element.
                count = 1 if v == q[0] else 0
            if count == lq:
                return False
    return True
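
# Behavior check (made-up arrays): [2, 3] occurs as a run inside the row, so it
# is "found"; [9, 9] occurs nowhere.
assert not not_found_in(np.array([2, 3]), [np.array([1, 2, 3, 4])])
assert not_found_in(np.array([9, 9]), [np.array([1, 2, 3, 4])])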

class Layer:
    def __init__(self, mem_len: int = 100, max_size: int = 6):
        self.mem_len = mem_len
        self.common_strings = []   # learned substrings; their indices are this layer's outputs
        self.previously_seen = []  # rolling memory of recent inputs
        self.max_size = max_size + 1

    def __call__(self, input_arr, training: bool = True):
        o = []
        li = len(input_arr)
        # Emit the index of every learned substring found in the input.
        for i in range(li):
            for y, cs in enumerate(self.common_strings):
                if (i + cs.shape[0]) <= li and (input_arr[i:i+cs.shape[0]] == cs).all():
                    o.append(y)
        if training:
            # Find the longest aligned match (below max_size) between the input
            # and any remembered line, and promote it to a common string.
            cl = 0
            n = None
            r = None
            for i, line in enumerate(self.previously_seen):
                t = longest_common_substring(input_arr, line)
                l = len(t)
                if cl < l < self.max_size:
                    cl = l
                    n = i
                    r = t
            if n is not None and len(r) > 1:
                self.previously_seen.pop(n)
                if not_found_in(r, self.common_strings):
                    self.common_strings.append(r)
            self.previously_seen = self.previously_seen[-self.mem_len:]
            self.previously_seen.append(input_arr)
        return o
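
# Minimal usage sketch (toy strings, not the real dataset): two lines sharing
# "cat" make a fresh Layer promote that substring, so a later input containing
# it reports index 0.
_demo_layer = Layer(mem_len=10, max_size=4)
_demo_layer(text_to_arr("cat!"))
_demo_layer(text_to_arr("cats"))
assert _demo_layer(text_to_arr("a cat"), training=False) == [0]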

def comparefilter(f1, f2):
    # Rough similarity test between two filters: threshold both weight vectors
    # at hss and count agreeing positions across the keys they share.
    o = 0
    hss = 0.5
    for k in f1:
        if k in f2:
            o += np.sum((f2[k] > hss) == (f1[k] > hss))
    return o >= len(f1) * hss
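
# Illustration (made-up filters): after thresholding at 0.5 the two weight
# vectors agree at two of three positions, and 2 >= len(f1)*0.5, so the
# filters count as similar.
assert comparefilter({97: np.array([0.9, 0.1, 0.8])},
                     {97: np.array([0.7, 0.2, 0.1])})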

class StrConv:
    def __init__(self, filters: int, size: int = 4):
        self.filter_amount = filters
        # Each filter maps a character code to per-position weights,
        # e.g. {43: array([3, 2, 0, 3])}.
        self.filters = [{} for _ in range(filters)]
        self.bias = np.zeros((self.filter_amount,))
        self.size = size

    def regularize(self):
        # Occasionally blank out a filter that closely duplicates an earlier one.
        for n, f in enumerate(self.filters):
            for f2 in self.filters[:n]:
                if random.randint(0, 100) < 10 and comparefilter(f, f2):
                    self.filters[n] = {}

    def __call__(self, input_arr, training: bool = True, debug=False):
        if len(input_arr) <= self.size:
            return []
        # Slide every filter across the input, accumulating per-position weights.
        o = np.zeros((input_arr.shape[0] - self.size, self.filter_amount))
        for i in range(input_arr.shape[0] - self.size):
            for n, c in enumerate(input_arr[i:i+self.size]):
                for fn, f in enumerate(self.filters):
                    if c in f:
                        o[i, fn] += f[c][n]
        o += self.bias
        m = np.max(np.abs(o))
        if m != 0:
            o /= m
        if debug:
            plt.imshow(o)
            plt.show()
        if training:
            # Nudge each seen (character, position) weight toward the normalized
            # activation; characters never seen by a filter get a random init.
            for i in range(input_arr.shape[0] - self.size):
                for n, c in enumerate(input_arr[i:i+self.size]):
                    for fn, f in enumerate(self.filters):
                        if c in f:
                            # s = np.sum(f[c])
                            # if s > 1000:
                            #     f[c] = (f[c]/(s/(self.size*1000))).astype(np.int64)
                            f[c][n] = o[i, fn]*0.1 + f[c][n]*0.9
                        else:
                            f[c] = np.random.uniform(0, 1, (self.size,))
                            f[c][n] = o[i, fn]
            # for t in range(self.size, input_arr.shape[0]):
            #     for f in range(self.filter_amount):
            #         self.filters[f] = o[t-self.size, f]
            """
            s = 0
            for a in self.filters:
                for b in a:
                    s += np.sum(b)
            if s > 100:
                s /= self.filter_amount
                for a in self.filters:
                    for b in a:
                        a[b] = (a[b]/s).astype(dtype=np.int64)
            """
            self.bias -= np.sum(o, axis=0)  # / o.shape[0]
        # Collapse each window to the index of its strongest filter; other
        # readouts are possible, e.g. o > (self.size//2) or the raw o.
        maxed = np.zeros((o.shape[0],))
        for i in range(maxed.shape[0]):
            maxed[i] = np.argmax(o[i])
        return maxed
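
# Usage sketch for this (currently unused) layer: each call yields, per window,
# the index of the filter with the strongest response. Toy input, fresh weights.
_demo_conv = StrConv(4, size=3)
assert len(_demo_conv(text_to_arr("hello world"))) == len("hello world") - 3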
with open("dataset.txt", "r") as f:
lines = f.read().rstrip("\n").split("\n")[:40000]
w = {}
w2 = {}
c = 0
#layer = Layer(mem_len=1000, max_size=4)
#layer2 = Layer(mem_len=1000, max_size=6)
with open("l1_large.pckl", "rb") as f: layer = pickle.load(f)
with open("l2_large.pckl", "rb") as f: layer2 = pickle.load(f)
with open("w1_large.pckl", "rb") as f: w = pickle.load(f)
with open("w2_large.pckl", "rb") as f: w2 = pickle.load(f)
"""
for n, text in tqdm(enumerate(lines[:-1])):
if text.strip() != "" and lines[n+1].strip() != "" and text != lines[n+1]:
t = layer(text_to_arr(text), training=True)
t = layer(text_to_arr(text), training=False)
c += 1
# if c == 10:
# c = 0
# layer.regularize()
# layer2.regularize()
if len(t) != 0:
t2 = layer2(np.array(t), training=True)
t2 = layer2(np.array(t), training=False)
for a in t2:
if a in w2:
w2[a].append(n+1)
else:
w2[a] = [n+1,]
for a in t:
if a in w:
w[a].append(n+1)
else:
w[a] = [n+1,]
for n, text in tqdm(enumerate(lines[:200])):
if text.strip() != "" and lines[n+1].strip() != "" and text != lines[n+1]:
t = layer(text_to_arr(text), training=True)
t = layer(text_to_arr(text), training=False)
c += 1
# if c == 10:
# c = 0
# layer.regularize()
# layer2.regularize()
if len(t) != 0:
t2 = layer2(np.array(t), training=True)
t2 = layer2(np.array(t), training=False)
for a in t2:
if a in w2:
w2[a].append(n+1)
else:
w2[a] = [n+1,]
for a in t:
if a in w:
w[a].append(n+1)
else:
w[a] = [n+1,]
with open("l1_large.pckl", "wb") as f: pickle.dump(layer, f)
with open("l2_large.pckl", "wb") as f: pickle.dump(layer2, f)
with open("w1_large.pckl", "wb") as f: pickle.dump(w, f)
with open("w2_large.pckl", "wb") as f: pickle.dump(w2, f)
"""

# print(layer.filters)
# for arr in layer.common_strings:
#     print(''.join([chr(a) for a in arr]))
print(len(lines), "responses available")
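
# The response picker below tallies votes per dataset line with np.add.at,
# since repeated indices must each count (fancy-index `+= 1` would collapse
# duplicates). Tiny self-check:
_votes = np.zeros(3)
np.add.at(_votes, [0, 0, 2], 1)
assert _votes.tolist() == [2.0, 0.0, 1.0]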

import threeletterai

while True:
    msg = input("Message: ")
    if len(msg) < 4:
        # Very short messages go to the fallback responder.
        print(threeletterai.getresp(msg))
        continue
    processed = layer(text_to_arr(msg), training=False)
    processed = np.array(processed)
    processed2 = layer2(processed, training=False)
    # Tally a vote for every dataset line associated with each fired substring.
    o = np.zeros(len(lines), dtype=np.int16)
    for a in processed:
        if a in w:
            np.add.at(o, w[a], 1)
    for a in processed2:
        if a in w2:
            np.add.at(o, w2[a], 1)
    print(lines[np.argmax(o)], f" {np.max(o)} sure")