import numpy as np
from numba import njit
from tqdm import tqdm
import random
from matplotlib import pyplot as plt
import pickle


def text_to_arr(text: str):
    # Lowercase the text and map each character to its Unicode code point.
    return np.array([ord(x) for x in text.lower()])
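# Example: text_to_arr("Hi!") -> array([104, 105, 33]); lowercasing first
# makes all downstream matching case-insensitive.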

@njit
def longest_common_substring(s1, s2):
    # Longest run where s1 and s2 agree at the same indices (an aligned
    # match, not the classic longest common substring at arbitrary offsets).
    current_match_start = -1
    current_match_end = -1

    best_match_start = current_match_start
    best_match_end = current_match_end

    min_len = min(len(s1), len(s2))
    for i in range(min_len):
        if s1[i] == s2[i]:
            current_match_start = current_match_end = i
            j = 0
            # Check the bound before indexing so the loop cannot read past
            # the end of either array.
            while i+j < min_len and s1[i+j] == s2[i+j]:
                j += 1
            current_match_end = current_match_start + j

            if current_match_end - current_match_start > best_match_end - best_match_start:
                best_match_start = current_match_start
                best_match_end = current_match_end

    return s1[best_match_start:best_match_end]
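# Illustrative check:
#   longest_common_substring(text_to_arr("hello"), text_to_arr("help me"))
#   -> the code points of "hel"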

def not_found_in(q, data):
    # True when q does not occur as a contiguous run in any sequence in data.
    lq = len(q)
    for l in data:
        count = 0
        for v in l:
            if v == q[count]:
                count += 1
            else:
                # Restart the match, re-testing v against the first element.
                count = 1 if v == q[0] else 0
            if count == lq:
                return False
    return True
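# Illustrative checks:
#   not_found_in([2, 3], [[1, 2, 3, 4]])  # -> False: the run 2,3 occurs
#   not_found_in([3, 2], [[1, 2, 3, 4]])  # -> True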

class Layer:
    def __init__(self, mem_len: int = 100, max_size: int = 6):
        self.mem_len = mem_len        # how many recent inputs to remember
        self.common_strings = []      # learned substrings, emitted as token indices
        self.previously_seen = []     # recent inputs mined for shared substrings
        self.max_size = max_size+1    # upper bound on a stored substring's length

    def __call__(self, input_arr, training: bool = True):
        # Emit the index of every common string found anywhere in the input.
        o = []
        li = len(input_arr)
        for i in range(li):
            for y, cs in enumerate(self.common_strings):
                if (i+cs.shape[0]) <= li and (input_arr[i:i+cs.shape[0]] == cs).all():
                    o.append(y)
        if training:
            # Find the longest (but not too long) aligned overlap between the
            # input and any remembered line.
            cl = 0
            n = None
            r = None
            for i, line in enumerate(self.previously_seen):
                t = longest_common_substring(input_arr, line)
                l = len(t)
                if l > cl and l < self.max_size:
                    cl = l
                    n = i
                    r = t
            if n is not None and len(r) > 1:
                # Promote the overlap to a common string if it is new.
                self.previously_seen.pop(n)
                if not_found_in(r, self.common_strings):
                    self.common_strings.append(r)
            self.previously_seen = self.previously_seen[-self.mem_len:]
            self.previously_seen.append(input_arr)
        return o
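# Minimal usage sketch (the strings are hypothetical): two lines sharing a
# short aligned run let the layer promote that run into common_strings,
# after which calls report the indices of the runs found in the input.
#
#   demo = Layer(mem_len=10, max_size=6)
#   demo(text_to_arr("cat sat"), training=True)
#   demo(text_to_arr("cat ran"), training=True)     # stores the run "cat "
#   demo(text_to_arr("a cat sat"), training=False)  # -> [0]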

def comparefilter(f1, f2):
    # Count positions where the thresholded (> hss) weight patterns of the
    # two filters agree; call the filters similar once that count reaches
    # half the number of keys in f1.
    o = 0
    hss = 0.5
    for k in f1:
        if k in f2:
            o += np.sum((f2[k] > hss) == (f1[k] > hss))
    return o >= len(f1)*hss
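# Illustrative (hypothetical weights; 104 is the code point of "h"):
#   f_a = {104: np.array([0.9, 0.1, 0.2])}
#   f_b = {104: np.array([0.8, 0.3, 0.1])}
#   comparefilter(f_a, f_b)  # -> True: the > 0.5 patterns agree at all 3 slots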

class StrConv:
    def __init__(self, filters: int, size: int = 3):
        self.filter_amount = filters
        self.filters = [{} for _ in range(filters)]  # per filter: code point -> per-position weights
        self.bias = np.zeros((self.filter_amount,))
        self.size = size  # window width in characters

    def regularize(self):
        # Occasionally blank out a filter that duplicates an earlier one.
        for n, f in enumerate(self.filters):
            for f2 in self.filters[:n]:
                if random.randint(0, 100) < 10 and comparefilter(f, f2):
                    self.filters[n] = {}

    def __call__(self, input_arr, training: bool = True, debug=False):
        if len(input_arr) <= self.size:
            return []
        # Slide a window of width self.size over the input, summing each
        # filter's per-position weight for the characters it sees.
        o = np.zeros((input_arr.shape[0]-self.size, self.filter_amount))
        for i in range(input_arr.shape[0]-self.size):
            for n, c in enumerate(input_arr[i:i+self.size]):
                for fn, f in enumerate(self.filters):
                    if c in f:
                        o[i, fn] += f[c][n]
        o += self.bias
        m = np.max(np.abs(o))
        if m != 0:
            o /= m  # normalize responses into [-1, 1]
        if debug:
            plt.imshow(o)
            plt.show()
        if training:
            for i in range(input_arr.shape[0]-self.size):
                for n, c in enumerate(input_arr[i:i+self.size]):
                    for fn, f in enumerate(self.filters):
                        if c in f:
                            # Exponential moving average toward the current response.
                            f[c][n] = o[i, fn]*0.1 + f[c][n]*0.9
                        else:
                            # Unseen character: random weights, seeded at this position.
                            f[c] = np.random.uniform(0, 1, self.size)
                            f[c][n] = o[i, fn]
            """
            s = 0
            for a in self.filters:
                for b in a:
                    s += np.sum(b)
            if s > 100:
                s /= self.filter_amount
                for a in self.filters:
                    for b in a:
                        a[b] = (a[b]/s).astype(dtype=np.int64)
            """
            # Penalize filters that fire everywhere.
            self.bias -= np.sum(o, axis=0)

        # Keep only the index of the strongest filter at each window position.
        maxed = np.zeros((o.shape[0],))
        for i in range(maxed.shape[0]):
            maxed[i] = np.argmax(o[i])
        return maxed
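# Minimal usage sketch (hypothetical sizes): each call yields, per window
# position, the index of the filter that responded most strongly.
#
#   conv = StrConv(filters=8, size=3)
#   feats = conv(text_to_arr("hello world"), training=True)
#   # feats.shape == (len("hello world") - 3,)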

with open("dataset.txt", "r") as f:
    lines = f.read().rstrip("\n").split("\n")[:40000]
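# Assumed format: dataset.txt holds one utterance per line, and line n+1 is
# treated as the response to line n (see the disabled training loop below).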

w = {}   # layer-1 feature index -> dataset lines where it fired
w2 = {}  # layer-2 feature index -> dataset lines where it fired

c = 0

# Load a previously trained model; the training pass below is left disabled.
with open("l1_large.pckl", "rb") as f: layer = pickle.load(f)
with open("l2_large.pckl", "rb") as f: layer2 = pickle.load(f)
with open("w1_large.pckl", "rb") as f: w = pickle.load(f)
with open("w2_large.pckl", "rb") as f: w2 = pickle.load(f)
"""
for n, text in tqdm(enumerate(lines[:-1])):
    if text.strip() != "" and lines[n+1].strip() != "" and text != lines[n+1]:
        t = layer(text_to_arr(text), training=True)
        t = layer(text_to_arr(text), training=False)
        c += 1
        # if c == 10:
        #     c = 0
        #     layer.regularize()
        #     layer2.regularize()
        if len(t) != 0:
            t2 = layer2(np.array(t), training=True)
            t2 = layer2(np.array(t), training=False)
            for a in t2:
                if a in w2:
                    w2[a].append(n+1)
                else:
                    w2[a] = [n+1,]
        for a in t:
            if a in w:
                w[a].append(n+1)
            else:
                w[a] = [n+1,]

for n, text in tqdm(enumerate(lines[:200])):
    if text.strip() != "" and lines[n+1].strip() != "" and text != lines[n+1]:
        t = layer(text_to_arr(text), training=True)
        t = layer(text_to_arr(text), training=False)
        c += 1
        # if c == 10:
        #     c = 0
        #     layer.regularize()
        #     layer2.regularize()
        if len(t) != 0:
            t2 = layer2(np.array(t), training=True)
            t2 = layer2(np.array(t), training=False)
            for a in t2:
                if a in w2:
                    w2[a].append(n+1)
                else:
                    w2[a] = [n+1,]
        for a in t:
            if a in w:
                w[a].append(n+1)
            else:
                w[a] = [n+1,]

with open("l1_large.pckl", "wb") as f: pickle.dump(layer, f)
with open("l2_large.pckl", "wb") as f: pickle.dump(layer2, f)
with open("w1_large.pckl", "wb") as f: pickle.dump(w, f)
with open("w2_large.pckl", "wb") as f: pickle.dump(w2, f)
"""

print(len(lines), "responses available")

import threeletterai

while True:
    msg = input("Message: ")
    if len(msg) < 4:
        # Too short for the learned features; fall back to the simpler model.
        print(threeletterai.getresp(msg))
        continue
    processed = layer(text_to_arr(msg), training=False)
    processed = np.array(processed)
    processed2 = layer2(processed, training=False)

    # Vote: every dataset line that shares a feature with the message gets a
    # point; np.add.at (unlike o[idx] += 1) also counts repeated indices.
    o = np.zeros(len(lines), dtype=np.int16)
    for a in processed:
        if a in w:
            np.add.at(o, w[a], 1)
    for a in processed2:
        if a in w2:
            np.add.at(o, w2[a], 1)
    print(lines[np.argmax(o)], f" {np.max(o)} sure")