import gradio as gr
import numpy as np
from keras.models import Model
from keras.saving import load_model
from keras.layers import *
from keras.regularizers import L1
from tensorflow.keras.optimizers import RMSprop
from keras.preprocessing.text import Tokenizer
import os
import hashlib
import keras
os.makedirs("cache", exist_ok=True)  # trained models are cached here; don't fail if the directory already exists
def todset(text: str):
    # Parse "question→answer" lines into ({question: response_index}, [responses]);
    # literal "\n" escapes on either side become real newlines. Lines without
    # a "→" separator (e.g. a trailing blank line) are skipped.
    lines = [x.rstrip("\n").lower().split("→", 1) for x in text.split("\n") if "→" in x]
    lines = [(q.replace("\\n", "\n"), a.replace("\\n", "\n")) for q, a in lines]
    responses = []
    for _, answer in lines:
        if answer not in responses:
            responses.append(answer)
    dset = {question: responses.index(answer) for question, answer in lines}
    return (dset, responses)
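# For example, todset("hi→hello\nbye→goodbye") returns
# ({"hi": 0, "bye": 1}, ["hello", "goodbye"]).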
def hash_str(data: str):
    # Stable fingerprint of the dataset text; used to build cache file names.
    return hashlib.md5(data.encode("utf-8")).hexdigest()
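# e.g. hash_str("abc") == "900150983cd24fb0d6963f7d28e17f72" (a 32-character hex digest).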
def train(message: str = "", regularization: float = 0.0001, dropout: float = 0.1,
          learning_rate: float = 0.001, epochs: int = 16, emb_size: int = 128,
          input_len: int = 16, kernels_count: int = 8, kernel_size: int = 8,
          left_padding: bool = True, data: str = ""):
    if "→" not in data or "\n" not in data:
        if data in os.listdir("cache"):
            # `data` is the file name of a cached model, but the tokenizer and
            # response list can only be rebuilt from the original dataset text,
            # so a bare hash is not enough to answer a message.
            return "Cached model found; paste the dataset it was trained on to query it."
        return "Dataset example:\nquestion→answer\nquestion→answer\netc."
    dset, responses = todset(data)
    resps_len = len(responses)
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(list(dset.keys()))
    vocab_size = len(tokenizer.word_index) + 1
    inp_len = input_len
    # Cache key: dataset hash plus every hyperparameter that shapes the model.
    data_hash = (hash_str(data) + "_" + str(regularization) + "_" + str(dropout)
                 + "_" + str(learning_rate) + "_" + str(epochs) + "_" + str(emb_size)
                 + "_" + str(inp_len) + "_" + str(kernels_count) + "_" + str(kernel_size)
                 + ".keras")
    if message == "!getmodelhash":
        return data_hash
    if data_hash in os.listdir("cache"):
        model = load_model("cache/" + data_hash)
    else:
        input_layer = Input(shape=(inp_len,))
        emb_layer = Embedding(input_dim=vocab_size, output_dim=emb_size, input_length=inp_len)(input_layer)
        dropout1_layer = Dropout(dropout)(emb_layer)
        # Self-attention over the embedded sequence (query = value = key).
        attn_layer = MultiHeadAttention(num_heads=4, key_dim=128)(dropout1_layer, dropout1_layer, dropout1_layer)
        noise_layer = GaussianNoise(0.1)(attn_layer)
        conv1_layer = Conv1D(kernels_count, kernel_size, padding='same', activation='relu', strides=1, kernel_regularizer=L1(regularization))(noise_layer)
        conv2_layer = Conv1D(16, 4, padding='same', activation='relu', strides=1, kernel_regularizer=L1(regularization))(conv1_layer)
        conv3_layer = Conv1D(8, 2, padding='same', activation='relu', strides=1, kernel_regularizer=L1(regularization))(conv2_layer)
        flatten_layer = Flatten()(conv3_layer)
        attn_flatten_layer = Flatten()(attn_layer)
        conv1_flatten_layer = Flatten()(conv1_layer)
        conv2_flatten_layer = Flatten()(conv2_layer)
        conv3_flatten_layer = Flatten()(conv3_layer)
        # The dense head sees every intermediate representation at once.
        concat1_layer = Concatenate()([flatten_layer, attn_flatten_layer, conv1_flatten_layer, conv2_flatten_layer, conv3_flatten_layer])
        dropout2_layer = Dropout(dropout)(concat1_layer)
        dense1_layer = Dense(512, activation="linear", kernel_regularizer=L1(regularization))(dropout2_layer)
        prelu1_layer = PReLU()(dense1_layer)
        dropout3_layer = Dropout(dropout)(prelu1_layer)
        dense2_layer = Dense(256, activation="tanh", kernel_regularizer=L1(regularization))(dropout3_layer)
        dropout4_layer = Dropout(dropout)(dense2_layer)
        dense3_layer = Dense(256, activation="relu", kernel_regularizer=L1(regularization))(dropout4_layer)
        dropout5_layer = Dropout(dropout)(dense3_layer)
        dense4_layer = Dense(100, activation="tanh", kernel_regularizer=L1(regularization))(dropout5_layer)
        # Skip connections back to earlier layers before the classifier.
        concat2_layer = Concatenate()([dense4_layer, prelu1_layer, attn_flatten_layer, conv1_flatten_layer])
        output_layer = Dense(resps_len, activation="softmax", kernel_regularizer=L1(regularization))(concat2_layer)
        model = Model(inputs=input_layer, outputs=output_layer)
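        # Sanity check on the head's input width, assuming the function defaults
        # (inp_len=16, emb_size=128, kernels_count=8): the attention branch
        # flattens to 16*128 = 2048 features, conv1/conv2/conv3 to 128/256/128,
        # and conv3 is flattened twice, so concat1_layer holds 2688 values.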
        X = []
        y = []
        for key in dset:
            tokens = tokenizer.texts_to_sequences([key])[0]
            if left_padding:
                # Zero-pad on the left, keeping the last inp_len tokens,
                # e.g. [3, 7] with inp_len=4 becomes [0, 0, 3, 7].
                X.append(np.array(([0] * inp_len + list(tokens))[-inp_len:]))
            else:
                # Zero-pad on the right, keeping the first inp_len tokens,
                # e.g. [3, 7] with inp_len=4 becomes [3, 7, 0, 0].
                X.append(np.array((list(tokens) + [0] * inp_len)[:inp_len]))
            y.append(dset[key])
        X = np.array(X)
        y = np.array(y)
        model.compile(optimizer=RMSprop(learning_rate=learning_rate), loss="sparse_categorical_crossentropy", metrics=["accuracy"])
        model.fit(X, y, epochs=epochs, batch_size=8)
        model.save(f"cache/{data_hash}")
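        # Later calls with the same dataset and hyperparameters will find this
        # file in cache/ and skip training entirely.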
    tokens = tokenizer.texts_to_sequences([message])[0]
    # Pad the message the same way the training samples were padded.
    padded = ([0] * inp_len + list(tokens))[-inp_len:] if left_padding else (list(tokens) + [0] * inp_len)[:inp_len]
    prediction = model.predict(np.array([padded]))[0]
    keras.backend.clear_session()
    return responses[np.argmax(prediction)]
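# A hypothetical programmatic call: the first run trains and caches the model,
# and a rerun with identical settings reloads it from cache/:
#   train(message="hi", data="hi→hello\nbye→goodbye")
#   train(message="!getmodelhash", data="hi→hello\nbye→goodbye")  # returns the cache key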
if __name__ == "__main__":
    # The first "text" box is the message, the last one is the dataset.
    iface = gr.Interface(fn=train,
                         inputs=["text",
                                 gr.Slider(0, 0.01, value=0.0001, step=1e-8, label="Regularization L1"),
                                 gr.Slider(0, 0.5, value=0.1, step=1e-8, label="Dropout"),
                                 gr.Slider(1e-8, 0.01, value=0.001, step=1e-8, label="Learning rate"),
                                 gr.Slider(1, 64, value=32, step=1, label="Epochs"),
                                 gr.Slider(1, 256, value=100, step=1, label="Embedding size"),
                                 gr.Slider(1, 128, value=16, step=1, label="Input length"),
                                 gr.Slider(1, 128, value=64, step=1, label="Convolution kernel count"),
                                 gr.Slider(1, 16, value=8, step=1, label="Convolution kernel size"),
                                 gr.Checkbox(value=False, label="Use left padding"),
                                 "text"],
                         outputs="text")
    iface.launch()