OmegLSTM
OmegLSTM is an LSTM-based, character-level text generation network trained on chat logs from the now-defunct random chat website "Omegle".
WARNING!!!!!!
This model will produce highly sexual content. Do not use it if you are under 18!
Prompt format
Interests: interests separated by spaces, or leave empty
You: ...
Stranger: ...
etc
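For example, a prompt could look like this (the interests and dialogue lines are only an illustration of the format above; ending on "Stranger: " lets the model complete the stranger's reply):

Interests: music gaming
You: hi
Stranger: 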
Architecture
This model uses an (as far as I am aware) novel architecture: several LSTMs are run in parallel (like attention heads) and their outputs are summed, and these multi-LSTM layers are then stacked in sequence with residual connections and pointwise-convolution feed-forward sublayers, similar to transformer blocks. It has 4,987,763 parameters.
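Schematically, the data flow through one block is summarized below (this is just a restatement of the MHL and SLSTM_block classes in the inference code further down):

    x -> LayerNorm -> [parallel LSTM heads] -> sum -> LayerNorm -> add residual
      -> LayerNorm -> Conv1d(1x1) -> GELU -> Conv1d(1x1) -> GELU -> add residual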
Inference code
Place the following code in a Python file in the same folder as the .pt file and run it. For CPU inference, change device = torch.device("cuda:0") to device = torch.device("cpu").
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from tqdm import tqdm
from einops import rearrange

device = torch.device("cuda:0")
class MHL(nn.Module):
    """Multi-head LSTM: several LSTMs run in parallel, outputs summed, then LayerNorm."""
    def __init__(self, embed_dim, heads):
        super(MHL, self).__init__()
        self.lstms = nn.ModuleList([nn.LSTM(embed_dim, embed_dim, batch_first=True) for _ in range(heads)])
        self.ln = nn.LayerNorm(embed_dim)

    def forward(self, x):
        # Run each LSTM head over the sequence and sum their outputs.
        combo = [lstm(x)[0] for lstm in self.lstms]
        combo = torch.sum(torch.stack(combo), dim=0)
        return self.ln(combo)
class SLSTM_block(nn.Module):
    """Transformer-style block: multi-head LSTM sublayer plus a pointwise-conv feed-forward sublayer, each with a residual connection."""
    def __init__(self, embed_dim, heads):
        super(SLSTM_block, self).__init__()
        self.lstms = MHL(embed_dim, heads)
        self.c1 = nn.Conv1d(embed_dim, embed_dim, 1, 1)
        self.c2 = nn.Conv1d(embed_dim, embed_dim, 1, 1)
        self.ac = nn.GELU()
        self.norm = nn.LayerNorm(embed_dim)
        self.norm2 = nn.LayerNorm(embed_dim)

    def forward(self, x):
        # Multi-head LSTM sublayer with residual connection.
        skip = x
        x = self.norm(x)
        x = self.lstms(x)
        x = x + skip
        # Pointwise (1x1) convolution feed-forward sublayer with residual connection.
        skip = x
        x = self.norm2(x)
        x = rearrange(x, "b l c -> b c l")
        x = self.c1(x)
        x = self.ac(x)
        x = self.c2(x)
        x = self.ac(x)
        x = rearrange(x, "b c l -> b l c")
        x = x + skip
        return x
class CharGenModel(nn.Module):
    def __init__(self, vocab_size, embed_dim, layers, heads):
        super(CharGenModel, self).__init__()
        # Embedding layer
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.blocks = nn.ModuleList([SLSTM_block(embed_dim, heads) for _ in range(layers)])
        # Output layer to predict next character
        self.fc = nn.Linear(embed_dim, vocab_size)

    def forward(self, x):
        # x: (batch_size, sequence_length)
        # Get embeddings
        x = self.embedding(x)  # (batch_size, sequence_length, embed_dim)
        for block in self.blocks:
            x = block(x)
        # Output layer
        x = self.fc(x)  # (batch_size, sequence_length, vocab_size)
        return x
# Helper function to generate text
def generate_text(model, char_to_idx, idx_to_char, start_text, gen_length=100, temperature=1.0, top_k=10):
    model.eval()
    input_seq = torch.tensor([char_to_idx[char] for char in start_text], dtype=torch.long).unsqueeze(0).to(device)
    generated_text = start_text
    for _ in tqdm(range(gen_length)):
        with torch.no_grad():
            # Get the output for the last 128 tokens
            output = model(input_seq[:, -128:])
            # Get the logits for the last timestep
            logits = output[:, -1, :]
            # Apply temperature scaling
            logits = logits / temperature
            # Apply top-k filtering
            if top_k > 0:
                values, indices = torch.topk(logits, top_k)
                logits_top_k = torch.zeros_like(logits).fill_(-float('Inf'))
                logits_top_k.scatter_(1, indices, values)
                logits = logits_top_k
            # Apply softmax to get probabilities
            probabilities = F.softmax(logits, dim=-1)
            # Sample the next character index from the probability distribution
            next_char_idx = torch.multinomial(probabilities, num_samples=1).item()
            next_char = idx_to_char[next_char_idx]
            # Append the generated character to the result
            generated_text += next_char
            # Update the input sequence
            input_seq = torch.cat((input_seq, torch.tensor([[next_char_idx]]).to(device)), dim=1)
    model.train()
    return generated_text
chars = ['\n', ' ', '!', '"', '#', '$', '&', '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '>', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '|', '\x86', '\x8d', '\x91', '\x9f', '©', 'ª', '\xad', '´', 'µ', '¸', '½', '¿', 'æ', 'ï', 'ð', 'č', 'ļ', 'Ś', 'Ÿ', 'Ż', 'জ', 'ত',
         'ন', 'া', 'ে', 'ো', '‡', '√', '┐', '�']
char_to_idx = {char: idx for idx, char in enumerate(chars)}
idx_to_char = {idx: char for char, idx in char_to_idx.items()}
vocab_size = len(chars)
embed_dim = 128

model = CharGenModel(vocab_size, embed_dim, 6, 6).to(device)
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print(f"Model has {round(params/1000000,3)}M parameters")
print(f"Model has {params} parameters")

# map_location ensures the checkpoint also loads on CPU-only machines.
model.load_state_dict(torch.load("OmegLSTM.pt", map_location=device))
print(generate_text(model, char_to_idx, idx_to_char, "Interests: \nYou: Hello\nStranger: ", gen_length=128, temperature=0.7, top_k=4))
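The final line can be changed to prompt the model with your own interests or different sampling settings; for example (the interests string and settings below are only illustrative):

print(generate_text(model, char_to_idx, idx_to_char,
                    "Interests: music gaming\nYou: hi\nStranger: ",
                    gen_length=512, temperature=0.8, top_k=8))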
Training
The model was trained for 10 epochs on the entire dataset. Loss graph:
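The training script itself is not included here; the sketch below shows one way such a character-level, next-character training loop could look. It reuses model, vocab_size and device from the inference script above, and the placeholder corpus tensor, batch size, learning rate and optimizer are assumptions, not the settings actually used.

# Minimal training-loop sketch (assumptions: Adam, lr 1e-3, sequence length 128,
# batch size 64, and a placeholder corpus tensor; not the author's actual script).
seq_len, batch_size, epochs = 128, 64, 10
# `encoded` is assumed to be a 1-D LongTensor of character indices for the whole corpus.
encoded = torch.randint(0, vocab_size, (1_000_000,))  # placeholder data

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

model.train()
for epoch in range(epochs):
    for start in range(0, encoded.size(0) - seq_len * batch_size - 1, seq_len * batch_size):
        # Build a batch of input/target windows shifted by one character.
        xs = [encoded[start + b * seq_len : start + (b + 1) * seq_len] for b in range(batch_size)]
        ys = [encoded[start + b * seq_len + 1 : start + (b + 1) * seq_len + 1] for b in range(batch_size)]
        x = torch.stack(xs).to(device)   # (batch, seq_len)
        y = torch.stack(ys).to(device)   # (batch, seq_len)
        logits = model(x)                # (batch, seq_len, vocab_size)
        loss = criterion(logits.reshape(-1, vocab_size), y.reshape(-1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"epoch {epoch + 1}: loss {loss.item():.4f}")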