OmegLSTM
OmegLSTM is an LSTM-based, character-level text generation network trained on chat logs from the now-defunct random chat website "Omegle".
WARNING!!!!!!
This model will produce highly sexual content. Do not use it if you are under 18!
Prompt format
Interests: interests separated by spaces, or leave empty
You: ...
Stranger: ...
etc
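For example, a prompt could look like this (the interests and dialogue lines are only an illustration of the format above; ending on "Stranger: " lets the model complete the stranger's reply):

Interests: music gaming
You: hi
Stranger: 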
Architecture
This model uses an (as far as I am aware) novel architecture: several LSTMs are run in parallel (like attention heads) and their outputs are summed, and these multi-LSTM layers are then stacked in sequence with residual connections and pointwise-convolution feed-forward sublayers, similar to transformer blocks. It has 4,987,763 parameters.
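Schematically, the data flow through one block is summarized below (this is just a restatement of the MHL and SLSTM_block classes in the inference code further down):

    x -> LayerNorm -> [parallel LSTM heads] -> sum -> LayerNorm -> add residual
      -> LayerNorm -> Conv1d(1x1) -> GELU -> Conv1d(1x1) -> GELU -> add residual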
Inference code
Place the following code in a Python file in the same folder as the .pt file and run it. For CPU inference, change device = torch.device("cuda:0") to device = torch.device("cpu").
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from tqdm import tqdm
from einops import rearrange

device = torch.device("cuda:0")
class MHL(nn.Module):
    """Multi-head LSTM: several LSTMs run in parallel, outputs summed, then LayerNorm."""
    def __init__(self, embed_dim, heads):
        super(MHL, self).__init__()
        self.lstms = nn.ModuleList([nn.LSTM(embed_dim, embed_dim, batch_first=True) for _ in range(heads)])
        self.ln = nn.LayerNorm(embed_dim)

    def forward(self, x):
        # Run each LSTM head over the sequence and sum their outputs.
        combo = [lstm(x)[0] for lstm in self.lstms]
        combo = torch.sum(torch.stack(combo), dim=0)
        return self.ln(combo)
class SLSTM_block(nn.Module):
    """Transformer-style block: multi-head LSTM sublayer plus a pointwise-conv feed-forward sublayer, each with a residual connection."""
    def __init__(self, embed_dim, heads):
        super(SLSTM_block, self).__init__()
        self.lstms = MHL(embed_dim, heads)
        self.c1 = nn.Conv1d(embed_dim, embed_dim, 1, 1)
        self.c2 = nn.Conv1d(embed_dim, embed_dim, 1, 1)
        self.ac = nn.GELU()
        self.norm = nn.LayerNorm(embed_dim)
        self.norm2 = nn.LayerNorm(embed_dim)

    def forward(self, x):
        # Multi-head LSTM sublayer with residual connection.
        skip = x
        x = self.norm(x)
        x = self.lstms(x)
        x = x + skip
        # Pointwise (1x1) convolution feed-forward sublayer with residual connection.
        skip = x
        x = self.norm2(x)
        x = rearrange(x, "b l c -> b c l")
        x = self.c1(x)
        x = self.ac(x)
        x = self.c2(x)
        x = self.ac(x)
        x = rearrange(x, "b c l -> b l c")
        x = x + skip
        return x
class CharGenModel(nn.Module):
    def __init__(self, vocab_size, embed_dim, layers, heads):
        super(CharGenModel, self).__init__()
        # Embedding layer
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.blocks = nn.ModuleList([SLSTM_block(embed_dim, heads) for _ in range(layers)])
        # Output layer to predict next character
        self.fc = nn.Linear(embed_dim, vocab_size)

    def forward(self, x):
        # x: (batch_size, sequence_length)
        # Get embeddings
        x = self.embedding(x)  # (batch_size, sequence_length, embed_dim)
        for block in self.blocks:
            x = block(x)
        # Output layer
        x = self.fc(x)  # (batch_size, sequence_length, vocab_size)
        return x
# Helper function to generate text
def generate_text(model, char_to_idx, idx_to_char, start_text, gen_length=100, temperature=1.0, top_k=10):
    model.eval()
    input_seq = torch.tensor([char_to_idx[char] for char in start_text], dtype=torch.long).unsqueeze(0).to(device)
    generated_text = start_text
    for _ in tqdm(range(gen_length)):
        with torch.no_grad():
            # Get the output for the last 128 tokens
            output = model(input_seq[:, -128:])
            # Get the logits for the last timestep
            logits = output[:, -1, :]
            # Apply temperature scaling
            logits = logits / temperature
            # Apply top-k filtering
            if top_k > 0:
                values, indices = torch.topk(logits, top_k)
                logits_top_k = torch.zeros_like(logits).fill_(-float('Inf'))
                logits_top_k.scatter_(1, indices, values)
                logits = logits_top_k
            # Apply softmax to get probabilities
            probabilities = F.softmax(logits, dim=-1)
            # Sample the next character index from the probability distribution
            next_char_idx = torch.multinomial(probabilities, num_samples=1).item()
            next_char = idx_to_char[next_char_idx]
            # Append the generated character to the result
            generated_text += next_char
            # Update the input sequence
            input_seq = torch.cat((input_seq, torch.tensor([[next_char_idx]]).to(device)), dim=1)
    model.train()
    return generated_text
chars = ['\n', ' ', '!', '"', '#', '$', '&', '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '>', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '|', '\x86', '\x8d', '\x91', '\x9f', '©', 'ª', '\xad', '´', 'µ', '¸', '½', '¿', 'æ', 'ï', 'ð', 'č', 'ļ', 'Ś', 'Ÿ', 'Ż', 'জ', 'ত',
         'ন', 'া', 'ে', 'ো', '‡', '√', '┐', '�']
char_to_idx = {char: idx for idx, char in enumerate(chars)}
idx_to_char = {idx: char for char, idx in char_to_idx.items()}
vocab_size = len(chars)
embed_dim = 128

model = CharGenModel(vocab_size, embed_dim, 6, 6).to(device)
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print(f"Model has {round(params/1000000,3)}M parameters")
print(f"Model has {params} parameters")

# map_location ensures the checkpoint also loads on CPU-only machines.
model.load_state_dict(torch.load("OmegLSTM.pt", map_location=device))
print(generate_text(model, char_to_idx, idx_to_char, "Interests: \nYou: Hello\nStranger: ", gen_length=128, temperature=0.7, top_k=4))
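The final line can be changed to prompt the model with your own interests or different sampling settings; for example (the interests string and settings below are only illustrative):

print(generate_text(model, char_to_idx, idx_to_char,
                    "Interests: music gaming\nYou: hi\nStranger: ",
                    gen_length=512, temperature=0.8, top_k=8))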
Training
The model was trained for 10 epochs on the entire dataset. Loss graph:
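The training script itself is not included here; the sketch below shows one way such a character-level, next-character training loop could look. It reuses model, vocab_size and device from the inference script above, and the placeholder corpus tensor, batch size, learning rate and optimizer are assumptions, not the settings actually used.

# Minimal training-loop sketch (assumptions: Adam, lr 1e-3, sequence length 128,
# batch size 64, and a placeholder corpus tensor; not the author's actual script).
seq_len, batch_size, epochs = 128, 64, 10
# `encoded` is assumed to be a 1-D LongTensor of character indices for the whole corpus.
encoded = torch.randint(0, vocab_size, (1_000_000,))  # placeholder data

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

model.train()
for epoch in range(epochs):
    for start in range(0, encoded.size(0) - seq_len * batch_size - 1, seq_len * batch_size):
        # Build a batch of input/target windows shifted by one character.
        xs = [encoded[start + b * seq_len : start + (b + 1) * seq_len] for b in range(batch_size)]
        ys = [encoded[start + b * seq_len + 1 : start + (b + 1) * seq_len + 1] for b in range(batch_size)]
        x = torch.stack(xs).to(device)   # (batch, seq_len)
        y = torch.stack(ys).to(device)   # (batch, seq_len)
        logits = model(x)                # (batch, seq_len, vocab_size)
        loss = criterion(logits.reshape(-1, vocab_size), y.reshape(-1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"epoch {epoch + 1}: loss {loss.item():.4f}")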