model files uploaded
- app.py +118 -0
- better_transformer.py +399 -0
- bt_8_LAYERs_100_DATA_PCT_768_EMBD_DIM_epoch_10.pt +3 -0
- requirements +70 -0
app.py  ADDED  @@ -0,0 +1,118 @@
import streamlit as st
import time

from better_transformer import *

def main():

    # Enable CUDA if available and load in tokenizer
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    tokenizer, EMPTY_TOKENS = load_tokenizer(device)

    st.title("Scaling Transformers")
    st.subheader("UCLA DSU Project, Fall 2023")
    st.markdown("Daniel Mendelevitch \n Terry Ming \n Casey Tattersall \n Sean Tjoa")

    st.header("What Are Transformers? 🚗🔄🤖")

    header_text = """A transformer is a specific type of neural network that uses a mechanism called self-attention to learn the context (and
    thus meaning) of sequential data. Transformer-based models can be used in many different domains, such as processing language, predicting
    the weather, or even generating images. \n\n You might be familiar with ChatGPT, a Transformer-based model which cost over \$100 million to train. \n In contrast, we spent \$40*.
    """
    st.markdown(header_text)

    st.header("Let's make some stories! 📖")

    # Input from user
    user_input = st.text_input("Enter your prompt:", placeholder="Write a prompt to make a story of your own or leave it empty for a random story!").strip()

    if st.checkbox("Show Prompting Tips"):
        st.markdown("Our model was trained on the TinyStories dataset, a collection of synthetic short stories generated by GPT-4. These stories only contain words and themes that a typical 3-4 year old would understand.")
        st.markdown(
            """
            - Use simple vocabulary - words and themes that would appear in a children's story
            - Avoid using idioms - for example, instead of "hit the gym", say "went to the gym"
            - Include plenty of descriptive adjectives
            - The model often struggles with names - using common names and only including a person's first name can help
            """
        )

    ## Default values for advanced settings
    user_seed = 27  # Remove if we're not rigging the "random" demo
    generation_method = "top-k"
    specified_k = 5
    specified_nucleus = 0.5
    specified_temperature = 0.9
    max_tokens = 400

    if st.checkbox("Show Advanced Settings"):
        user_seed = st.number_input("Randomness Seed:", value=None, step=1, placeholder="Use to replicate response", min_value=1)
        generation_method = st.selectbox("Method of Generation:", ("top-k", "multinomial", "temperature", "greedy", "nucleus"), index=0).strip()

        if generation_method == "top-k":
            specified_k = st.number_input("Value for k:", value=5, step=1)

        if generation_method == "nucleus":
            specified_nucleus = st.number_input("Value for p:", value=0.5, step=0.05, min_value=0.0, max_value=1.0)

        if generation_method == "temperature":
            specified_temperature = st.number_input("Value for temperature:", value=0.9, step=0.05, min_value=0.0, max_value=1.0)

        max_tokens = st.slider('Max Tokens Generated:', 100, 500, 400)

    ## Settings clean-up
    if not user_seed:
        user_seed = 7

    # model_version = st.radio("Which model would you like to use?", ["smoll", "beeg"])
    # small_model = load_casey_model(tokenizer, device)
    model = load_big_model(tokenizer, device)

    if st.button('Write my story!'):
        placeholder = st.empty()
        # if model_version == 'smoll':
        #     model = load_casey_model(tokenizer, device)
        # elif model_version == 'beeg':
        #     model = load_big_model(tokenizer, device)
        # with placeholder.container():
        #     st.write("Model Loaded! Preparing to Generate...")

        with st.spinner(""):
            result = generate(model, tokenizer, device, method=generation_method, k=specified_k,
                              p_nucleus=specified_nucleus, temp=specified_temperature, max_new_tokens=max_tokens,
                              cond=user_input, deterministic=user_seed)

        # Stream the user's prompt back in bold, word by word
        streamed_input = ""
        for word in user_input.split(' '):
            streamed_input += word
            with placeholder.container():
                st.markdown(f"**{streamed_input}**")
            streamed_input += " "
            time.sleep(0.1)

        if user_input != "":  ## conditional generation: strip the echoed prompt and delimiter from the output
            result = result[len(user_input) + 3:]
            streamed_result = f"**{streamed_input[:-1]}**"
            time.sleep(1)
        else:  ## unconditional generation
            streamed_result = ""

        # Stream the generated story word by word
        for word in result.split(' '):
            streamed_result += word + ' '
            with placeholder.container():
                st.write(streamed_result)
            time.sleep(0.1)

    if st.button('Clear Output'):
        placeholder = st.empty()

if __name__ == "__main__":
    main()
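app.py is only the Streamlit front end: it pulls load_tokenizer, load_big_model, and generate from better_transformer.py (added below) and streams the result back word by word. To try it outside the Space, the checkpoint named in MODEL_FILE has to sit in the same directory as app.py (see the comment in better_transformer.py), after which the app can be started locally with `streamlit run app.py`.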
better_transformer.py  ADDED  @@ -0,0 +1,399 @@
import os
import json
import random
import time
import streamlit as st
import re
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoTokenizer

MODEL_FILE = r'bt_8_LAYERs_100_DATA_PCT_768_EMBD_DIM_epoch_10.pt'  ## place model file in same directory as app.py

# Better Transformer Class –––––––––––––––––––––––––––––––––––––––––––––––

class MLP(nn.Module):
    def __init__(self, n_embd, dropout=0.1):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_embd, 4 * n_embd),
            nn.GELU(),  # replaced ReLU
            nn.Dropout(p=dropout),
            nn.Linear(4 * n_embd, n_embd),
        )

    def forward(self, x):
        return self.net(x)

class MultiHeadAttention(nn.Module):
    def __init__(self, n_embd, n_head, seq_length, dropout=0.1):
        super().__init__()

        self.n_embd = n_embd
        self.n_head = n_head
        self.head_dim = n_embd // n_head  # Dimension of each head's key, query, and value
        assert self.head_dim * n_head == self.n_embd, "n_embd must be divisible by n_head"
        self.seq_length = seq_length
        self.drop = nn.Dropout(p=dropout)

        self.query = nn.Linear(n_embd, n_embd, bias=False)
        self.key = nn.Linear(n_embd, n_embd, bias=False)
        self.value = nn.Linear(n_embd, n_embd, bias=False)
        self.out = nn.Linear(n_embd, n_embd, bias=False)  # multi-head combining weight matrix

    def split_heads(self, x):
        B, S, D = x.size()
        # split dimension into n_head * head_dim, then transpose the sequence length w/ n_head
        # output: [B, n_head, S, head_dim]
        return x.view(B, S, self.n_head, self.head_dim).transpose(1, 2)

    def combine_heads(self, x):
        # use permute or transpose to reverse
        # taking a view earlier may produce a non-contiguous tensor, so we convert back because view needs a contiguous input
        B, _, S, head_dim = x.size()  # _ is n_head, which we will merge
        # output: [B, S, n_embd]
        return x.transpose(1, 2).contiguous().view(B, S, self.n_embd)

    def scaled_dot_product(self, q, k, v, dropout, mask=None):
        # q, k, v are [B, n_head, S, head_dim]
        # the key transpose sets up batch multiplication s.t. wei = [B, n_head, S, S]
        wei = q @ k.transpose(-2, -1) / np.sqrt(self.head_dim)
        # mask is [B, 1, S, S], so it is simply broadcast across each head and works as expected
        if mask is not None:
            wei = wei.masked_fill(mask, float('-inf'))
        wei = dropout(F.softmax(wei, dim=-1))
        out = wei @ v
        return out

    def forward(self, x, mask=None):
        # x: (B, S, n_embd)
        # Step 1 and 2: Project full query, key, value, then split via reshaping
        q = self.split_heads(self.query(x))
        k = self.split_heads(self.key(x))
        v = self.split_heads(self.value(x))

        # Step 3: Compute scaled dot-product attention with causal mask
        attn = self.scaled_dot_product(q, k, v, self.drop, mask)

        # Step 4 and 5: Concatenate attention scores, return projected output matrix
        out = self.out(self.combine_heads(attn))  # (B, S, n_embd)
        return out

class Block(nn.Module):
    def __init__(self, n_embd, n_head, seq_length, dropout=0.1):
        super().__init__()
        self.sa = MultiHeadAttention(n_embd, n_head, seq_length, dropout)
        self.mlp = MLP(n_embd, dropout)
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)
        # experimentally, apply layer norm before attention/MLP (pre-norm)
        self.drop = nn.Dropout(p=dropout)

    def forward(self, x, mask):
        # residual connection (stream)
        x = x + self.drop(self.sa(self.ln1(x), mask))
        x = x + self.drop(self.mlp(self.ln2(x)))
        return x

class PositionalEncoding(nn.Module):
    """
    Formula taken from the original Transformer paper:
    PE(pos, 2i (even)) = sin(pos/(10000^{2i/d_model}))
    PE(pos, 2i+1 (odd)) = cos(pos/(10000^{2i/d_model}))

    See reference for more details:
    https://kikaben.com/transformers-positional-encoding/
    """
    def __init__(self, d_model, max_len):
        # just set d_model = n_embd and max_len = seq_len
        super().__init__()

        position = torch.arange(max_len).unsqueeze(1)  # [max_len, 1]
        divisor = torch.exp(torch.arange(0, d_model, 2) * (-np.log(10000.0) / d_model))  # [d_model / 2], half for each of sin and cos
        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(position * divisor)
        pe[:, 1::2] = torch.cos(position * divisor)
        self.register_buffer('pe', pe)  # result: self.pe = [max_len, d_model], mapping each token index to a vector of length d_model as desired

    def forward(self, x):
        # x = torch.arange(seq_length) has shape [seq_length], so x.size(0) extracts it, then we index self.pe for the first seq_length mappings
        # note we do not add the positional embeddings to x itself yet, we simply return them
        # output: (seq_length, d_model=n_embd)
        return self.pe[:x.size(0)]

class BetterTransformer(nn.Module):
    def __init__(self, vocab_size, seq_length, n_embd, n_head, n_layer, pad_idx, eos_token_id, device, dropout=0.1):
        super().__init__()
        self.token_embedding = nn.Embedding(vocab_size, n_embd, padding_idx=pad_idx)
        # padding_idx makes the embedding ignore (never update) the padding token
        self.position_embedding = PositionalEncoding(n_embd, seq_length)
        self.blocks = nn.Sequential(*[Block(n_embd,
                                            n_head,
                                            seq_length,
                                            dropout) for _ in range(n_layer)])
        self.lm_head = nn.Linear(n_embd, vocab_size)
        self.drop = nn.Dropout(dropout)
        self.seq_length = seq_length
        self.pad_idx = pad_idx
        self.eos_token_id = eos_token_id
        self.device = device
        self.init_params()

    # optional weight initialization (e.g. Xavier uniform)
    def init_params(self, default_initialization=False):
        if not default_initialization:
            for name, p in self.named_parameters():
                if p.dim() > 1:
                    nn.init.xavier_uniform_(p)

    def get_causal_mask(self, x):
        """
        Generates causal mask for decoding
        """
        seq_len = x.size(-1)  # x = (batch_size x seq_len)
        attn_shape = (1, seq_len, seq_len)
        subsequent_mask = np.triu(np.ones(attn_shape), k=1).astype('uint8')  # k=1 shifts the diagonal, so that the main diagonal gets 0's
        return (torch.from_numpy(subsequent_mask) == 0).to(self.device)  # (1, seq_len, seq_len)
        # True along main diagonal + below, False elsewhere

    def get_pad_mask(self, x, pad_idx):
        """
        Generates padding mask
        """
        return (x != pad_idx).unsqueeze(1).unsqueeze(-2).to(self.device)
        # (batch_size x 1 x 1 x seq_len)

    def forward(self, x, targets=None):

        # should already be int64 tokens, but cast explicitly just in case
        x = x.to(torch.int64)
        B, S = x.shape

        # get mask
        mask = self.get_pad_mask(x, self.pad_idx) & self.get_causal_mask(x).to(self.device)
        # mask = (batch_size x 1 x seq_len x seq_len)

        tok_emb = self.token_embedding(x)
        pos_emb = self.position_embedding(torch.arange(S))
        x = self.drop(tok_emb + pos_emb)
        # (B, S, n_embd)
        for block in self.blocks:
            x = block(x, ~mask)  # (batch_size, seq_length, n_embd)
            # negate mask so the originally False positions get filled with -inf later
        logits = self.lm_head(x)  # (batch_size, seq_length, vocab_size)

        # this code assumes teacher forcing: for each text of seq length S we have S autoregressive predictions,
        # thus we have B*S logits and B*S targets
        if targets is None:
            loss = None
        else:
            B, S, C = logits.shape
            logits = logits.view(B*S, C)
            targets = targets.view(B*S)
            loss = F.cross_entropy(logits, targets, ignore_index=self.pad_idx)
            # the loss ignores the padding token, which avoids wasting compute on learning PAD -> PAD, etc.

        return logits, loss

    def generate(self, input_ids, method='multinomial',
                 max_new_tokens=1000, temp=None,
                 num_beams=None, p_nucleus=None, k=None):

        # TODO: see Huggingface's .generate() function
        # https://huggingface.co/transformers/v3.4.0/_modules/transformers/generation_utils.html

        if method == 'temperature':
            assert (temp is not None) and (0 < temp) and (temp <= 1)
        # if method == 'num_beams':
        #     assert isinstance(num_beams, int) and (num_beams > 0) and (num_beams < 100)
        if method == 'top-k':
            assert isinstance(k, int) and (k > 0)

        # input_ids begins as (batch_size, seq_length)

        for _ in range(max_new_tokens):
            if method in ['multinomial', 'temperature', 'greedy', 'nucleus', 'top-k']:
                # i) Truncate to the most recent `max length` tokens
                text_cond = input_ids[:, -self.seq_length:]
                # ii) Retrieve predictions
                logits, loss = self(text_cond)  # no loss because no targets, of course
                # model output: (batch_size, seq_length, vocab_size)
                # iii) Find last token logits of each
                logits = logits[:, -1, :]  # (batch_size, vocab_size)

                # aside: if temperature sampling, divide logits by temp before applying softmax
                if method == 'temperature':
                    logits = logits / temp

                # iv) Take softmax along each
                probs = F.softmax(logits, dim=-1)

                # v) Sample next token depending on method
                if method == 'greedy':
                    next_idx = probs.argmax(dim=-1).unsqueeze(-1)

                elif method in ['multinomial', 'temperature', 'nucleus', 'top-k']:
                    if method == 'nucleus':
                        assert p_nucleus is not None and (0 < p_nucleus) and (p_nucleus <= 1)

                        sorted_probs, sorted_idx = probs.sort(dim=-1, descending=True)
                        prob_cumsum = sorted_probs.cumsum(dim=-1)
                        idx_remove = prob_cumsum > p_nucleus
                        # shift one right to ensure the first token is above the threshold
                        idx_remove[..., 1:] = idx_remove[..., :-1].clone()
                        idx_remove[..., 0] = False
                        # retrieve original indices by reverse-sorting
                        remove_mask = idx_remove.gather(dim=-1,
                                                        index=sorted_idx.argsort(dim=-1))
                        # ^ specifically, we do this by argsorting the indices returned by the sort. this is crazy, y'all
                        # you can show that this returns indices that, when used to subset a sorted array, return the original array in unsorted order
                        # https://stackoverflow.com/questions/52127723/pytorch-better-way-to-get-back-original-tensor-order-after-torch-sort
                        # torch.gather is how we apply a multi-dimensional index
                        # https://stackoverflow.com/questions/50999977/what-does-the-gather-function-do-in-pytorch-in-layman-terms
                        probs[remove_mask] = 0

                    if method == 'top-k':
                        remove_mask = probs < torch.topk(probs, k).values[..., -1, None]  # topk returns (B, k); [..., -1, None] keeps only the
                        # kth largest prob (i.e. the cutoff value for each row), so the mask is the same size as probs (B, vocab_size)
                        probs[remove_mask] = 0

                    # Sample probabilistically via scores
                    next_idx = torch.multinomial(probs, num_samples=1)  # (batch_size, 1)

                # vi) Autoregressively append to input_ids
                input_ids = torch.cat((input_ids, next_idx), dim=-1)
                # end prematurely if <EOS> generated
                if next_idx == self.eos_token_id:
                    break
                # now input_ids = (batch_size, seq_length + 1)

        return input_ids

# END OF Better Transformer Class –––––––––––––––––––––––––––––––––––––––––––––––
+
|
279 |
+
def set_seed(seed = 42):
|
280 |
+
random.seed(seed)
|
281 |
+
np.random.seed(seed)
|
282 |
+
os.environ['PYTHONHASHSEED'] = str(seed)
|
283 |
+
torch.manual_seed(seed)
|
284 |
+
torch.cuda.manual_seed(seed)
|
285 |
+
# torch.cuda.manual_seed_all(seed) # if multi-GPU
|
286 |
+
torch.backends.cudnn.deterministic=True # only applies to CUDA convolution operations
|
287 |
+
torch.backends.cudnn.benchmark = False
|
288 |
+
# usually CuDNN has heuristics as to which algorithm to pick. cudnn.benchmark benchmarks several algorithms and picks the fastest
|
289 |
+
# often helpful if your input shapes are fixed and not changing a lot during training
|
290 |
+
# however, this means it may pick a different algorithm even when the deterministic flag is set.
|
291 |
+
# As such it is good practice to turn off cudnn.benchmark when turning on cudnn.deterministic
|
292 |
+
|
293 |
+
def load_tokenizer(device):
|
294 |
+
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
|
295 |
+
if tokenizer.pad_token is None:
|
296 |
+
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
|
297 |
+
EMPTY_TOKENS = torch.full((1,1), tokenizer.bos_token_id, dtype=torch.long).to(device)
|
298 |
+
return tokenizer, EMPTY_TOKENS
|
299 |
+
|
300 |
+
|
301 |
+
def load_big_model(tokenizer, device):
|
302 |
+
## Model architecture
|
303 |
+
set_seed(42)
|
304 |
+
N_HEAD = 16
|
305 |
+
N_LAYER = 8
|
306 |
+
N_EMBD = 768
|
307 |
+
VOCAB_SIZE = 50258
|
308 |
+
SEQ_LENGTH = 384
|
309 |
+
|
310 |
+
model = BetterTransformer(VOCAB_SIZE, SEQ_LENGTH, N_EMBD, N_HEAD, N_LAYER, tokenizer.pad_token_id, tokenizer.eos_token_id, device=device)
|
311 |
+
model.init_params()
|
312 |
+
path = MODEL_FILE
|
313 |
+
model.load_state_dict(torch.load(path, map_location=device)["model_state_dict"])
|
314 |
+
|
315 |
+
return model
|
316 |
+
|
317 |
+
def generate(model, tokenizer, device, method=None, k=None,
|
318 |
+
p_nucleus=None, temp=None, max_new_tokens=None, cond="", deterministic=None):
|
319 |
+
"""
|
320 |
+
Wrapper for generating text using the specified model. Generates unconditionally if cond=None.
|
321 |
+
|
322 |
+
Inputs:
|
323 |
+
-model: Decoder model to be used for text generation
|
324 |
+
-tokenizer: Compatible tokenizer
|
325 |
+
-device: Device of model (CPU/CUDA)
|
326 |
+
-method (str): Decoding method for text generation ('multinomial', 'temperature', 'greedy', 'nucleus', or 'top-k')
|
327 |
+
-k (int): Positive integer for top-k logits to sample if top-k decoding
|
328 |
+
-p_nucleus (float/int): Cumulative probability cutoff if nucleus/top-p decoding
|
329 |
+
-temp (float/int): Temperature if temperature decoding
|
330 |
+
-max_new_tokens (int): Maximum number of tokens to generate
|
331 |
+
-cond (str=None): If provided, will serve as conditional prompt for text generation
|
332 |
+
-deterministic (int): If deterministic, uses the specified seed for model generation
|
333 |
+
Returns:
|
334 |
+
-res (str): Generated text string
|
335 |
+
"""
|
336 |
+
assert method in ['multinomial', 'temperature', 'greedy', 'nucleus', 'top-k'], \
|
337 |
+
"method must be 'multinomial', 'temperature', 'greedy', 'nucleus', or 'top-k'"
|
338 |
+
|
339 |
+
#if method == 'temperature':
|
340 |
+
# assert (temp is not None) and isinstance(temp, (int, float)) and (0 < temp) and (temp <= 1), \
|
341 |
+
# "temp must be defined as a number between (0, 1]"
|
342 |
+
#if method == 'nucleus':
|
343 |
+
# assert (p_nucleus is not None) and isinstance(p_nucleus, (int, float)) and (0 < p_nucleus) and (p_nucleus <= 1), \
|
344 |
+
# "p_nucleus must be defined as a number between (0, 1]"
|
345 |
+
## if method == 'num_beams':
|
346 |
+
## assert isinstance(num_beams, int) and (num_beams) > 0 and (num_beams) < 100
|
347 |
+
#if method == 'top-k':
|
348 |
+
# assert (k is not None) and isinstance(k, int) and (k > 0) and (k < SEQ_LENGTH), \
|
349 |
+
# "k must be defined as an integer greater than 0 and less than the model sequence length"
|
350 |
+
|
351 |
+
#if max_new_tokens is None:
|
352 |
+
# print('No max_new_tokens provided, using a default value of 250\n')
|
353 |
+
# max_new_tokens = 250
|
354 |
+
|
355 |
+
#assert (max_new_tokens is not None) and isinstance(max_new_tokens, int) and (max_new_tokens) > 0 and (max_new_tokens) <= 1000, \
|
356 |
+
#"max_new_tokens must be an integer between (0, 1000]"
|
357 |
+
|
358 |
+
if deterministic is not None:
|
359 |
+
set_seed(deterministic)
|
360 |
+
|
361 |
+
if cond != "":
|
362 |
+
|
363 |
+
cond_tokens = tokenizer(cond).input_ids
|
364 |
+
|
365 |
+
gen_tokens = model.generate(torch.tensor(cond_tokens).unsqueeze(0).long().to(device),
|
366 |
+
method=method, k=k, p_nucleus=p_nucleus, temp=temp,
|
367 |
+
max_new_tokens=max_new_tokens)[0]
|
368 |
+
|
369 |
+
# Insert delimiter to indicate where prompt ends
|
370 |
+
gen_prep = torch.zeros(len(gen_tokens)+2).long() # make space for two more tokens for delimiter
|
371 |
+
gen_prep -= 1
|
372 |
+
gen_prep[:len(cond_tokens)] = gen_tokens[:len(cond_tokens)]
|
373 |
+
gen_prep[-(len(gen_tokens)-len(cond_tokens)):] = gen_tokens[-(len(gen_tokens)-len(cond_tokens)):]
|
374 |
+
gen_prep[gen_prep == -1] = torch.tensor(tokenizer.encode(' || ')) # insert tokens for || in between
|
375 |
+
|
376 |
+
res = tokenizer.decode(gen_prep)
|
377 |
+
res = re.sub(re.escape(tokenizer.bos_token), '', res, count=1) ## Remove end token
|
378 |
+
|
379 |
+
|
380 |
+
else:
|
381 |
+
empty_tokens = torch.full((1,1), tokenizer.bos_token_id, dtype=torch.long).to(device)
|
382 |
+
|
383 |
+
res = tokenizer.batch_decode(model.generate(empty_tokens,
|
384 |
+
method=method, k=k,
|
385 |
+
p_nucleus=p_nucleus, temp=temp,
|
386 |
+
max_new_tokens=max_new_tokens))[0]
|
387 |
+
|
388 |
+
res = re.sub(re.escape(tokenizer.bos_token), '', res, count=2) ## Remove start and end tokens
|
389 |
+
|
390 |
+
# Clean up Unicode character issues
|
391 |
+
# '“' then 'â€' = opening and closing double quotes
|
392 |
+
# '’' = apostrophe
|
393 |
+
res = re.sub(r'“', '"', res)
|
394 |
+
res = re.sub(r'’', "'", res)
|
395 |
+
res = re.sub(r'â€', '"', res)
|
396 |
+
res = res + " [END]" ## better end token
|
397 |
+
return res
|
398 |
+
|
399 |
+
|
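The module above contains everything needed to generate a story outside of Streamlit. Below is a minimal headless sketch, assuming the checkpoint named in MODEL_FILE has been downloaded next to the script and that generation runs on CPU; the prompt and settings are arbitrary examples, not values from the repo.

# Headless usage sketch (illustrative; assumes the MODEL_FILE checkpoint is present locally).
import torch
from better_transformer import load_tokenizer, load_big_model, generate

device = torch.device('cpu')               # CPU keeps the sketch simple; the app picks CUDA when available
tokenizer, _ = load_tokenizer(device)      # GPT-Neo tokenizer with an added [PAD] token
model = load_big_model(tokenizer, device)  # 8-layer, 768-dim BetterTransformer restored from the .pt file
model.eval()                               # disable dropout for generation

story = generate(model, tokenizer, device,
                 method="top-k", k=5,      # the app's default decoding settings
                 max_new_tokens=400,
                 cond="One day, a little dog named Max",  # example prompt
                 deterministic=27)
print(story)

This is the same call app.py issues when the "Write my story!" button is pressed, with the settings taken from the advanced-settings widgets.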
bt_8_LAYERs_100_DATA_PCT_768_EMBD_DIM_epoch_10.pt  ADDED  @@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cb144d619ba6f662571efa9936e655274289d6773d3f4ee37601f16e9484a20d
size 1608405399
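The checkpoint itself is stored via Git LFS, so the three lines above are only the pointer file; the actual weights (roughly 1.6 GB, per the size field) are fetched with `git lfs pull` after cloning the Space.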
requirements  ADDED  @@ -0,0 +1,70 @@
aiohttp==3.9.1
aiosignal==1.3.1
altair==4.0.0
async-timeout==4.0.3
attrs==23.1.0
blinker==1.7.0
cachetools==5.3.2
certifi==2023.11.17
charset-normalizer==3.3.2
click==8.1.7
datasets==2.15.0
dill==0.3.7
entrypoints==0.4
filelock==3.13.1
frozenlist==1.4.0
fsspec==2023.10.0
gitdb==4.0.11
GitPython==3.1.40
huggingface-hub==0.19.4
idna==3.6
importlib-metadata==7.0.0
Jinja2==3.1.2
jsonschema==4.20.0
jsonschema-specifications==2023.11.2
markdown-it-py==3.0.0
MarkupSafe==2.1.3
mdurl==0.1.2
mpmath==1.3.0
multidict==6.0.4
multiprocess==0.70.15
networkx==3.2.1
numpy==1.26.2
packaging==23.2
pandas==2.1.3
Pillow==10.1.0
protobuf==3.20.3
pyarrow==14.0.1
pyarrow-hotfix==0.6
pydeck==0.8.1b0
Pygments==2.17.2
Pympler==1.0.1
python-dateutil==2.8.2
pytz==2023.3.post1
PyYAML==6.0.1
referencing==0.31.1
regex==2023.10.3
requests==2.31.0
rich==13.7.0
rpds-py==0.13.2
safetensors==0.4.1
semver==3.0.2
six==1.16.0
smmap==5.0.1
streamlit==1.12.0
sympy==1.12
tokenizers==0.15.0
toml==0.10.2
toolz==0.12.0
torch==2.1.1
tornado==6.4
tqdm==4.66.1
transformers==4.35.2
typing_extensions==4.8.0
tzdata==2023.3
tzlocal==5.2
urllib3==2.1.0
validators==0.22.0
xxhash==3.4.1
yarl==1.9.3
zipp==3.17.0
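Note: Hugging Face Spaces installs Python dependencies from a file named requirements.txt; if this file is committed as `requirements` without the extension, the Space may not pick it up automatically, and the packages above would then need to be installed manually (e.g. with pip).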