carolinefrascasnowflake committed on
Commit
3ae0fe0
1 Parent(s): 3c2cf7c
Files changed (1) hide show
  1. app.py +32 -3
app.py CHANGED
@@ -1,6 +1,16 @@
1
  import streamlit as st
2
  import replicate
3
  import os
 
 
 
 
 
 
 
 
 
 
4
 
5
  # App title
6
  st.set_page_config(page_title="Snowflake Arctic")
@@ -21,7 +31,7 @@ with st.sidebar:
21
 
22
  os.environ['REPLICATE_API_TOKEN'] = replicate_api
23
  st.subheader("Adjust model parameters")
24
- temperature = st.sidebar.slider('temperature', min_value=0.01, max_value=5.0, value=0.6, step=0.01)
25
  top_p = st.sidebar.slider('top_p', min_value=0.01, max_value=1.0, value=0.9, step=0.01)
26
 
27
  # Store LLM-generated responses
@@ -37,7 +47,20 @@ def clear_chat_history():
37
  st.session_state.messages = [{"role": "assistant", "content": "Hi. I'm Arctic, a new, efficient, intelligent, and truly open language model created by Snowflake AI Research. Ask me anything."}]
38
  st.sidebar.button('Clear chat history', on_click=clear_chat_history)
39
 
40
- st.sidebar.caption('Built by [Snowflake](https://snowflake.com/) to demonstrate [Snowflake Arctic](https://www.snowflake.com/blog/arctic-open-and-efficient-foundation-language-models-snowflake).')
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  # Function for generating Snowflake Arctic response
43
  def generate_arctic_response():
@@ -50,9 +73,15 @@ def generate_arctic_response():
50
 
51
  prompt.append("<|im_start|>assistant")
52
  prompt.append("")
 
53
 
 
 
 
 
 
54
  for event in replicate.stream("snowflake/snowflake-arctic-instruct",
55
- input={"prompt": "\n".join(prompt),
56
  "prompt_template": r"{prompt}",
57
  "temperature": temperature,
58
  "top_p": top_p,
 
1
  import streamlit as st
2
  import replicate
3
  import os
4
+ from transformers import AutoTokenizer
5
+
6
+ # # Assuming you have a specific tokenizer for Llama; if not, use an appropriate one like this
7
+ # tokenizer = AutoTokenizer.from_pretrained("allenai/llama")
8
+
9
+ # text = "Example text to tokenize."
10
+ # tokens = tokenizer.tokenize(text)
11
+ # num_tokens = len(tokens)
12
+
13
+ # print("Number of tokens:", num_tokens)
14
 
15
  # App title
16
  st.set_page_config(page_title="Snowflake Arctic")
 
31
 
32
  os.environ['REPLICATE_API_TOKEN'] = replicate_api
33
  st.subheader("Adjust model parameters")
34
+ temperature = st.sidebar.slider('temperature', min_value=0.01, max_value=5.0, value=0.3, step=0.01)
35
  top_p = st.sidebar.slider('top_p', min_value=0.01, max_value=1.0, value=0.9, step=0.01)
36
 
37
  # Store LLM-generated responses
 
47
  st.session_state.messages = [{"role": "assistant", "content": "Hi. I'm Arctic, a new, efficient, intelligent, and truly open language model created by Snowflake AI Research. Ask me anything."}]
48
  st.sidebar.button('Clear chat history', on_click=clear_chat_history)
49
 
50
+ st.sidebar.caption('Built by [Snowflake](https://snowflake.com/) to demonstrate [Snowflake Arctic](https://www.snowflake.com/blog/arctic-open-and-efficient-foundation-language-models-snowflake). App hosted on [Streamlit Community Cloud](https://streamlit.io/cloud). Model hosted by [Replicate](https://replicate.com/snowflake/snowflake-arctic-instruct).')
51
+
52
+ @st.cache_resource
53
+ def get_tokenizer():
54
+ """Get a tokenizer to make sure we're not sending too much text
55
+ to the Model. Eventually we will replace this with ArcticTokenizer
56
+ """
57
+ return AutoTokenizer.from_pretrained("huggyllama/llama-7b")
58
+
59
+ def get_num_tokens(prompt):
60
+ """Get the number of tokens in a given prompt"""
61
+ tokenizer = get_tokenizer()
62
+ tokens = tokenizer.tokenize(prompt)
63
+ return len(tokens)
64
 
65
  # Function for generating Snowflake Arctic response
66
  def generate_arctic_response():
 
73
 
74
  prompt.append("<|im_start|>assistant")
75
  prompt.append("")
76
+ prompt_str = "\n".join(prompt)
77
 
78
+ if get_num_tokens(prompt_str) >= 4096:
79
+ st.error("Conversation length too long. Please keep it under 4096 tokens.")
80
+ st.button('Clear chat history', on_click=clear_chat_history, key="clear_chat_history")
81
+ st.stop()
82
+
83
  for event in replicate.stream("snowflake/snowflake-arctic-instruct",
84
+ input={"prompt": prompt_str,
85
  "prompt_template": r"{prompt}",
86
  "temperature": temperature,
87
  "top_p": top_p,