crang committed
Commit 1970af8
Parent: 0bcc140

Update app.py

Files changed (1)
  1. app.py +4 -21
app.py CHANGED
@@ -8,9 +8,9 @@ import time
 import subprocess
 
 MIN_TOKENS=128
-MAX_TOKENS=128000
-DEFAULT_TOKENS=4096
-DURATION=120
+MAX_TOKENS=8192
+DEFAULT_TOKENS=2048
+DURATION=60
 
 # Install flash attention
 subprocess.run(
@@ -36,19 +36,6 @@ model.to(device)
 def handle_error(error):
     return {"error": str(error)}
 
-# Define a custom component for the token counter
-class TokenCounter(gr.components.Textbox):
-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
-        self.token_count = 0
-
-    def update(self, message):
-        self.token_count = len(tokenizer.encode(message, truncation=True))
-        self.update_text(f"Token count: {self.token_count}")
-
-# Create a token counter component
-token_counter = TokenCounter(label="Token Count", interactive=False)
-
 # Define chat function with input validation and error handling
 @spaces.GPU(duration=DURATION)
 def chat(message, history, temperature, do_sample, max_tokens):
@@ -69,9 +56,6 @@ def chat(message, history, temperature, do_sample, max_tokens):
         chat.append({"role": "assistant", "content": item[1]})
     chat.append({"role": "user", "content": message})
 
-    # Update the token counter
-    token_counter.update(message)
-
     # Generate response
     messages = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
     model_inputs = tokenizer([messages], return_tensors="pt").to(device)
@@ -121,7 +105,6 @@ demo = gr.ChatInterface(
             label="Max new tokens",
             render=False,
         ),
-        token_counter,  # Add the token counter component
     ],
     stop_btn="Stop Generation",
     title="Chat With LLMs",
@@ -129,4 +112,4 @@ demo = gr.ChatInterface(
 )
 
 # Launch Gradio app
-demo.launch()
+demo.launch()
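
The removed TokenCounter subclass could not have worked as written: gr.Textbox exposes no update_text method, and every component listed in gr.ChatInterface's additional_inputs is passed to the chat function as an extra argument, which chat's five-parameter signature does not accept. A minimal sketch of how a live counter could be wired up instead, using a plain Textbox and a .change event; it assumes the same tokenizer object app.py already loads, and the count_tokens helper is hypothetical:

import gradio as gr

def count_tokens(message):
    # Hypothetical helper: encode the draft message with the app's
    # tokenizer (assumed to be in scope) and report the token count.
    return f"Token count: {len(tokenizer.encode(message, truncation=True))}"

with gr.Blocks() as demo:
    msg = gr.Textbox(label="Message")
    counter = gr.Textbox(label="Token count", interactive=False)
    # Recompute the count whenever the message text changes.
    msg.change(fn=count_tokens, inputs=msg, outputs=counter)

Because ChatInterface owns its input textbox, a display-only counter fits a Blocks layout like this more naturally than additional_inputs, which is presumably why the commit drops the component outright rather than rewiring it.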