ybelkada committed
Commit 70dbbe5
1 Parent(s): fe18ba8

Update app.py

Files changed (1):
  1. app.py +15 -2
app.py CHANGED
@@ -4,7 +4,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import gradio as gr
 from threading import Thread
 
-MODEL = "tiiuae/falcon3-7b-1.58bit"
+MODEL = "tiiuae/falcon3-7b-instruct-1.58bit"
 
 TITLE = "<h1><center>Falcon3-1.58 bit playground</center></h1>"
 SUB_TITLE = """<center>This interface has been created for quick validation purposes, do not use it for production. Bear also in mind the model is a pretrained model.</center>"""
@@ -46,7 +46,20 @@ def stream_chat(
     penalty: float = 1.2,
 ):
     print(f'message: {message}')
-    inputs = tokenizer.encode(message, return_tensors="pt").to(device)
+    print(f'history: {history}')
+
+    conversation = []
+    for prompt, answer in history:
+        conversation.extend([
+            {"role": "user", "content": prompt},
+            {"role": "assistant", "content": answer},
+        ])
+
+
+    conversation.append({"role": "user", "content": message})
+    input_text = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt = True)
+
+    inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
     streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
 
     generate_kwargs = dict(
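
For context, this commit stops encoding the raw message directly and instead renders the full conversation through the tokenizer's chat template, which pairs with the switch to the instruct checkpoint. Below is a minimal standalone sketch of what the new prompt-building path produces, assuming the instruct repo ships a chat template and that history arrives as (prompt, answer) pairs, as Gradio's ChatInterface provides and the loop above expects:

from transformers import AutoTokenizer

MODEL = "tiiuae/falcon3-7b-instruct-1.58bit"
tokenizer = AutoTokenizer.from_pretrained(MODEL)

# History as Gradio passes it: a list of (prompt, answer) pairs.
history = [("Hi!", "Hello! How can I help?")]
message = "What does 1.58-bit quantization mean?"

# Rebuild the multi-turn conversation in the role/content format
# that apply_chat_template expects.
conversation = []
for prompt, answer in history:
    conversation.extend([
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": answer},
    ])
conversation.append({"role": "user", "content": message})

# add_generation_prompt=True appends the assistant header so the model
# continues as the assistant instead of completing the user's text.
input_text = tokenizer.apply_chat_template(
    conversation, tokenize=False, add_generation_prompt=True
)
print(input_text)

The removed line encoded message alone, which dropped prior turns and the chat special tokens; templating the whole conversation is presumably why the model swap and this change land in the same commit.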