ffreemt committed
Commit 26b527f
1 Parent(s): a8537f4
Files changed (1)
  1. app.py +16 -4
app.py CHANGED
@@ -1,6 +1,18 @@
+"""Try out gradio.Chatinterface.
+
+colab gradio-chatinterface.
+
+%%writefile reuirements.txt
+gradio
+transformers
+sentencepiece
+torch
+
+"""
+# pylint: disable=line-too-long, missing-module-docstring, missing-function-docstring
 # import torch
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
+from transformers import AutoModel, AutoTokenizer  # AutoModelForCausalLM,
 
 # device = "cuda" if torch.cuda.is_available() else "cpu"
 
@@ -12,7 +24,7 @@ tokenizer = AutoTokenizer.from_pretrained(
     "THUDM/chatglm2-6b-int4", trust_remote_code=True
 )
 chat_model = AutoModel.from_pretrained(
-    "THUDM/chatglm2-6b-int4", trust_remote_code=True
+    "THUDM/chatglm2-6b-int4", trust_remote_code=True  # 3.92G
 ).float()
 
 
@@ -21,7 +33,7 @@ def chat(message, history):
     # inputs = tokenizer(prompt, return_tensors="pt").to(device=device)
     # output = model.generate(**inputs, do_sample=True, top_p=0.95, top_k=0, max_new_tokens=256)
     # return tokenizer.decode(output[0], skip_special_tokens=True)
-    for response, history in chat_model.stream_chat(
+    for response, _ in chat_model.stream_chat(
         tokenizer, message, history, max_length=2048, top_p=0.7, temperature=0.95
     ):
         yield response
@@ -34,5 +46,5 @@ gr.ChatInterface(
     examples=[
         ["test me"],
     ],
-    theme=gr.themes.Soft(),
+    theme=gr.themes.Glass(text_size="sm", spacing_size="sm"),
 ).queue(max_size=2).launch()
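
For reference, below is a minimal sketch of what app.py looks like after this commit, assembled only from the lines visible in the diff above. The unchanged lines the diff skips (new-file lines 19-23 and 40-45) are not reproduced, and passing chat as the first argument to gr.ChatInterface is an assumption, so treat this as an approximation rather than the exact committed file.

# Sketch assembled from the diff above; not the exact committed file.
import gradio as gr
from transformers import AutoModel, AutoTokenizer

# Load the int4-quantized ChatGLM2-6B checkpoint (about 3.92 GB per the diff
# comment); .float() keeps it runnable on CPU.
tokenizer = AutoTokenizer.from_pretrained(
    "THUDM/chatglm2-6b-int4", trust_remote_code=True
)
chat_model = AutoModel.from_pretrained(
    "THUDM/chatglm2-6b-int4", trust_remote_code=True
).float()


def chat(message, history):
    # stream_chat yields (partial_response, updated_history) tuples; only the
    # growing response is needed, so the returned history is discarded.
    for response, _ in chat_model.stream_chat(
        tokenizer, message, history, max_length=2048, top_p=0.7, temperature=0.95
    ):
        yield response


# The opening arguments of gr.ChatInterface(...) are not shown in the diff;
# passing chat as the streaming callback is assumed here.
gr.ChatInterface(
    chat,
    examples=[["test me"]],
    theme=gr.themes.Glass(text_size="sm", spacing_size="sm"),
).queue(max_size=2).launch()

Because chat is a generator, gr.ChatInterface streams each partial response into the chatbot as it arrives, and .queue(max_size=2) caps how many requests can wait at once, which matters for a CPU-only model of this size.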