sdung committed on
Commit
cb08a5b
·
verified ·
1 Parent(s): b4cb98a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -9
app.py CHANGED
@@ -9,14 +9,24 @@ from transformers import (
9
  )
10
  from threading import Thread
11
 
12
- # The huggingface model id for Microsoft's phi-2 model
13
- checkpoint = "microsoft/phi-2"
14
 
15
- # Download and load model and tokenizer
16
- tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
17
- model = AutoModelForCausalLM.from_pretrained(
18
- checkpoint, torch_dtype=torch.float32, device_map="cpu", trust_remote_code=True
19
- )
 
 
 
 
 
 
 
 
 
 
20
 
21
  # Text generation pipeline
22
  phi2 = pipeline(
@@ -80,9 +90,9 @@ with gr.Blocks() as demo:
80
  gr.Markdown(
81
  """
82
  # Phi-2 Chatbot Demo
83
- This chatbot was created using Microsoft's 2.7 billion parameter [phi-2](https://huggingface.co/microsoft/phi-2) Transformer model.
84
 
85
- In order to reduce the response time on this hardware, `max_new_tokens` has been set to `128` in the text generation pipeline. With this default configuration, it takes approximately `60 seconds` for the response to start being generated, and streamed one word at a time. Use the slider below to increase or decrease the length of the generated text.
86
  """
87
  )
88
 
 
9
  )
10
  from threading import Thread
11
 
12
+ ## The huggingface model id for Microsoft's phi-2 model
13
+ #checkpoint = "microsoft/phi-2"
14
 
15
+ ## Download and load model and tokenizer
16
+ #tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
17
+ #model = AutoModelForCausalLM.from_pretrained(
18
+ # checkpoint, torch_dtype=torch.float32, device_map="cpu", trust_remote_code=True
19
+ #)
20
+
21
+ model_name_or_path = "TheBloke/phi-2-GPTQ"
22
+ # To use a different branch, change revision
23
+ # For example: revision="gptq-4bit-32g-actorder_True"
24
+ model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
25
+ device_map="auto",
26
+ trust_remote_code=True,
27
+ revision="main")
28
+
29
+ tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
30
 
31
  # Text generation pipeline
32
  phi2 = pipeline(
 
90
  gr.Markdown(
91
  """
92
  # Phi-2 Chatbot Demo
93
+ This chatbot was created using [TheBloke/phi-2-GPTQ](https://huggingface.co/TheBloke/phi-2-GPTQ), a quantized version of Microsoft's 2.7 billion parameter [phi-2](https://huggingface.co/microsoft/phi-2) Transformer model.
94
 
95
+ In order to reduce the response time on this hardware, set `max_new_tokens` to a lower number in the text generation pipeline.
96
  """
97
  )
98