Update app.py
Browse files
app.py
CHANGED
@@ -9,14 +9,24 @@ from transformers import (
|
|
9 |
)
|
10 |
from threading import Thread
|
11 |
|
12 |
-
|
13 |
-
checkpoint = "microsoft/phi-2"
|
14 |
|
15 |
-
|
16 |
-
tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
|
17 |
-
model = AutoModelForCausalLM.from_pretrained(
|
18 |
-
checkpoint, torch_dtype=torch.float32, device_map="cpu", trust_remote_code=True
|
19 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
# Text generation pipeline
|
22 |
phi2 = pipeline(
|
@@ -80,9 +90,9 @@ with gr.Blocks() as demo:
|
|
80 |
gr.Markdown(
|
81 |
"""
|
82 |
# Phi-2 Chatbot Demo
|
83 |
-
This chatbot was created using Microsoft's 2.7 billion parameter [phi-2](https://huggingface.co/microsoft/phi-2) Transformer model.
|
84 |
|
85 |
-
In order to reduce the response time on this hardware, `max_new_tokens`
|
86 |
"""
|
87 |
)
|
88 |
|
|
|
9 |
)
|
10 |
from threading import Thread
|
11 |
|
12 |
+
## The huggingface model id for Microsoft's phi-2 model
|
13 |
+
#checkpoint = "microsoft/phi-2"
|
14 |
|
15 |
+
## Download and load model and tokenizer
|
16 |
+
#tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
|
17 |
+
#model = AutoModelForCausalLM.from_pretrained(
|
18 |
+
# checkpoint, torch_dtype=torch.float32, device_map="cpu", trust_remote_code=True
|
19 |
+
#)
|
20 |
+
|
21 |
+
# Hugging Face hub id of the GPTQ-quantized build of phi-2 to download.
model_name_or_path = "TheBloke/phi-2-GPTQ"

# To use a different branch, change revision
# For example: revision="gptq-4bit-32g-actorder_True"
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    device_map="auto",
    trust_remote_code=True,
    revision="main",
)

# Matching tokenizer for the quantized checkpoint (fast Rust tokenizer).
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
|
30 |
|
31 |
# Text generation pipeline
|
32 |
phi2 = pipeline(
|
|
|
90 |
gr.Markdown(
|
91 |
"""
|
92 |
# Phi-2 Chatbot Demo
|
93 |
+
This chatbot was created using TheBloke/phi-2-GPTQ, a quantized version of Microsoft's 2.7-billion-parameter [phi-2](https://huggingface.co/microsoft/phi-2) Transformer model.
|
94 |
|
95 |
+
In order to reduce the response time on this hardware, set `max_new_tokens` to a lower number in the text-generation pipeline.
|
96 |
"""
|
97 |
)
|
98 |
|