Spaces:
Sleeping
Sleeping
File size: 4,476 Bytes
fb38431 c4f947a 0439661 c4f947a 5315eed c778ae5 996c7bc 0439661 4d07925 c778ae5 b461977 29a2d0c 0439661 b461977 c778ae5 0439661 0f36821 0439661 c4f947a 0439661 c4f947a 0439661 ec6b66c 0f36821 ec6b66c 0439661 0f36821 c4f947a 0439661 c4f947a 0439661 0f36821 0439661 c4f947a 5315eed b461977 0f36821 0439661 0f36821 0439661 b461977 839fca3 ff04433 839fca3 0f36821 839fca3 ff04433 839fca3 0f36821 839fca3 0f36821 839fca3 b461977 5315eed c3faf6b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
import torch
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
TextIteratorStreamer,
StoppingCriteria,
)
from threading import Thread
import gradio as gr
# Pick the compute device once; everything below (default tensor device,
# model placement, tokenized inputs) follows this choice.
has_gpu = torch.cuda.is_available()
if has_gpu:
    device = "cuda"
else:
    device = "cpu"
torch.set_default_device(device)

# Tokenizer and model both come from the same "microsoft/phi-2" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)

# Weights are loaded as float32 on every device. A float16-on-GPU variant
# existed here previously but was disabled by the author, so it is not used.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-2",
    torch_dtype=torch.float32,
    device_map=device,
    trust_remote_code=True,
)
# Custom stopping criteria (avoid generating hallucinated prompts).
# The stop tokens themselves still appear in the output; generation simply
# ends right after one of them has been produced.
class Phi2StoppingCriteria(StoppingCriteria):
    """Stop generation once the sequence ends with a known stop phrase.

    Each stop phrase is encoded with the module-level ``tokenizer`` when the
    criteria object is created. On every call, the tail of the first sequence
    in the batch is compared against each encoded phrase.
    """

    def __init__(self):
        super().__init__()
        stop_list = ["Exercise", "Exercises", "exercises:", "<|endoftext|>"]
        # One list of token ids per stop phrase.
        self.tokenphrases = [
            tokenizer(phrase, return_tensors="pt").input_ids[0].tolist()
            for phrase in stop_list
        ]

    def __call__(
        self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
    ) -> bool:
        """Return True when the first sequence ends with any stop phrase.

        Args:
            input_ids: Token ids generated so far; only row 0 is inspected.
            scores: Unused; accepted to match the StoppingCriteria call signature.
            **kwargs: Unused; accepted for signature compatibility.

        Returns:
            True if generation should stop, otherwise False. An explicit False
            is returned (never None) so callers can combine the result with
            boolean operators.
        """
        # Convert once instead of once per stop phrase.
        generated = input_ids[0].tolist()
        for tokenphrase in self.tokenphrases:
            # Skip empty phrases: a `[-0:]` slice would select the whole sequence.
            if tokenphrase and generated[-len(tokenphrase):] == tokenphrase:
                return True
        return False
def generate(
    prompt,
    max_new_tokens=75,
    terminate_hallucinated_prompts=True,
    sampling=False,
    temperature=1.0,
    top_k=50,
    top_p=1.0,
):
    """Stream a model completion for ``prompt``, yielding the text so far.

    Generation runs in a background thread while this generator consumes the
    streamer, so each yielded value is the accumulated output up to that point.

    Args:
        prompt: Text to feed to the model.
        max_new_tokens: Upper bound on the number of generated tokens.
        terminate_hallucinated_prompts: When true, attach
            ``Phi2StoppingCriteria`` so generation stops after a stop phrase.
        sampling: Passed as ``do_sample``; enables the three options below.
        temperature: Sampling temperature (only sent when ``sampling`` is true).
        top_k: Top-k cutoff (only sent when ``sampling`` is true).
        top_p: Nucleus cutoff (only sent when ``sampling`` is true).

    Yields:
        The accumulated text streamed so far. The streamer is created with its
        default options, so whatever it emits (presumably including the prompt)
        is part of that text.

    Raises:
        Exception: Any error raised by ``model.generate`` in the worker thread
            is re-raised here after the stream has ended.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # thanks https://huggingface.co/spaces/joaogante/transformers_streaming/blob/main/app.py
    streamer = TextIteratorStreamer(tokenizer)
    generation_kwargs = dict(
        inputs,
        streamer=streamer,
        # UI sliders may deliver floats; generation expects an integer count.
        max_new_tokens=int(max_new_tokens),
        do_sample=bool(sampling),
    )
    if terminate_hallucinated_prompts:
        generation_kwargs["stopping_criteria"] = [Phi2StoppingCriteria()]
    if sampling:
        # These knobs only have an effect when sampling is enabled, so they
        # are left out otherwise instead of being passed and ignored.
        generation_kwargs.update(
            temperature=float(temperature),
            top_k=int(top_k),
            top_p=float(top_p),
        )

    worker_errors = []

    def _run_generation():
        # Runs model.generate in the worker thread and records any failure.
        try:
            model.generate(**generation_kwargs)
        except Exception as exc:  # re-raised in the consuming thread below
            worker_errors.append(exc)
            # Without this the consumer loop would wait on the streamer forever.
            streamer.end()

    thread = Thread(target=_run_generation)
    thread.start()
    model_output = ""
    for new_text in streamer:
        model_output += new_text
        yield model_output
    thread.join()
    if worker_errors:
        raise worker_errors[0]
    return model_output
# Gradio UI for the demo. The input components are listed in the same order as
# the parameters of `generate`, which is how Gradio maps them to arguments.
demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Text(
            label="prompt",
            value="Write a detailed analogy between mathematics and a lighthouse.",
        ),
        # Minimum is 1: asking for zero new tokens is not a valid generation request.
        gr.Slider(minimum=1, maximum=500, step=1, value=50, label="max new tokens"),
        gr.Checkbox(
            value=True,
            label="terminate hallucinated prompts",
            info="stop generation after getting tokens like 'Exercise' or '<|endoftext|>', but will not remove them.",
        ),
        gr.Checkbox(
            label="do sampling",
            info="introduce randomness for non-deterministic results. required for below options",
            value=True,
        ),
        gr.Slider(
            label="temperature",
            info="higher temperature means more randomness",
            value=1.0,
            minimum=0.1,
            maximum=1.5,
            step=0.1,
        ),
        gr.Slider(
            label="top-k",
            info="consider only the k most likely tokens",
            value=50,
            minimum=1,
            maximum=100,
            step=1,
        ),
        gr.Slider(
            label="top-p",
            info="choose from the smallest possible set of words whose cumulative probability exceeds the probability p",
            value=1.0,
            minimum=0.1,
            maximum=1.0,
            step=0.1,
        ),
    ],
    outputs="text",
    # Each example supplies only the first two inputs: prompt and max new tokens.
    examples=[
        [
            "Write a detailed analogy between mathematics and a lighthouse.",
            75,
        ],
        [
            "Instruct: Write a detailed analogy between mathematics and a lighthouse.\nOutput:",
            100,
        ],
        [
            "Alice: I don't know why, I'm struggling to maintain focus while studying. Any suggestions?\n\nBob: ",
            150,
        ],
        [
            '''```
def print_prime(n):
"""
Print all primes between 1 and n
"""\n''',
            125,
        ],
    ],
    title="Microsoft Phi-2",
    description="Unofficial demo of Microsoft Phi-2, a high performing model with only 2.7B parameters.",
)
if __name__ == "__main__":
    # Turn on request queuing first, then start the web server with the API
    # page hidden.
    queued_demo = demo.queue()
    queued_demo.launch(show_api=False)
|