import spaces
import torch
import os
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
import gradio as gr
from threading import Thread
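# Select an accelerator if one is available (CUDA or Apple MPS); otherwise run on CPU.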
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
if torch.backends.mps.is_available():
    device = "mps"

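# Gradio theme with custom Google fonts.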
theme = gr.themes.Base(
    font=[gr.themes.GoogleFont('Libre Franklin'), gr.themes.GoogleFont('Public Sans'), 'system-ui', 'sans-serif'],
)

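# Log in to the Hugging Face Hub so gated/private weights can be downloaded; the token is read from the ACCESS_TOKEN secret.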
from huggingface_hub import login
login(token=os.getenv('ACCESS_TOKEN'))

# Load the tokenizer and model; use fp16 on GPU and move the weights onto the selected device.
tokenizer = AutoTokenizer.from_pretrained("Writer/Palmyra-Fin-70B-32K")
model = AutoModelForCausalLM.from_pretrained(
    "Writer/Palmyra-Fin-70B-32K",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
).to(device)
#tokenizer = AutoTokenizer.from_pretrained("yam-peleg/Experiment26-7B", trust_remote_code=True)
#model = AutoModelForCausalLM.from_pretrained(
#    "yam-peleg/Experiment26-7B",
#    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
#    trust_remote_code=True,
#).to(device)
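# On ZeroGPU Spaces, the @spaces.GPU decorator requests a GPU for the duration of each call.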
@spaces.GPU(enable_queue=True)
def generate_text(text, temperature, maxLen):
    # Strip whitespace and a leading BOS token from the prompt.
    text = text.lstrip().removeprefix('<s>').lstrip()
    inputs = tokenizer([text], return_tensors="pt").to(device)
    # Stream tokens back as they are generated so the textbox updates live.
    streamer = TextIteratorStreamer(tokenizer)
    generation_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=int(maxLen),
        do_sample=True,  # sampling must be enabled for the temperature setting to take effect
        temperature=temperature,
    )
    # Run generation on a background thread and yield the accumulated text as it streams in.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    t = ""
    for out in streamer:
        t += out
        t = t.lstrip().removeprefix('<s>').lstrip()
        yield t
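
# Build the Gradio interface.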
with gr.Blocks(theme=theme) as demo:
    gr.Markdown("""
# (Unofficial) Demo of Experiment26-7B

The model is suitable for commercial use and is licensed under the Apache license. I am not responsible for any outputs you generate. You are solely responsible for ensuring that your usage of the model complies with applicable laws and regulations.

I am not affiliated with the authors of the model.

**This model is not an instruct model but a base model.**

Note: for longer generations (>1024 tokens), keep clicking "Generate". The demo is currently limited to 1024 tokens per generation so that all users have access to this service. Please note that once a generation has started, it cannot be stopped until it finishes.

By [mrfakename](https://twitter.com/realmrfakename).

## [Model card & download](https://huggingface.co/yam-peleg/Experiment26-7B)
""".strip())
    gr.DuplicateButton()
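    # The prompt textbox doubles as the output: streamed generations are written back into it.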
    text = gr.Textbox(label="Prompt", lines=10, interactive=True, placeholder="Write a detailed analogy between mathematics and a lighthouse.")
    temp = gr.Slider(label="Temperature", minimum=0.1, maximum=1.5, value=0.7)
    maxlen = gr.Slider(label="Max Length", minimum=4, maximum=1024, value=75)
    go = gr.Button("Generate", variant="primary")
    go.click(generate_text, inputs=[text, temp, maxlen], outputs=[text], concurrency_limit=2)
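    # Example inputs: (prompt text, temperature, max length).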
    examples = gr.Examples(
        [
            ['Question: Write a detailed analogy between mathematics and a lighthouse.\nAnswer: ', 0.7, 75],
            ['Question: Generate a story involving a dog, an astronaut and a baker\nAnswer: ', 0.7, 1024],
            ['''def print_prime(n):
   """
   Print all primes between 1 and n
   """\n''', 0.2, 100],
        ],
        [text, temp, maxlen]
    )

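# Queue incoming requests and launch without exposing the API.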
if __name__ == "__main__":
    demo.queue(api_open=False).launch(show_api=False)