Chris4K philschmid HF staff committed on
Commit
95bfc89
0 Parent(s):

Duplicate from philschmid/igel-playground

Browse files

Co-authored-by: Philipp Schmid <philschmid@users.noreply.huggingface.co>

Files changed (4) hide show
  1. .gitattributes +34 -0
  2. README.md +13 -0
  3. app.py +206 -0
  4. requirements.txt +6 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Igel Playground
3
+ emoji: 🧠
4
+ colorFrom: red
5
+ colorTo: yellow
6
+ sdk: gradio
7
+ sdk_version: 3.23.0
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: philschmid/igel-playground
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, TextIteratorStreamer
import torch
from threading import Thread
from huggingface_hub import Repository
import json

# Shared Gradio theme for the playground UI.
theme = gr.themes.Monochrome(
    primary_hue="indigo",
    secondary_hue="blue",
    neutral_hue="slate",
    radius_size=gr.themes.sizes.radius_sm,
    font=[gr.themes.GoogleFont("Open Sans"), "ui-sans-serif", "system-ui", "sans-serif"],
)

# Optional dataset repository used to log prompts and answers.
# Logging is enabled only when an HF_TOKEN is present in the environment.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
os.environ["TOKENIZERS_PARALLELISM"] = "false"
if HF_TOKEN:
    repo = Repository(
        local_dir="data", clone_from="philschmid/playground-prompts", use_auth_token=HF_TOKEN, repo_type="dataset"
    )


# Load the IGEL checkpoint: 8-bit quantized across available GPUs,
# or a plain low-memory load on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_id = "philschmid/instruct-igel-001"
if device == "cpu":
    model = AutoModelForCausalLM.from_pretrained(model_id, low_cpu_mem_usage=True)
else:
    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Plain template string — `{input}` is filled in via str.format() in generate().
# (Was a needless f-string with escaped braces, which produced the same literal.)
prompt_template = "### Anweisung:\n{input}\n\n### Antwort:"
def generate(instruction, temperature=1.0, max_new_tokens=256, top_p=0.9, length_penalty=1.0):
    """Stream a German answer for `instruction` from the IGEL model.

    Wraps the instruction in the IGEL prompt template, runs ``model.generate``
    on a background thread with a ``TextIteratorStreamer``, and yields the
    accumulated answer string after each decoded chunk so Gradio can render
    it incrementally.

    Args:
        instruction: User instruction text (German).
        temperature: Sampling temperature; higher means more random.
        max_new_tokens: Upper bound on newly generated tokens.
        top_p: Nucleus-sampling probability mass.
        length_penalty: > 0 favours longer outputs, < 0 shorter ones.

    Yields:
        The answer text generated so far (grows with every chunk).
    """
    formatted_instruction = prompt_template.format(input=instruction)

    # Gradio sliders may deliver strings/ints; normalise to float.
    temperature = float(temperature)
    top_p = float(top_p)
    length_penalty = float(length_penalty)

    # Streaming generation, based on
    # git+https://github.com/gante/transformers.git@streamer_iterator
    # (a non-streaming GenerationConfig variant was removed as dead code).
    streamer = TextIteratorStreamer(tokenizer)
    model_inputs = tokenizer(formatted_instruction, return_tensors="pt", truncation=True, max_length=2048)
    # Move all input tensors to the model's device (GPU when available).
    model_inputs = {k: v.to(device) for k, v in model_inputs.items()}

    generate_kwargs = dict(
        top_p=top_p,
        top_k=0,
        temperature=temperature,
        do_sample=True,
        max_new_tokens=max_new_tokens,
        early_stopping=True,
        length_penalty=length_penalty,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Run generation on a worker thread so this generator can consume the
    # streamer concurrently.
    t = Thread(target=model.generate, kwargs={**dict(model_inputs, streamer=streamer), **generate_kwargs})
    t.start()

    output = ""
    hidden_output = ""
    for new_text in streamer:
        # The streamer re-emits the prompt first; swallow chunks until the
        # accumulated hidden text is at least as long as the formatted prompt.
        # NOTE(review): this length heuristic may also drop the first real
        # answer chunk — confirm against the streamer's decoding behaviour.
        if len(hidden_output) <= len(formatted_instruction):
            hidden_output += new_text
            continue
        # Strip the end-of-sequence token from the visible answer.
        if tokenizer.eos_token in new_text:
            new_text = new_text.replace(tokenizer.eos_token, "")
        output += new_text
        yield output
    if HF_TOKEN:
        # Persist the prompt/response pair to the dataset repo cloned at startup.
        save_inputs_and_outputs(formatted_instruction, output, generate_kwargs)
    # Return value of a generator is ignored by Gradio; kept for parity.
    return output
def save_inputs_and_outputs(inputs, outputs, generate_kwargs):
    """Append one prompt/response record to data/prompts.jsonl and push it.

    Args:
        inputs: The fully formatted prompt that was sent to the model.
        outputs: The generated answer text.
        generate_kwargs: The sampling parameters used for this generation.
    """
    # Explicit UTF-8: the data is German text written with ensure_ascii=False,
    # which would crash under an ASCII default locale encoding.
    with open(os.path.join("data", "prompts.jsonl"), "a", encoding="utf-8") as f:
        json.dump({"inputs": inputs, "outputs": outputs, "generate_kwargs": generate_kwargs}, f, ensure_ascii=False)
        f.write("\n")
    # Push after the file is closed/flushed; return value was unused.
    repo.push_to_hub()
# Example prompts (German) shown under the input box. The first is a
# RAG-style question with inline context about Hugging Face; the rest cover
# explanation, open-ended QA, step-by-step reasoning, and copywriting.
examples = [
    """Beantworten Sie die Frage am Ende des Textes anhand der folgenden Zusammenhänge. Wenn Sie die Antwort nicht wissen, sagen Sie, dass Sie es nicht wissen, versuchen Sie nicht, eine Antwort zu erfinden.
"Das Unternehmen wurde 2016 von den französischen Unternehmern Clément Delangue, Julien Chaumond und Thomas Wolf gegründet und entwickelte ursprünglich eine Chatbot-App, die sich an Teenager richtete.[2] Nachdem das Modell hinter dem Chatbot offengelegt wurde, konzentrierte sich das Unternehmen auf eine Plattform für maschinelles Lernen.
Im März 2021 sammelte Hugging Face in einer Serie-B-Finanzierungsrunde 40 Millionen US-Dollar ein[3].
Am 28. April 2021 rief das Unternehmen in Zusammenarbeit mit mehreren anderen Forschungsgruppen den BigScience Research Workshop ins Leben, um ein offenes großes Sprachmodell zu veröffentlichen.[4] Im Jahr 2022 wurde der Workshop mit der Ankündigung von BLOOM abgeschlossen, einem mehrsprachigen großen Sprachmodell mit 176 Milliarden Parametern.[5]"
Frage: Wann wurde Hugging Face gegründet?""",
    "Erklären Sie, was eine API ist.",
    "Bitte beantworten Sie die folgende Frage. Wer wird der nächste Ballon d'or sein?",
    "Beantworten Sie die folgende Ja/Nein-Frage, indem Sie Schritt für Schritt argumentieren. Kannst du ein ganzes Haiku in einem einzigen Tweet schreiben?",
    "Schreibe eine Produktbeschreibung für einen LG 43UQ75009LF 109 cm (43 Zoll) UHD Fernseher (Active HDR, 60 Hz, Smart TV) [Modelljahr 2022]",
]
def process_example(args):
    """Run `generate` to completion and return only the final answer.

    Used by gr.Examples to cache one (non-streamed) output per example.
    """
    # Drain the streaming generator, keeping only the last yielded value.
    # Initialising `output` avoids a NameError when the generator yields
    # nothing (the original read the leaked loop variable after the loop).
    output = ""
    for output in generate(args):
        pass
    return output
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks(theme=theme) as demo:
    with gr.Column():
        # Static header describing the IGEL model family.
        gr.Markdown(
            """<h1><center>IGEL - Instruction-tuned German large Language Model for Text</center></h1>
<p>
IGEL is a LLM model family developed for the German language. The first version of IGEL is built on top <a href="https://bigscience.huggingface.co/blog/bloom" target="_blank">BigScience BLOOM</a> adapted to the <a href="https://huggingface.co/malteos/bloom-6b4-clp-german">German language by Malte Ostendorff</a>. IGEL designed to provide accurate and reliable language understanding capabilities for a wide range of natural language understanding tasks, including sentiment analysis, language translation, and question answering.

The IGEL family includes instruction [instruct-igel-001](https://huggingface.co/philschmid/instruct-igel-001) and `chat-igel-001` _coming soon_.
</p>
"""
        )
        with gr.Row():
            with gr.Column(scale=3):
                # Prompt input, streamed answer output, and the submit button.
                instruction = gr.Textbox(placeholder="Hier Anweisung eingeben...", label="Anweisung")
                output = gr.Textbox(
                    interactive=False,
                    lines=8,
                    label="Antwort",
                    placeholder="Hier Antwort erscheint...",
                )
                submit = gr.Button("Generate", variant="primary")
                # Cached examples run process_example once at startup.
                gr.Examples(
                    examples=examples,
                    inputs=[instruction],
                    cache_examples=True,
                    fn=process_example,
                    outputs=[output],
                )

            with gr.Column(scale=1):
                # Sampling controls, passed straight through to generate().
                temperature = gr.Slider(
                    label="Temperature",
                    value=1.0,
                    minimum=0.01,
                    maximum=1.0,
                    step=0.1,
                    interactive=True,
                    info="The higher more random",
                )
                max_new_tokens = gr.Slider(
                    label="Max new tokens",
                    value=256,
                    minimum=0,
                    maximum=2048,
                    step=5,
                    interactive=True,
                    info="The maximum numbers of new tokens",
                )
                top_p = gr.Slider(
                    label="Top p",
                    value=0.9,
                    minimum=0.01,
                    maximum=1,
                    step=0.05,
                    interactive=True,
                    info="probabilities that add up are kept",
                )
                length_penalty = gr.Slider(
                    label="Length penalty",
                    value=1.0,
                    minimum=-10.0,
                    maximum=10.0,
                    step=0.1,
                    interactive=True,
                    info="> 0.0 longer, < 0.0 shorter",
                )

    # Both the button click and pressing Enter in the textbox trigger generation.
    submit.click(generate, inputs=[instruction, temperature, max_new_tokens, top_p, length_penalty], outputs=[output])
    instruction.submit(
        generate, inputs=[instruction, temperature, max_new_tokens, top_p, length_penalty], outputs=[output]
    )

# Serialise requests: generation is GPU-bound, so handle one at a time.
demo.queue(concurrency_count=1)
demo.launch(enable_queue=True)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ git+https://github.com/huggingface/peft.git
2
+ git+https://github.com/huggingface/transformers.git
3
+ huggingface_hub
4
+ accelerate
5
+ bitsandbytes
6
+ scipy