mlconvexai committed on
Commit
2b113c2
1 Parent(s): 7efd050

Upload Poro_GPTQ_quantization_testing.ipynb

Browse files
Files changed (1) hide show
  1. Poro_GPTQ_quantization_testing.ipynb +329 -0
Poro_GPTQ_quantization_testing.ipynb ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "c1741b36-a53c-44db-9384-e823f06934bf",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Poro GPTQ quantization testing"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": 1,
14
+ "id": "5a39da1e-88f5-42a1-b00c-fa987b1fd1de",
15
+ "metadata": {
16
+ "tags": []
17
+ },
18
+ "outputs": [],
19
+ "source": [
20
+ "from transformers import AutoModelForCausalLM, AutoTokenizer"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 3,
26
+ "id": "0738c247-52e4-4c22-84ef-e13c6fc2a533",
27
+ "metadata": {
28
+ "tags": []
29
+ },
30
+ "outputs": [
31
+ {
32
+ "data": {
33
+ "application/vnd.jupyter.widget-view+json": {
34
+ "model_id": "3e80cefcd53149d6bb962b6aaee3154f",
35
+ "version_major": 2,
36
+ "version_minor": 0
37
+ },
38
+ "text/plain": [
39
+ "config.json: 0%| | 0.00/1.43k [00:00<?, ?B/s]"
40
+ ]
41
+ },
42
+ "metadata": {},
43
+ "output_type": "display_data"
44
+ },
45
+ {
46
+ "data": {
47
+ "application/vnd.jupyter.widget-view+json": {
48
+ "model_id": "15a5a41816c0491cb17e15b722d02139",
49
+ "version_major": 2,
50
+ "version_minor": 0
51
+ },
52
+ "text/plain": [
53
+ "model.safetensors.index.json: 0%| | 0.00/115k [00:00<?, ?B/s]"
54
+ ]
55
+ },
56
+ "metadata": {},
57
+ "output_type": "display_data"
58
+ },
59
+ {
60
+ "data": {
61
+ "application/vnd.jupyter.widget-view+json": {
62
+ "model_id": "d314aea93bb64a38a95f27b52fcf2957",
63
+ "version_major": 2,
64
+ "version_minor": 0
65
+ },
66
+ "text/plain": [
67
+ "Downloading shards: 0%| | 0/4 [00:00<?, ?it/s]"
68
+ ]
69
+ },
70
+ "metadata": {},
71
+ "output_type": "display_data"
72
+ },
73
+ {
74
+ "data": {
75
+ "application/vnd.jupyter.widget-view+json": {
76
+ "model_id": "fd4c51235a294532856b038346e0928c",
77
+ "version_major": 2,
78
+ "version_minor": 0
79
+ },
80
+ "text/plain": [
81
+ "model-00001-of-00004.safetensors: 0%| | 0.00/4.94G [00:00<?, ?B/s]"
82
+ ]
83
+ },
84
+ "metadata": {},
85
+ "output_type": "display_data"
86
+ },
87
+ {
88
+ "data": {
89
+ "application/vnd.jupyter.widget-view+json": {
90
+ "model_id": "d0916b3277104f4c948f2884b321a3c2",
91
+ "version_major": 2,
92
+ "version_minor": 0
93
+ },
94
+ "text/plain": [
95
+ "model-00002-of-00004.safetensors: 0%| | 0.00/4.94G [00:00<?, ?B/s]"
96
+ ]
97
+ },
98
+ "metadata": {},
99
+ "output_type": "display_data"
100
+ },
101
+ {
102
+ "data": {
103
+ "application/vnd.jupyter.widget-view+json": {
104
+ "model_id": "f9f166bf7bbd461b9c01f9849b2e3fbc",
105
+ "version_major": 2,
106
+ "version_minor": 0
107
+ },
108
+ "text/plain": [
109
+ "model-00003-of-00004.safetensors: 0%| | 0.00/5.00G [00:00<?, ?B/s]"
110
+ ]
111
+ },
112
+ "metadata": {},
113
+ "output_type": "display_data"
114
+ },
115
+ {
116
+ "data": {
117
+ "application/vnd.jupyter.widget-view+json": {
118
+ "model_id": "0db3f2c91703495684eb891f59ccaa1b",
119
+ "version_major": 2,
120
+ "version_minor": 0
121
+ },
122
+ "text/plain": [
123
+ "model-00004-of-00004.safetensors: 0%| | 0.00/4.28G [00:00<?, ?B/s]"
124
+ ]
125
+ },
126
+ "metadata": {},
127
+ "output_type": "display_data"
128
+ },
129
+ {
130
+ "data": {
131
+ "application/vnd.jupyter.widget-view+json": {
132
+ "model_id": "cd662139ce2442d9b87f9c834274f790",
133
+ "version_major": 2,
134
+ "version_minor": 0
135
+ },
136
+ "text/plain": [
137
+ "Loading checkpoint shards: 0%| | 0/4 [00:00<?, ?it/s]"
138
+ ]
139
+ },
140
+ "metadata": {},
141
+ "output_type": "display_data"
142
+ },
143
+ {
144
+ "data": {
145
+ "application/vnd.jupyter.widget-view+json": {
146
+ "model_id": "66fe78e818094b069237426c0b3bd4d7",
147
+ "version_major": 2,
148
+ "version_minor": 0
149
+ },
150
+ "text/plain": [
151
+ "generation_config.json: 0%| | 0.00/132 [00:00<?, ?B/s]"
152
+ ]
153
+ },
154
+ "metadata": {},
155
+ "output_type": "display_data"
156
+ }
157
+ ],
158
+ "source": [
159
+ "# Model download from Hugging Face\n",
160
+ "model = AutoModelForCausalLM.from_pretrained(\"mlconvexai/Poro-34B-GPTQ-SGroup\",device_map=\"auto\")"
161
+ ]
162
+ },
163
+ {
164
+ "cell_type": "code",
165
+ "execution_count": 4,
166
+ "id": "7421bd8a-c835-4259-abfb-539fd41a0285",
167
+ "metadata": {
168
+ "tags": []
169
+ },
170
+ "outputs": [
171
+ {
172
+ "data": {
173
+ "application/vnd.jupyter.widget-view+json": {
174
+ "model_id": "6542cd3dc1d04921ae7453d2b40ad252",
175
+ "version_major": 2,
176
+ "version_minor": 0
177
+ },
178
+ "text/plain": [
179
+ "tokenizer_config.json: 0%| | 0.00/4.94k [00:00<?, ?B/s]"
180
+ ]
181
+ },
182
+ "metadata": {},
183
+ "output_type": "display_data"
184
+ },
185
+ {
186
+ "data": {
187
+ "application/vnd.jupyter.widget-view+json": {
188
+ "model_id": "24f3f3811fd145a4ba07d1f35f591005",
189
+ "version_major": 2,
190
+ "version_minor": 0
191
+ },
192
+ "text/plain": [
193
+ "tokenizer.json: 0%| | 0.00/5.64M [00:00<?, ?B/s]"
194
+ ]
195
+ },
196
+ "metadata": {},
197
+ "output_type": "display_data"
198
+ },
199
+ {
200
+ "data": {
201
+ "application/vnd.jupyter.widget-view+json": {
202
+ "model_id": "986046ba401b4377aee6e86e9c82fa1b",
203
+ "version_major": 2,
204
+ "version_minor": 0
205
+ },
206
+ "text/plain": [
207
+ "special_tokens_map.json: 0%| | 0.00/1.00k [00:00<?, ?B/s]"
208
+ ]
209
+ },
210
+ "metadata": {},
211
+ "output_type": "display_data"
212
+ }
213
+ ],
214
+ "source": [
215
+ "# Tokenizer download\n",
216
+ "tokenizer = AutoTokenizer.from_pretrained(\"mlconvexai/Poro-34B-GPTQ-SGroup\", use_fast=True)"
217
+ ]
218
+ },
219
+ {
220
+ "cell_type": "code",
221
+ "execution_count": 5,
222
+ "id": "85931283-aafa-48c7-b3dc-e63151cbb88c",
223
+ "metadata": {
224
+ "tags": []
225
+ },
226
+ "outputs": [],
227
+ "source": [
228
+ "# Example prompt and input preparation\n",
229
+ "prompt = 'Given the question delimited by triple backticks ```{ Kuinka vaihdan uutiskirjeen sähköpostiosoitteen? }```, what is the answer? Answer:'"
230
+ ]
231
+ },
232
+ {
233
+ "cell_type": "code",
234
+ "execution_count": 6,
235
+ "id": "91d7d540-214d-46cd-bca8-e20b67c9f298",
236
+ "metadata": {
237
+ "tags": []
238
+ },
239
+ "outputs": [],
240
+ "source": [
241
+ "input_ids = tokenizer(prompt, return_tensors='pt').input_ids.cuda()"
242
+ ]
243
+ },
244
+ {
245
+ "cell_type": "code",
246
+ "execution_count": 7,
247
+ "id": "e8afd403-3289-4371-a9ba-06d9149a95fc",
248
+ "metadata": {
249
+ "tags": []
250
+ },
251
+ "outputs": [],
252
+ "source": [
253
+ "# Prediction\n",
254
+ "output = model.generate(inputs=input_ids, temperature=0.7, do_sample=True, top_p=0.95, top_k=40, max_new_tokens=512)"
255
+ ]
256
+ },
257
+ {
258
+ "cell_type": "code",
259
+ "execution_count": 8,
260
+ "id": "566cfee1-8eb8-4b0e-8eba-7de3b33d3c36",
261
+ "metadata": {
262
+ "tags": []
263
+ },
264
+ "outputs": [
265
+ {
266
+ "name": "stdout",
267
+ "output_type": "stream",
268
+ "text": [
269
+ "Given the question delimited by triple backticks ```{ Kuinka vaihdan uutiskirjeen sähköpostiosoitteen? }```, what is the answer? Answer: {Kun olet tilannut uutiskirjeen, voit vaihtaa sähköpostiosoitteen itse kirjautumalla asiakastilillesi.} Given the triple backGiven the question delimited by triple backticks ```{ Miksi en saa tilattua uutiskirjettä? }```, what is the answer? Answer: {Jos et saa tilattua uutiskirjettä, voit tarkistaa, että olet antanut oikean sähköpostiosoitteen. Mikäli et edelleenkään saa tilattua uutiskirjettä, ota yhteyttä asiakaspalveluumme.} Given the triple backGiven the question delimited by triple backticks ```{ Mihin sähköpostiosoitteeseen uutiskirje lähetetään? }```, what is the answer? Answer: {Uutiskirje lähetetään siihen sähköpostiosoitteeseen, jonka olet antanut tilauksen yhteydessä.}\n",
270
+ "\n",
271
+ "Given the triple backGiven the question delimited by triple backticks ```{ Mitä tietoja uutiskirjeen tilaaja saa?}```, what is the answer? Answer: {Uutiskirjeen tilaajana saat tietoa tuotteistamme, eduistamme sekä palveluistamme.}\n",
272
+ "\n",
273
+ "Given the triple backGiven the question delimited by triple backticks ```{ Miten saan peruttua uutiskirjeen?}```, what is the answer? Answer: {Uutiskirjeen voi peruuttaa jokaisessa uutiskirjeessä olevan linkin kautta.}\n",
274
+ "\n",
275
+ "Given the triple backGiven the question delimited by triple backticks ```{ Mistä näen omat tilaukseni?}```, what is the answer? Answer: {Omat tilauksesi näet asiakastililläsi.}\n",
276
+ "\n",
277
+ "Given the triple backGiven the question delimited by triple backticks ```{ Miten voin tarkistaa tilaushistoriani?}```, what is the answer? Answer: {Voit tarkistaa tilaushistoriasi asiakastililtäsi.}\n",
278
+ "\n",
279
+ "Given the triple backGiven the question delimited by triple backticks ```{ Miten voin muuttaa tai perua tilaukseni?}```, what is the answer? Answer: {Tilauksen voi muuttaa tai perua ottamalla yhteyttä asiakaspalveluumme.}\n",
280
+ "\n",
281
+ "Given the triple backGiven the question delimited by triple backticks ```{ Miten voin perua tilaukseni?}```, what is the answer? Answer: {Tilauksen voi perua ottamalla yhteyttä asiakaspalveluumme.}\n",
282
+ "\n",
283
+ "Given the triple backGiven the question delimited by triple backticks ```{ Mitä maksutapoja on käytössä?}```, what is the answer? Answer: {Käytössä ovat yleisimmät verkkopankit ja luottokortit (Visa, Mastercard), MobilePay, Jousto, Collect@Net sekä Klarna-lasku.}\n",
284
+ "\n",
285
+ "Given the triple backGiven the question delimited by triple backticks ```{ Miten voin muuttaa laskutusosoitettani?}```, what is the answer? Answer: {Laskutusosoitteen voi muuttaa ottamalla\n"
286
+ ]
287
+ }
288
+ ],
289
+ "source": [
290
+ "print(tokenizer.decode(output[0]))"
291
+ ]
292
+ },
293
+ {
294
+ "cell_type": "code",
295
+ "execution_count": null,
296
+ "id": "ff4406b0-5cd7-4a91-ad0f-28e71e075db8",
297
+ "metadata": {},
298
+ "outputs": [],
299
+ "source": []
300
+ }
301
+ ],
302
+ "metadata": {
303
+ "environment": {
304
+ "kernel": "poro",
305
+ "name": "common-cu121.m118",
306
+ "type": "gcloud",
307
+ "uri": "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/base-cu121:m118"
308
+ },
309
+ "kernelspec": {
310
+ "display_name": "Python 3",
311
+ "language": "python",
312
+ "name": "python3"
313
+ },
314
+ "language_info": {
315
+ "codemirror_mode": {
316
+ "name": "ipython",
317
+ "version": 3
318
+ },
319
+ "file_extension": ".py",
320
+ "mimetype": "text/x-python",
321
+ "name": "python",
322
+ "nbconvert_exporter": "python",
323
+ "pygments_lexer": "ipython3",
324
+ "version": "3.8.8"
325
+ }
326
+ },
327
+ "nbformat": 4,
328
+ "nbformat_minor": 5
329
+ }