mlconvexai committed on
Commit
7efd050
1 Parent(s): 0530606

Upload Poro_34B_GPTQ_quantization.ipynb

Browse files
Files changed (1) hide show
  1. Poro_34B_GPTQ_quantization.ipynb +736 -0
Poro_34B_GPTQ_quantization.ipynb ADDED
@@ -0,0 +1,736 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "c9399417-92ea-4474-a6cb-ce1ecf14f8ea",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Poro 34B GPTQ quantization"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "markdown",
13
+ "id": "8bea76a0-0cce-461e-b167-2f1b6207395e",
14
+ "metadata": {},
15
+ "source": [
16
+ "## Step 1: Import the transformers and torch libraries and check CUDA availability"
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "code",
21
+ "execution_count": 1,
22
+ "id": "1ca2fc08-52ed-4ca3-b849-fbcc72df11f6",
23
+ "metadata": {},
24
+ "outputs": [],
25
+ "source": [
26
+ "from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": 2,
32
+ "id": "97e1ee06-325a-4ca5-8426-39ee43fd02f1",
33
+ "metadata": {},
34
+ "outputs": [],
35
+ "source": [
36
+ "import torch"
37
+ ]
38
+ },
39
+ {
40
+ "cell_type": "code",
41
+ "execution_count": 4,
42
+ "id": "17be0537-e39a-4ad7-b29b-6f4f7d72ead7",
43
+ "metadata": {},
44
+ "outputs": [
45
+ {
46
+ "data": {
47
+ "text/plain": [
48
+ "'2.2.1+cu121'"
49
+ ]
50
+ },
51
+ "execution_count": 4,
52
+ "metadata": {},
53
+ "output_type": "execute_result"
54
+ }
55
+ ],
56
+ "source": [
57
+ "torch.__version__"
58
+ ]
59
+ },
60
+ {
61
+ "cell_type": "code",
62
+ "execution_count": 5,
63
+ "id": "e05ee325-ce5d-49b6-985e-c66ff88ee3e5",
64
+ "metadata": {},
65
+ "outputs": [
66
+ {
67
+ "data": {
68
+ "text/plain": [
69
+ "True"
70
+ ]
71
+ },
72
+ "execution_count": 5,
73
+ "metadata": {},
74
+ "output_type": "execute_result"
75
+ }
76
+ ],
77
+ "source": [
78
+ "torch.cuda.is_available()"
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": null,
84
+ "id": "c8114af7-2cdb-425f-ab8a-2d35462c2977",
85
+ "metadata": {},
86
+ "outputs": [],
87
+ "source": []
88
+ },
89
+ {
90
+ "cell_type": "markdown",
91
+ "id": "495fc0f8-ecc9-4c76-8251-2829246ee68a",
92
+ "metadata": {},
93
+ "source": [
94
+ "## Step 2: Load the original Poro 34B model from Hugging Face and save it locally"
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "code",
99
+ "execution_count": 3,
100
+ "id": "a5a24fba-71e7-4192-aafc-f95648b261d4",
101
+ "metadata": {},
102
+ "outputs": [],
103
+ "source": [
104
+ "model_name='LumiOpen/Poro-34B'"
105
+ ]
106
+ },
107
+ {
108
+ "cell_type": "code",
109
+ "execution_count": 4,
110
+ "id": "148eeafd-6aae-440e-b30d-5ebdd1a8a4a5",
111
+ "metadata": {},
112
+ "outputs": [
113
+ {
114
+ "data": {
115
+ "application/vnd.jupyter.widget-view+json": {
116
+ "model_id": "c6a1ba90df9147489c1c4af10080d933",
117
+ "version_major": 2,
118
+ "version_minor": 0
119
+ },
120
+ "text/plain": [
121
+ "tokenizer_config.json: 0%| | 0.00/286 [00:00<?, ?B/s]"
122
+ ]
123
+ },
124
+ "metadata": {},
125
+ "output_type": "display_data"
126
+ },
127
+ {
128
+ "data": {
129
+ "application/vnd.jupyter.widget-view+json": {
130
+ "model_id": "4c407d194d1742b091947f92ad455236",
131
+ "version_major": 2,
132
+ "version_minor": 0
133
+ },
134
+ "text/plain": [
135
+ "tokenizer.json: 0%| | 0.00/5.64M [00:00<?, ?B/s]"
136
+ ]
137
+ },
138
+ "metadata": {},
139
+ "output_type": "display_data"
140
+ },
141
+ {
142
+ "data": {
143
+ "application/vnd.jupyter.widget-view+json": {
144
+ "model_id": "0392890e4392402086239952185801b1",
145
+ "version_major": 2,
146
+ "version_minor": 0
147
+ },
148
+ "text/plain": [
149
+ "special_tokens_map.json: 0%| | 0.00/545 [00:00<?, ?B/s]"
150
+ ]
151
+ },
152
+ "metadata": {},
153
+ "output_type": "display_data"
154
+ }
155
+ ],
156
+ "source": [
157
+ "org_tokenizer = AutoTokenizer.from_pretrained(model_name)"
158
+ ]
159
+ },
160
+ {
161
+ "cell_type": "code",
162
+ "execution_count": null,
163
+ "id": "3d3ac738-3bc6-4c4f-bfc2-81692a80a662",
164
+ "metadata": {},
165
+ "outputs": [
166
+ {
167
+ "data": {
168
+ "application/vnd.jupyter.widget-view+json": {
169
+ "model_id": "14cac38e76464e748d261f4398b62085",
170
+ "version_major": 2,
171
+ "version_minor": 0
172
+ },
173
+ "text/plain": [
174
+ "config.json: 0%| | 0.00/697 [00:00<?, ?B/s]"
175
+ ]
176
+ },
177
+ "metadata": {},
178
+ "output_type": "display_data"
179
+ },
180
+ {
181
+ "data": {
182
+ "application/vnd.jupyter.widget-view+json": {
183
+ "model_id": "383ffbe929054df2beb163e89af423eb",
184
+ "version_major": 2,
185
+ "version_minor": 0
186
+ },
187
+ "text/plain": [
188
+ "model.safetensors.index.json: 0%| | 0.00/57.0k [00:00<?, ?B/s]"
189
+ ]
190
+ },
191
+ "metadata": {},
192
+ "output_type": "display_data"
193
+ },
194
+ {
195
+ "data": {
196
+ "application/vnd.jupyter.widget-view+json": {
197
+ "model_id": "22f290c533504c78891a85764d4a4dee",
198
+ "version_major": 2,
199
+ "version_minor": 0
200
+ },
201
+ "text/plain": [
202
+ "Downloading shards: 0%| | 0/14 [00:00<?, ?it/s]"
203
+ ]
204
+ },
205
+ "metadata": {},
206
+ "output_type": "display_data"
207
+ },
208
+ {
209
+ "data": {
210
+ "application/vnd.jupyter.widget-view+json": {
211
+ "model_id": "a52ddab3f4c04bcd968d481e53d4ce83",
212
+ "version_major": 2,
213
+ "version_minor": 0
214
+ },
215
+ "text/plain": [
216
+ "model-00001-of-00014.safetensors: 0%| | 0.00/4.71G [00:00<?, ?B/s]"
217
+ ]
218
+ },
219
+ "metadata": {},
220
+ "output_type": "display_data"
221
+ },
222
+ {
223
+ "data": {
224
+ "application/vnd.jupyter.widget-view+json": {
225
+ "model_id": "20afc515d7e64b0193d936d8fdb17a1e",
226
+ "version_major": 2,
227
+ "version_minor": 0
228
+ },
229
+ "text/plain": [
230
+ "model-00002-of-00014.safetensors: 0%| | 0.00/4.93G [00:00<?, ?B/s]"
231
+ ]
232
+ },
233
+ "metadata": {},
234
+ "output_type": "display_data"
235
+ },
236
+ {
237
+ "data": {
238
+ "application/vnd.jupyter.widget-view+json": {
239
+ "model_id": "431fbc52ce0a49219cb73664bfd1f9a6",
240
+ "version_major": 2,
241
+ "version_minor": 0
242
+ },
243
+ "text/plain": [
244
+ "model-00003-of-00014.safetensors: 0%| | 0.00/4.93G [00:00<?, ?B/s]"
245
+ ]
246
+ },
247
+ "metadata": {},
248
+ "output_type": "display_data"
249
+ },
250
+ {
251
+ "data": {
252
+ "application/vnd.jupyter.widget-view+json": {
253
+ "model_id": "fd7453a0ddd64b3299173e7d5586dab1",
254
+ "version_major": 2,
255
+ "version_minor": 0
256
+ },
257
+ "text/plain": [
258
+ "model-00004-of-00014.safetensors: 0%| | 0.00/4.93G [00:00<?, ?B/s]"
259
+ ]
260
+ },
261
+ "metadata": {},
262
+ "output_type": "display_data"
263
+ },
264
+ {
265
+ "data": {
266
+ "application/vnd.jupyter.widget-view+json": {
267
+ "model_id": "44114f0a82144b3ebdc5e1ae083bfbf7",
268
+ "version_major": 2,
269
+ "version_minor": 0
270
+ },
271
+ "text/plain": [
272
+ "model-00005-of-00014.safetensors: 0%| | 0.00/4.93G [00:00<?, ?B/s]"
273
+ ]
274
+ },
275
+ "metadata": {},
276
+ "output_type": "display_data"
277
+ },
278
+ {
279
+ "data": {
280
+ "application/vnd.jupyter.widget-view+json": {
281
+ "model_id": "f8052d0df811492eb6af45897fe568d2",
282
+ "version_major": 2,
283
+ "version_minor": 0
284
+ },
285
+ "text/plain": [
286
+ "model-00006-of-00014.safetensors: 0%| | 0.00/4.93G [00:00<?, ?B/s]"
287
+ ]
288
+ },
289
+ "metadata": {},
290
+ "output_type": "display_data"
291
+ },
292
+ {
293
+ "data": {
294
+ "application/vnd.jupyter.widget-view+json": {
295
+ "model_id": "787b3d7a208348f3ad0141b6c839faf1",
296
+ "version_major": 2,
297
+ "version_minor": 0
298
+ },
299
+ "text/plain": [
300
+ "model-00007-of-00014.safetensors: 0%| | 0.00/4.93G [00:00<?, ?B/s]"
301
+ ]
302
+ },
303
+ "metadata": {},
304
+ "output_type": "display_data"
305
+ },
306
+ {
307
+ "data": {
308
+ "application/vnd.jupyter.widget-view+json": {
309
+ "model_id": "acc82eeb37af48088a00b9e4537d4de6",
310
+ "version_major": 2,
311
+ "version_minor": 0
312
+ },
313
+ "text/plain": [
314
+ "model-00008-of-00014.safetensors: 0%| | 0.00/4.93G [00:00<?, ?B/s]"
315
+ ]
316
+ },
317
+ "metadata": {},
318
+ "output_type": "display_data"
319
+ },
320
+ {
321
+ "data": {
322
+ "application/vnd.jupyter.widget-view+json": {
323
+ "model_id": "588c9bec1e15411e929538a83a820356",
324
+ "version_major": 2,
325
+ "version_minor": 0
326
+ },
327
+ "text/plain": [
328
+ "model-00009-of-00014.safetensors: 0%| | 0.00/4.93G [00:00<?, ?B/s]"
329
+ ]
330
+ },
331
+ "metadata": {},
332
+ "output_type": "display_data"
333
+ },
334
+ {
335
+ "data": {
336
+ "application/vnd.jupyter.widget-view+json": {
337
+ "model_id": "70d44bda93724d62a069dc9c7d3abb02",
338
+ "version_major": 2,
339
+ "version_minor": 0
340
+ },
341
+ "text/plain": [
342
+ "model-00010-of-00014.safetensors: 0%| | 0.00/4.93G [00:00<?, ?B/s]"
343
+ ]
344
+ },
345
+ "metadata": {},
346
+ "output_type": "display_data"
347
+ },
348
+ {
349
+ "data": {
350
+ "application/vnd.jupyter.widget-view+json": {
351
+ "model_id": "4ef2e73b8204473b868043ce8d547148",
352
+ "version_major": 2,
353
+ "version_minor": 0
354
+ },
355
+ "text/plain": [
356
+ "model-00011-of-00014.safetensors: 0%| | 0.00/4.93G [00:00<?, ?B/s]"
357
+ ]
358
+ },
359
+ "metadata": {},
360
+ "output_type": "display_data"
361
+ },
362
+ {
363
+ "data": {
364
+ "application/vnd.jupyter.widget-view+json": {
365
+ "model_id": "3b463343388d42079f6f4452bb9931ce",
366
+ "version_major": 2,
367
+ "version_minor": 0
368
+ },
369
+ "text/plain": [
370
+ "model-00012-of-00014.safetensors: 0%| | 0.00/4.93G [00:00<?, ?B/s]"
371
+ ]
372
+ },
373
+ "metadata": {},
374
+ "output_type": "display_data"
375
+ },
376
+ {
377
+ "data": {
378
+ "application/vnd.jupyter.widget-view+json": {
379
+ "model_id": "1eb6c9827bdc481784d3069d04f7c318",
380
+ "version_major": 2,
381
+ "version_minor": 0
382
+ },
383
+ "text/plain": [
384
+ "model-00013-of-00014.safetensors: 0%| | 0.00/4.93G [00:00<?, ?B/s]"
385
+ ]
386
+ },
387
+ "metadata": {},
388
+ "output_type": "display_data"
389
+ },
390
+ {
391
+ "data": {
392
+ "application/vnd.jupyter.widget-view+json": {
393
+ "model_id": "5662ef8d064b4fa9b70c9b1d1329fdd9",
394
+ "version_major": 2,
395
+ "version_minor": 0
396
+ },
397
+ "text/plain": [
398
+ "model-00014-of-00014.safetensors: 0%| | 0.00/4.52G [00:00<?, ?B/s]"
399
+ ]
400
+ },
401
+ "metadata": {},
402
+ "output_type": "display_data"
403
+ },
404
+ {
405
+ "data": {
406
+ "application/vnd.jupyter.widget-view+json": {
407
+ "model_id": "7b7bd1bd16fb46068153e7b6b2f90b29",
408
+ "version_major": 2,
409
+ "version_minor": 0
410
+ },
411
+ "text/plain": [
412
+ "Loading checkpoint shards: 0%| | 0/14 [00:00<?, ?it/s]"
413
+ ]
414
+ },
415
+ "metadata": {},
416
+ "output_type": "display_data"
417
+ }
418
+ ],
419
+ "source": [
420
+ "branch = \"1000B\"\n",
421
+ "org_model = AutoModelForCausalLM.from_pretrained(model_name,\n",
422
+ " torch_dtype=torch.bfloat16,\n",
423
+ " revision=branch,\n",
424
+ ")"
425
+ ]
426
+ },
427
+ {
428
+ "cell_type": "code",
429
+ "execution_count": 6,
430
+ "id": "4edb55ba-908f-4070-8af3-92c7cf33f8d0",
431
+ "metadata": {},
432
+ "outputs": [],
433
+ "source": [
434
+ "model_configuration = org_model.config"
435
+ ]
436
+ },
437
+ {
438
+ "cell_type": "code",
439
+ "execution_count": 7,
440
+ "id": "69107145-6808-4add-83fe-c3577893d724",
441
+ "metadata": {},
442
+ "outputs": [],
443
+ "source": [
444
+ "# The original model configuration is missing the sequence length parameter, so it is set explicitly\n",
445
+ "model_configuration.sequence_length = 2048"
446
+ ]
447
+ },
448
+ {
449
+ "cell_type": "code",
450
+ "execution_count": 8,
451
+ "id": "33225c1e-6205-4ee0-95e9-2b15a2bf9a68",
452
+ "metadata": {},
453
+ "outputs": [
454
+ {
455
+ "data": {
456
+ "text/plain": [
457
+ "('Poro-34B/tokenizer_config.json',\n",
458
+ " 'Poro-34B/special_tokens_map.json',\n",
459
+ " 'Poro-34B/tokenizer.json')"
460
+ ]
461
+ },
462
+ "execution_count": 8,
463
+ "metadata": {},
464
+ "output_type": "execute_result"
465
+ }
466
+ ],
467
+ "source": [
468
+ "# Save Poro 34B locally (not required, but it speeds up processing when multiple runs are needed)\n",
469
+ "org_model.save_pretrained(\"Poro-34B\", max_shard_size=\"5GB\",safe_serialization=True)\n",
470
+ "org_tokenizer.save_pretrained(\"Poro-34B\")"
471
+ ]
472
+ },
473
+ {
474
+ "cell_type": "code",
475
+ "execution_count": null,
476
+ "id": "8d28eb15-26aa-4aed-be2b-2e67dd243e92",
477
+ "metadata": {},
478
+ "outputs": [],
479
+ "source": []
480
+ },
481
+ {
482
+ "cell_type": "markdown",
483
+ "id": "1e3ea881-658f-41fc-b09c-df711219653d",
484
+ "metadata": {},
485
+ "source": [
486
+ "## Step 3: Load the fine-tuned parameters from the local Poro-34B-Lora-185 directory and merge them into the model"
487
+ ]
488
+ },
489
+ {
490
+ "cell_type": "code",
491
+ "execution_count": 9,
492
+ "id": "c9cd3923-76ed-42d0-a882-5959cf0abf18",
493
+ "metadata": {},
494
+ "outputs": [],
495
+ "source": [
496
+ "from peft import PeftModel"
497
+ ]
498
+ },
499
+ {
500
+ "cell_type": "code",
501
+ "execution_count": 10,
502
+ "id": "118992d6-336e-4eb2-9392-9ca77081ece7",
503
+ "metadata": {},
504
+ "outputs": [],
505
+ "source": [
506
+ "model_id2 = \"Poro-34B-Lora-185\""
507
+ ]
508
+ },
509
+ {
510
+ "cell_type": "code",
511
+ "execution_count": 12,
512
+ "id": "10281b50-740f-4e2f-b2cc-f2d24a7e2f77",
513
+ "metadata": {},
514
+ "outputs": [],
515
+ "source": [
516
+ "loaded_model = PeftModel.from_pretrained(org_model,model_id2,is_trainable=True)"
517
+ ]
518
+ },
519
+ {
520
+ "cell_type": "code",
521
+ "execution_count": 13,
522
+ "id": "c4ba0fc3-1cea-4e15-b302-d6353d1e970e",
523
+ "metadata": {},
524
+ "outputs": [],
525
+ "source": [
526
+ "# Merge the fine-tuned weights into the original Poro 34B model\n",
527
+ "merged_model = loaded_model.merge_and_unload()"
528
+ ]
529
+ },
530
+ {
531
+ "cell_type": "code",
532
+ "execution_count": 14,
533
+ "id": "07818cdc-d227-4abf-9437-8be3081aeb11",
534
+ "metadata": {},
535
+ "outputs": [
536
+ {
537
+ "data": {
538
+ "text/plain": [
539
+ "('Poro-34B-185c/tokenizer_config.json',\n",
540
+ " 'Poro-34B-185c/special_tokens_map.json',\n",
541
+ " 'Poro-34B-185c/tokenizer.json')"
542
+ ]
543
+ },
544
+ "execution_count": 14,
545
+ "metadata": {},
546
+ "output_type": "execute_result"
547
+ }
548
+ ],
549
+ "source": [
550
+ "# Merged model is saved locally\n",
551
+ "merged_model.save_pretrained(\"Poro-34B-185c\", max_shard_size=\"5GB\",safe_serialization=True)\n",
552
+ "org_tokenizer.save_pretrained(\"Poro-34B-185c\")"
553
+ ]
554
+ },
555
+ {
556
+ "cell_type": "code",
557
+ "execution_count": null,
558
+ "id": "f18349b3-2fd8-4927-9ba4-a442d6217e1b",
559
+ "metadata": {},
560
+ "outputs": [],
561
+ "source": []
562
+ },
563
+ {
564
+ "cell_type": "markdown",
565
+ "id": "93d3bbdc-ceaf-4b19-b5e6-05f4d11cf275",
566
+ "metadata": {},
567
+ "source": [
568
+ "## Step 4: Apply GPTQ quantization to the merged fine-tuned model"
569
+ ]
570
+ },
571
+ {
572
+ "cell_type": "code",
573
+ "execution_count": 15,
574
+ "id": "e80c7627-da1f-4cb0-a079-6748806c0a0e",
575
+ "metadata": {},
576
+ "outputs": [],
577
+ "source": [
578
+ "model_id = \"Poro-34B-185c\""
579
+ ]
580
+ },
581
+ {
582
+ "cell_type": "code",
583
+ "execution_count": 16,
584
+ "id": "2af3a06a-0d71-4c0e-aa3b-6f352c278bf2",
585
+ "metadata": {},
586
+ "outputs": [],
587
+ "source": [
588
+ "tokenizer = AutoTokenizer.from_pretrained(model_id)"
589
+ ]
590
+ },
591
+ {
592
+ "cell_type": "code",
593
+ "execution_count": 17,
594
+ "id": "a302205a-0206-43b1-9bef-ed67547520f6",
595
+ "metadata": {},
596
+ "outputs": [],
597
+ "source": [
598
+ "# The dataset is a list of strings; only one string is used here to demonstrate the process\n",
599
+ "dataset = [\"Peruuta ensin vanhaan osoitteeseen tilattu uutiskirje kirjeen alareunan “Peruuta tilaus” -linkistä.\\nTilaa uutiskirje uudelleen oikeaan osoitteeseen.\"]"
600
+ ]
601
+ },
602
+ {
603
+ "cell_type": "code",
604
+ "execution_count": 18,
605
+ "id": "2fabbc2e-436d-4ce1-ad89-4f30bdd977fa",
606
+ "metadata": {},
607
+ "outputs": [],
608
+ "source": [
609
+ "gptq_config = GPTQConfig(bits=4, dataset = dataset, tokenizer=tokenizer)"
610
+ ]
611
+ },
612
+ {
613
+ "cell_type": "code",
614
+ "execution_count": null,
615
+ "id": "b6148609-3111-40a8-b748-fc876b9869f9",
616
+ "metadata": {},
617
+ "outputs": [],
618
+ "source": [
619
+ "model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=gptq_config,low_cpu_mem_usage=True)"
620
+ ]
621
+ },
622
+ {
623
+ "cell_type": "code",
624
+ "execution_count": null,
625
+ "id": "59a2a544-9ee1-4f4e-b26a-d91de5e8f321",
626
+ "metadata": {},
627
+ "outputs": [],
628
+ "source": [
629
+ "# Quantized model and tokenizer are saved locally\n",
630
+ "model.save_pretrained(\"Poro-34B-GPTQ-SGroup\", use_safetensors=True)\n",
631
+ "tokenizer.save_pretrained(\"Poro-34B-GPTQ-SGroup\")"
632
+ ]
633
+ },
634
+ {
635
+ "cell_type": "code",
636
+ "execution_count": null,
637
+ "id": "5e012ca3-966a-4480-aae3-6c2b67e6dde6",
638
+ "metadata": {},
639
+ "outputs": [],
640
+ "source": [
641
+ "# Log in to Hugging Face\n",
642
+ "from huggingface_hub import notebook_login\n",
643
+ "notebook_login()"
644
+ ]
645
+ },
646
+ {
647
+ "cell_type": "code",
648
+ "execution_count": null,
649
+ "id": "587afbc0-8b81-4807-bed8-6af145845b95",
650
+ "metadata": {},
651
+ "outputs": [],
652
+ "source": [
653
+ "# Push the quantized model and tokenizer to the Hugging Face Hub\n",
654
+ "model.push_to_hub(\"Poro-34B-GPTQ-SGroup\", use_safetensors=True)\n",
655
+ "tokenizer.push_to_hub(\"Poro-34B-GPTQ-SGroup\")"
656
+ ]
657
+ },
658
+ {
659
+ "cell_type": "code",
660
+ "execution_count": null,
661
+ "id": "f02d72bb-e75b-415f-b791-254246c5f971",
662
+ "metadata": {},
663
+ "outputs": [],
664
+ "source": []
665
+ },
666
+ {
667
+ "cell_type": "code",
668
+ "execution_count": null,
669
+ "id": "df85b2dc-22e0-40da-b1c1-fee4095c31be",
670
+ "metadata": {},
671
+ "outputs": [],
672
+ "source": []
673
+ },
674
+ {
675
+ "cell_type": "code",
676
+ "execution_count": null,
677
+ "id": "802bf734-e951-4aa0-9512-99ff7bf952f9",
678
+ "metadata": {},
679
+ "outputs": [],
680
+ "source": []
681
+ },
682
+ {
683
+ "cell_type": "code",
684
+ "execution_count": null,
685
+ "id": "f287018e-07b4-4080-b286-e905059f2f90",
686
+ "metadata": {},
687
+ "outputs": [],
688
+ "source": []
689
+ },
690
+ {
691
+ "cell_type": "code",
692
+ "execution_count": null,
693
+ "id": "6baf3c59-611b-47e0-9737-d25952c98c70",
694
+ "metadata": {},
695
+ "outputs": [],
696
+ "source": []
697
+ },
698
+ {
699
+ "cell_type": "code",
700
+ "execution_count": null,
701
+ "id": "84f6aef7-ff91-4bac-add3-3e3c6b4667ca",
702
+ "metadata": {},
703
+ "outputs": [],
704
+ "source": []
705
+ },
706
+ {
707
+ "cell_type": "code",
708
+ "execution_count": null,
709
+ "id": "8feaaca1-b05e-4e4e-97ce-18931d908eb7",
710
+ "metadata": {},
711
+ "outputs": [],
712
+ "source": []
713
+ }
714
+ ],
715
+ "metadata": {
716
+ "kernelspec": {
717
+ "display_name": "Python 3",
718
+ "language": "python",
719
+ "name": "python3"
720
+ },
721
+ "language_info": {
722
+ "codemirror_mode": {
723
+ "name": "ipython",
724
+ "version": 3
725
+ },
726
+ "file_extension": ".py",
727
+ "mimetype": "text/x-python",
728
+ "name": "python",
729
+ "nbconvert_exporter": "python",
730
+ "pygments_lexer": "ipython3",
731
+ "version": "3.8.8"
732
+ }
733
+ },
734
+ "nbformat": 4,
735
+ "nbformat_minor": 5
736
+ }