ivanleomk commited on
Commit
a9d57c2
1 Parent(s): 2ad2753

Add BERTopic model

Browse files
Files changed (4) hide show
  1. README.md +74 -0
  2. config.json +16 -0
  3. topic_embeddings.safetensors +3 -0
  4. topics.json +503 -0
README.md ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ tags:
4
+ - bertopic
5
+ library_name: bertopic
6
+ pipeline_tag: text-classification
7
+ ---
8
+
9
+ # rag-topic-model
10
+
11
+ This is a [BERTopic](https://github.com/MaartenGr/BERTopic) model.
12
+ BERTopic is a flexible and modular topic modeling framework that allows for the generation of easily interpretable topics from large datasets.
13
+
14
+ ## Usage
15
+
16
+ To use this model, please install BERTopic:
17
+
18
+ ```
19
+ pip install -U bertopic
20
+ ```
21
+
22
+ You can use the model as follows:
23
+
24
+ ```python
25
+ from bertopic import BERTopic
26
+ topic_model = BERTopic.load("ivanleomk/rag-topic-model")
27
+
28
+ topic_model.get_topic_info()
29
+ ```
30
+
31
+ ## Topic overview
32
+
33
+ * Number of topics: 5
34
+ * Number of training documents: 243
35
+
36
+ <details>
37
+ <summary>Click here for an overview of all topics.</summary>
38
+
39
+ | Topic ID | Topic Keywords | Topic Frequency | Label |
40
+ |----------|----------------|-----------------|-------|
41
+ | -1 | the - verification - my - for - code | 19 | -1_the_verification_my_for |
42
+ | 0 | klarna - to - my - and - the | 98 | 0_klarna_to_my_and |
43
+ | 1 | my - the - return - store - still | 69 | 1_my_the_return_store |
44
+ | 2 | card - onetime - my - it - for | 33 | 2_card_onetime_my_it |
45
+ | 3 | payment - my - due - date - the | 24 | 3_payment_my_due_date |
46
+
47
+ </details>
48
+
49
+ ## Training hyperparameters
50
+
51
+ * calculate_probabilities: False
52
+ * language: None
53
+ * low_memory: False
54
+ * min_topic_size: 10
55
+ * n_gram_range: (1, 1)
56
+ * nr_topics: None
57
+ * seed_topic_list: None
58
+ * top_n_words: 10
59
+ * verbose: False
60
+ * zeroshot_min_similarity: 0.7
61
+ * zeroshot_topic_list: None
62
+
63
+ ## Framework versions
64
+
65
+ * Numpy: 2.0.2
66
+ * HDBSCAN: 0.8.40
67
+ * UMAP: 0.5.7
68
+ * Pandas: 2.2.3
69
+ * Scikit-Learn: 1.5.2
70
+ * Sentence-transformers: 3.3.1
71
+ * Transformers: 4.46.3
72
+ * Numba: 0.60.0
73
+ * Plotly: 5.24.1
74
+ * Python: 3.9.6
config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "calculate_probabilities": false,
3
+ "language": null,
4
+ "low_memory": false,
5
+ "min_topic_size": 10,
6
+ "n_gram_range": [
7
+ 1,
8
+ 1
9
+ ],
10
+ "nr_topics": null,
11
+ "seed_topic_list": null,
12
+ "top_n_words": 10,
13
+ "verbose": false,
14
+ "zeroshot_min_similarity": 0.7,
15
+ "zeroshot_topic_list": null
16
+ }
topic_embeddings.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1384d89ba5fb76e29d40fbf2f181cc734803e4f0132a177597658df4740d522
3
+ size 7768
topics.json ADDED
@@ -0,0 +1,503 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "topic_representations": {
3
+ "-1": [
4
+ [
5
+ "the",
6
+ 0.09492240491569903
7
+ ],
8
+ [
9
+ "verification",
10
+ 0.06970153771334807
11
+ ],
12
+ [
13
+ "my",
14
+ 0.06435254147435229
15
+ ],
16
+ [
17
+ "for",
18
+ 0.06319899047478812
19
+ ],
20
+ [
21
+ "code",
22
+ 0.057297419223293365
23
+ ],
24
+ [
25
+ "whats",
26
+ 0.05231884901424003
27
+ ],
28
+ [
29
+ "step",
30
+ 0.05048680075415213
31
+ ],
32
+ [
33
+ "email",
34
+ 0.04766755006544382
35
+ ],
36
+ [
37
+ "to",
38
+ 0.04691532073995217
39
+ ],
40
+ [
41
+ "up",
42
+ 0.046032377461675815
43
+ ]
44
+ ],
45
+ "0": [
46
+ [
47
+ "klarna",
48
+ 0.09050803422673742
49
+ ],
50
+ [
51
+ "to",
52
+ 0.07953560782302285
53
+ ],
54
+ [
55
+ "my",
56
+ 0.06794819842776671
57
+ ],
58
+ [
59
+ "and",
60
+ 0.054212681447924974
61
+ ],
62
+ [
63
+ "the",
64
+ 0.05221408034992652
65
+ ],
66
+ [
67
+ "for",
68
+ 0.04986631308226638
69
+ ],
70
+ [
71
+ "im",
72
+ 0.046128149776214315
73
+ ],
74
+ [
75
+ "but",
76
+ 0.04290244254915681
77
+ ],
78
+ [
79
+ "card",
80
+ 0.038263701287704434
81
+ ],
82
+ [
83
+ "with",
84
+ 0.03806685276761612
85
+ ]
86
+ ],
87
+ "1": [
88
+ [
89
+ "my",
90
+ 0.0775192391698972
91
+ ],
92
+ [
93
+ "the",
94
+ 0.07083659770502172
95
+ ],
96
+ [
97
+ "return",
98
+ 0.06282550489124095
99
+ ],
100
+ [
101
+ "store",
102
+ 0.06030699514817177
103
+ ],
104
+ [
105
+ "still",
106
+ 0.05232050981790259
107
+ ],
108
+ [
109
+ "and",
110
+ 0.048850488312254275
111
+ ],
112
+ [
113
+ "to",
114
+ 0.04440410670920632
115
+ ],
116
+ [
117
+ "but",
118
+ 0.04435407181783497
119
+ ],
120
+ [
121
+ "weeks",
122
+ 0.04390700519309823
123
+ ],
124
+ [
125
+ "refund",
126
+ 0.04301759717367365
127
+ ]
128
+ ],
129
+ "2": [
130
+ [
131
+ "card",
132
+ 0.15173156476046484
133
+ ],
134
+ [
135
+ "onetime",
136
+ 0.09616516102360437
137
+ ],
138
+ [
139
+ "my",
140
+ 0.06662856796086986
141
+ ],
142
+ [
143
+ "it",
144
+ 0.06433874526122811
145
+ ],
146
+ [
147
+ "for",
148
+ 0.062288342197053426
149
+ ],
150
+ [
151
+ "but",
152
+ 0.05809184320425647
153
+ ],
154
+ [
155
+ "to",
156
+ 0.049909093780171436
157
+ ],
158
+ [
159
+ "and",
160
+ 0.04702598592456893
161
+ ],
162
+ [
163
+ "is",
164
+ 0.04567791318517409
165
+ ],
166
+ [
167
+ "the",
168
+ 0.044549831866569335
169
+ ]
170
+ ],
171
+ "3": [
172
+ [
173
+ "payment",
174
+ 0.12112085020085236
175
+ ],
176
+ [
177
+ "my",
178
+ 0.0998606282868899
179
+ ],
180
+ [
181
+ "due",
182
+ 0.09304345529961551
183
+ ],
184
+ [
185
+ "date",
186
+ 0.08822107880850269
187
+ ],
188
+ [
189
+ "the",
190
+ 0.07715617939671793
191
+ ],
192
+ [
193
+ "is",
194
+ 0.06934959789854092
195
+ ],
196
+ [
197
+ "cant",
198
+ 0.06277156684324331
199
+ ],
200
+ [
201
+ "plan",
202
+ 0.06161981560504852
203
+ ],
204
+ [
205
+ "than",
206
+ 0.04929585248403881
207
+ ],
208
+ [
209
+ "an",
210
+ 0.04763634941129883
211
+ ]
212
+ ]
213
+ },
214
+ "topics": [
215
+ 0,
216
+ 3,
217
+ 0,
218
+ 0,
219
+ 0,
220
+ 1,
221
+ 1,
222
+ 0,
223
+ 3,
224
+ 0,
225
+ 2,
226
+ 0,
227
+ 1,
228
+ 3,
229
+ 2,
230
+ 0,
231
+ 1,
232
+ 0,
233
+ 1,
234
+ 0,
235
+ 1,
236
+ 1,
237
+ 0,
238
+ 0,
239
+ 1,
240
+ 0,
241
+ 2,
242
+ 3,
243
+ 3,
244
+ 1,
245
+ 0,
246
+ 0,
247
+ 0,
248
+ 1,
249
+ 3,
250
+ 3,
251
+ 1,
252
+ 1,
253
+ 0,
254
+ 0,
255
+ 1,
256
+ 0,
257
+ 1,
258
+ 0,
259
+ 0,
260
+ 3,
261
+ 3,
262
+ 1,
263
+ 2,
264
+ 0,
265
+ 1,
266
+ 0,
267
+ 0,
268
+ 0,
269
+ 1,
270
+ 1,
271
+ -1,
272
+ 3,
273
+ 3,
274
+ 1,
275
+ 2,
276
+ 0,
277
+ 2,
278
+ 2,
279
+ 2,
280
+ 0,
281
+ 1,
282
+ 2,
283
+ 1,
284
+ 1,
285
+ 0,
286
+ 0,
287
+ 3,
288
+ 1,
289
+ 2,
290
+ 3,
291
+ 1,
292
+ 0,
293
+ 0,
294
+ 0,
295
+ 3,
296
+ 0,
297
+ 2,
298
+ 3,
299
+ 1,
300
+ 0,
301
+ 1,
302
+ 1,
303
+ 0,
304
+ 0,
305
+ 1,
306
+ -1,
307
+ 0,
308
+ 3,
309
+ 2,
310
+ 0,
311
+ 2,
312
+ 1,
313
+ -1,
314
+ -1,
315
+ 0,
316
+ 1,
317
+ 0,
318
+ 1,
319
+ -1,
320
+ 0,
321
+ 2,
322
+ 3,
323
+ 0,
324
+ 3,
325
+ 1,
326
+ 1,
327
+ 0,
328
+ 0,
329
+ 3,
330
+ 3,
331
+ 1,
332
+ 1,
333
+ 3,
334
+ 3,
335
+ 1,
336
+ 0,
337
+ 0,
338
+ 3,
339
+ 0,
340
+ 1,
341
+ 3,
342
+ 1,
343
+ 1,
344
+ 0,
345
+ 0,
346
+ 0,
347
+ 0,
348
+ 1,
349
+ 0,
350
+ 0,
351
+ -1,
352
+ 0,
353
+ 0,
354
+ 0,
355
+ -1,
356
+ 0,
357
+ 0,
358
+ 0,
359
+ -1,
360
+ -1,
361
+ 0,
362
+ 0,
363
+ 2,
364
+ 0,
365
+ 1,
366
+ 0,
367
+ 0,
368
+ 0,
369
+ 0,
370
+ 0,
371
+ 2,
372
+ 1,
373
+ 0,
374
+ 0,
375
+ 0,
376
+ -1,
377
+ 0,
378
+ 0,
379
+ -1,
380
+ 0,
381
+ 1,
382
+ -1,
383
+ 2,
384
+ 2,
385
+ 2,
386
+ 1,
387
+ 1,
388
+ 1,
389
+ 2,
390
+ 0,
391
+ 0,
392
+ 2,
393
+ 0,
394
+ 0,
395
+ 0,
396
+ 0,
397
+ 0,
398
+ 0,
399
+ 0,
400
+ 0,
401
+ 0,
402
+ 0,
403
+ 0,
404
+ 0,
405
+ 2,
406
+ 1,
407
+ 1,
408
+ 2,
409
+ -1,
410
+ 2,
411
+ 1,
412
+ 1,
413
+ 2,
414
+ 2,
415
+ 1,
416
+ 1,
417
+ 1,
418
+ 1,
419
+ 1,
420
+ 1,
421
+ 2,
422
+ 1,
423
+ -1,
424
+ 1,
425
+ 1,
426
+ 2,
427
+ 1,
428
+ 1,
429
+ 1,
430
+ 1,
431
+ 0,
432
+ 0,
433
+ -1,
434
+ 0,
435
+ -1,
436
+ 1,
437
+ 1,
438
+ 2,
439
+ 0,
440
+ 0,
441
+ 0,
442
+ 1,
443
+ 0,
444
+ 0,
445
+ 0,
446
+ 1,
447
+ 2,
448
+ 2,
449
+ 1,
450
+ 2,
451
+ -1,
452
+ -1,
453
+ 1,
454
+ -1,
455
+ 2,
456
+ 0,
457
+ 0
458
+ ],
459
+ "topic_sizes": {
460
+ "0": 98,
461
+ "3": 24,
462
+ "1": 69,
463
+ "2": 33,
464
+ "-1": 19
465
+ },
466
+ "topic_mapper": [
467
+ [
468
+ -1,
469
+ -1,
470
+ -1
471
+ ],
472
+ [
473
+ 0,
474
+ 0,
475
+ 3
476
+ ],
477
+ [
478
+ 1,
479
+ 1,
480
+ 0
481
+ ],
482
+ [
483
+ 2,
484
+ 2,
485
+ 2
486
+ ],
487
+ [
488
+ 3,
489
+ 3,
490
+ 1
491
+ ]
492
+ ],
493
+ "topic_labels": {
494
+ "-1": "-1_the_verification_my_for",
495
+ "0": "0_klarna_to_my_and",
496
+ "1": "1_my_the_return_store",
497
+ "2": "2_card_onetime_my_it",
498
+ "3": "3_payment_my_due_date"
499
+ },
500
+ "custom_labels": null,
501
+ "_outliers": 1,
502
+ "topic_aspects": {}
503
+ }