Add SetFit model
Browse files
- 1_Pooling/config.json +10 -0
- README.md +341 -0
- config.json +24 -0
- config_sentence_transformers.json +9 -0
- config_setfit.json +4 -0
- model.safetensors +3 -0
- model_head.pkl +3 -0
- modules.json +14 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +51 -0
- tokenizer.json +0 -0
- tokenizer_config.json +59 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 768,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,341 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: setfit
|
3 |
+
tags:
|
4 |
+
- setfit
|
5 |
+
- sentence-transformers
|
6 |
+
- text-classification
|
7 |
+
- generated_from_setfit_trainer
|
8 |
+
base_model: sentence-transformers/paraphrase-mpnet-base-v2
|
9 |
+
metrics:
|
10 |
+
- accuracy
|
11 |
+
widget:
|
12 |
+
- text: in some aspects of it its individual and some aspects of these locations we
|
13 |
+
could have very granular you know type of changes we find its almost impossible
|
14 |
+
to communicate those to people you know that it just sounds like a smear and so
|
15 |
+
we cluster things at the at the boundary of what is a smear able sounding thing
|
16 |
+
for the most part and so thats what allows me to listen to an experience like
|
17 |
+
yours and think you know dont think that was so much a location three experience
|
18 |
+
probably as maybe something more in location two layer three yeah all right
|
19 |
+
- text: great for relationships
|
20 |
+
- text: i feel like i no longer decide to do things i just end up doing them
|
21 |
+
- text: merged with experience
|
22 |
+
- text: reductions in both conditioning and the importance of outcomes make goal attainment
|
23 |
+
increasingly less pertinent
|
24 |
+
pipeline_tag: text-classification
|
25 |
+
inference: true
|
26 |
+
model-index:
|
27 |
+
- name: SetFit with sentence-transformers/paraphrase-mpnet-base-v2
|
28 |
+
results:
|
29 |
+
- task:
|
30 |
+
type: text-classification
|
31 |
+
name: Text Classification
|
32 |
+
dataset:
|
33 |
+
name: Unknown
|
34 |
+
type: unknown
|
35 |
+
split: test
|
36 |
+
metrics:
|
37 |
+
- type: accuracy
|
38 |
+
value: 0.36705882352941177
|
39 |
+
name: Accuracy
|
40 |
+
---
|
41 |
+
|
42 |
+
# SetFit with sentence-transformers/paraphrase-mpnet-base-v2
|
43 |
+
|
44 |
+
This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [sentence-transformers/paraphrase-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-mpnet-base-v2) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification.
|
45 |
+
|
46 |
+
The model has been trained using an efficient few-shot learning technique that involves:
|
47 |
+
|
48 |
+
1. Fine-tuning a [Sentence Transformer](https://www.sbert.net) with contrastive learning.
|
49 |
+
2. Training a classification head with features from the fine-tuned Sentence Transformer.
|
50 |
+
|
51 |
+
## Model Details
|
52 |
+
|
53 |
+
### Model Description
|
54 |
+
- **Model Type:** SetFit
|
55 |
+
- **Sentence Transformer body:** [sentence-transformers/paraphrase-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-mpnet-base-v2)
|
56 |
+
- **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
|
57 |
+
- **Maximum Sequence Length:** 512 tokens
|
58 |
+
- **Number of Classes:** 29 classes
|
59 |
+
<!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
|
60 |
+
<!-- - **Language:** Unknown -->
|
61 |
+
<!-- - **License:** Unknown -->
|
62 |
+
|
63 |
+
### Model Sources
|
64 |
+
|
65 |
+
- **Repository:** [SetFit on GitHub](https://github.com/huggingface/setfit)
|
66 |
+
- **Paper:** [Efficient Few-Shot Learning Without Prompts](https://arxiv.org/abs/2209.11055)
|
67 |
+
- **Blogpost:** [SetFit: Efficient Few-Shot Learning Without Prompts](https://huggingface.co/blog/setfit)
|
68 |
+
|
69 |
+
### Model Labels
|
70 |
+
| Label | Examples |
|
71 |
+
|:------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
72 |
+
| 16 | <ul><li>'visual effects and the sensory glitches'</li><li>'intrinsic to the unfolding'</li><li>'the reason for this lies in the brain regions involved and the impact that even rudimentary language use can have on the deepening process versus silence'</li></ul> |
|
73 |
+
| 7 | <ul><li>'nothing appears to have individual existence of itself distinctions begin to fade into the flatness which happens increasingly the further out in locations one goes'</li><li>'and if you really deepen into layer four in location two your system will sooner or later be unable to really stabilize that in location two and it will shift into location four'</li><li>'those are such subjective qualities that dont necessarily apply to layer four and so but lets go earlier here and make sure that you know you really are switching between location one and location two yeah thats a little unusual so lets make sure thats happening you actually feel like youre at times when youre in the world your system is literally going from our our nondual perception tests right which involve like a looker in here'</li></ul> |
|
74 |
+
| 24 | <ul><li>'fundamental for wellbeing for a long time very deepened got into the no emotion sort of place with it you know perhaps'</li><li>'because thats a tough place to be so if you and they doubt probably and they often doubt that they are even in fundamental being at that point because its such a difficult place to be it feels like they left fundamental wellbeing and entered a dark night of the soul of some kind'</li><li>'was able to settle back into fundamental wellbeing after it got shaken up'</li></ul> |
|
75 |
+
| 13 | <ul><li>'so i think you probably said that in your document but in that experience that sounded like location four probably layer two'</li><li>'there was no choice and it kept unfolding'</li><li>'the loss of perceptual access to layer 2 can affect things like dimensionality of perception which becomes progressively flat in later locations'</li></ul> |
|
76 |
+
| 5 | <ul><li>'and everything arises is objects and the subject is this space which is me so it is the'</li><li>'in location 2 this is a nondual experience of layer 2that is the perception of separation between subject observer and object observed is dissolvedand so it feels as though one is indistinct from the space in which everything arises and deeper into the layer that one is indistinct from the substance of everything arising as well'</li><li>'something into this experience of two so my experience of this has its just now releasing a lot of the as of a couple of days ago thought it might be wise to look at this yeah so ive been experiencing you know this very strange weird nonduality type'</li></ul> |
|
77 |
+
| 19 | <ul><li>'roommates and spouses can find what seem like irrational and imposing demands to be a common annoyance and their counter perspectives met with unyielding expectations by the finder'</li><li>'healing'</li><li>'divine or being in union'</li></ul> |
|
78 |
+
| 26 | <ul><li>'you have a sense of perception arising from spaciousness in terms of the thoughts in terms of the vision and auditory based exercises in terms of whats noticing it'</li><li>'i think the emptiness is a different one'</li><li>'it sounds a little bit like you might in a way you sound like almost locked in to layer two'</li></ul> |
|
79 |
+
| 6 | <ul><li>'i can see the broadband'</li><li>'and but theres this is like one very positive feeling where i in a very strong connectedness to to to other people um i so there is a sense of self but its not its not important so its but but i can feel the difference'</li><li>'and what you would be what would really indicate that shift is that duality returning to your experience where there really is a sense of something that is dissolving into or merging with the divine presence and everything and in addition to that meta emotion of love joy compassion um but you can also begin to experience in location two layer three'</li></ul> |
|
80 |
+
| 14 | <ul><li>'highly functioning'</li><li>'there is also some return of emotion on the path of humanity usually impersonal forms of love which also relate to layer 3 '</li><li>'it becomes more difficult access to layer 3 and to return to previous locations'</li></ul> |
|
81 |
+
| 1 | <ul><li>'silence and stillness'</li><li>'and its very its not what i am you know like theres no continuity between me and that spaciousness you know i can feel some spaciousness maybe but man theres you know no continuity between me and that and that would be fine that would be a location one layer two type experience certainly fine'</li><li>'and layer two of location one'</li></ul> |
|
82 |
+
| 10 | <ul><li>'it feels untouched and untouchable'</li><li>'so that would mean you know any qualities of joy or love compassion anything that feels sort of sacred or divine if you are prone to that experience and location three or otherwise that sense of presence pervading everything especially if you can get a sense of it being somehow conscious or intelligent those are sort of location three type layer three experiences'</li><li>'an all pervading field of presence or beingness that feels profoundly rich complete full alive'</li></ul> |
|
83 |
+
| 3 | <ul><li>'in location 1 layer 4 the marketplace is a forum of exchange not just of goods but of ideas cultures and kindness'</li><li>'expanded sense of self where my attention is most of the time feels more real than anything previous'</li><li>'the boundaries between work and play blur in location 1 layer 4 each act imbued with purpose and the joy of being'</li></ul> |
|
84 |
+
| 12 | <ul><li>'i think having introspect is a bit more i had some i was getting married in may this year so i had some resistance to the idea of progressing beyond layer two or maybe layer three because i felt that it would be doing a disservice to my partner to go into marriage without experiencing love'</li><li>'the most distinctive quality of layer 1 in location 4 and later is that the mind is experienced as functioning autonomously'</li><li>'instead of action being experienced as the consequence of thought and intentionality it feels intrinsic to the unfolding itself and is silent and selfrevealing'</li></ul> |
|
85 |
+
| 27 | <ul><li>'awareness knowingness'</li><li>'theres a quietness theres a vividness to the visual field things look more beautiful theres more appreciation was kind of a question that kind of had for you is it feels like for me beauty is a little bit of a gateway when i notice and im not talking about something that has to be you know prototypically beautiful but just like the pot sitting in the kitchen that suddenly look beautiful and'</li><li>'centralis and connected'</li></ul> |
|
86 |
+
| 20 | <ul><li>'theres no sense of divinity or anything theres no sense of panpsychist thing all there is things become very very alive everything is full of life and theres a joy i was thinking about love actually just this morning and theres really no sense of love because theres not a sense of other to be loved'</li><li>'it felt like my vision was going back like like something was wrong with my vision um so thats the only the only thing it just seems to mean it cant be more flat but you know whatever that sort of you know sense of just you know no extension you know it again it seems like there can be extension but its not the default at all you know like i said i wake up i wake up and everythings flat'</li><li>'life is just unfolding'</li></ul> |
|
87 |
+
| 21 | <ul><li>'i saw shapes in language'</li><li>'seeing transparent shapes'</li><li>'geometric shapes'</li></ul> |
|
88 |
+
| 17 | <ul><li>'news junkies may find themselves much less interested in the news'</li><li>'deconditioning and reconditioning'</li><li>'that on a deep level'</li></ul> |
|
89 |
+
| 2 | <ul><li>'as with layer 2 the movement in and out of these temporary experiences can give location 1 finders a sense of not being there yet which is usually not correctthere are simply more locations and depths within them to explore and they are at the beginning of that process unfolding'</li><li>'at the early end of layer 3 it feels like an essence or presence infusing but different from experience this is typically initially perceived as beginning to infuse the spaciousness of layer 2'</li><li>'the body can relax and the blocks can relax as well and a feeling of being of being connected to my environment feeling that around me and more through the center of the heart area and feeling like it is looking back at me and and yeah feeling a sense of of emptiness or loneliness or loneliness but but then it can change and it feels quite quite full and and that arises also within within the center of my heart or my space when my heart is and if i sort of feel feel feel into my heart theres a sense of sacredness or our maternal beingness there like a like a feeling of of of purity of purity of timelessness like like a sense of a sense of perfection um expression or play and um yeah sort of so thats when i do the exercises where i stop and look and thats what i tend to find but i guess sort of in my day to day life i feel as though me and i still have i still feel like im a self ive got a body but but i can just take a moment and i can feel that spaciousness around me and and often like ill work quite a lot so often ill be quite tired and i do that energy that really changes i feel much more energized and relaxed when i do that'</li></ul> |
|
90 |
+
| 0 | <ul><li>'most of the time curious and open rather than fearful'</li><li>'fundamental wellbeing also does not typically show up as persistent equanimity or bliss in location 1 as some also believe will be the case'</li><li>'very attached to my mind'</li></ul> |
|
91 |
+
| 15 | <ul><li>'there is an ever greater depth of stillnesssilence and an incomparable quality of freedom and peace which is the classical freedom from suffering pursued by spiritual traditions for millennia'</li><li>'the thing the thing to keep in mind is that for a system for a layer four location four especially but youre sort of close enough you know youre like a hair away from the thing type system what reading those books will do is basically prime you basically primes the system'</li><li>'the untouchable quality of layer 3 becomes irrelevant untouchable in relation to what because nothing but reality could ever be the peace feels absolute and of a different dimension than emotional or psychological peace'</li></ul> |
|
92 |
+
| 9 | <ul><li>'so if it were like for instance location three layer two you know it would be a spacious emptiness mixed with the divine or the panpsychist presence or whatever which is kind of a distance from god you know kind of a separation in a way'</li><li>'the quality of spaciousness emptiness expansiveness openness and so on feels as though it pervades everything as the presence of divinity or as the panpsychist presence depending on how location 3 is showing up for that person'</li><li>'feeling like youre dissolving into it'</li></ul> |
|
93 |
+
| 28 | <ul><li>'layer four and sort of more primed and ready when we make it to that content'</li><li>'something is just revealed in a moment'</li><li>'the losing of that illusion nothing is gained'</li></ul> |
|
94 |
+
| 4 | <ul><li>'location two layer one'</li><li>'normal enhanced functioning of the mind'</li><li>'the thing about all the things that youre mentioning is that they really you know for someone else could be something that might you know trigger them going to a lower location or something like that but theyre not necessarily things that would hold someone in a location you know like you can be in location one layer two as your dominant experience and still have you know triggers that pull you down to layer one or something but nonetheless be anchored you know at a higher layer than that or even over in location two'</li></ul> |
|
95 |
+
| 11 | <ul><li>'it is common for layer 4 to be experienced as an unknowable or a mystery'</li><li>'we want to think about it that i think part of it is always with someone that we have to you know really come to grips with that really give our systems permission for that really communicate that were okay with that youve spent a long time now telling your system this is what i want experience wise something like this it doesnt have to be exactly like this im open to some variation but you know within these boundaries right and now youre more or less saying something very different than that to go to location four layer four and location three'</li><li>'layer 4 is not easily accessible in location 3'</li></ul> |
|
96 |
+
| 23 | <ul><li>'since ive been in fundamental wellbeing ive been communicating'</li><li>'increased or total focus on the present moment'</li><li>'when that timer goes off and youre taking time to sink into fundamental well being'</li></ul> |
|
97 |
+
| 22 | <ul><li>'fundamental wellbeing prior to where youve landed'</li><li>'it just sounds like its in the direction of fundamental well being type of stuff to me'</li><li>'wellbeing'</li></ul> |
|
98 |
+
| 8 | <ul><li>'layer 1 is not the default layer that people transition to or experience in location 3 however it can remainquite accessible'</li><li>'location three layer one'</li><li>' it will often be experienced from perception being centered in deeper layers'</li></ul> |
|
99 |
+
| 25 | <ul><li>'its not necessarily because its temporary and because the reference point is remaining at layer one um its kind of its not necessarily reflecting on and deepening into the sense of actually being that all pervading presence even though that is your subjective experience if i ask you about it yeah'</li><li>'just whatever is noticing'</li><li>'emotion kind of the mind'</li></ul> |
|
100 |
+
| 18 | <ul><li>'location two because youve got that single meta emotion'</li><li>'deeply into nonduality and location two'</li><li>'make logical sense or would be considered the'</li></ul> |
|
101 |
+
|
102 |
+
## Evaluation
|
103 |
+
|
104 |
+
### Metrics
|
105 |
+
| Label | Accuracy |
|
106 |
+
|:--------|:---------|
|
107 |
+
| **all** | 0.3671 |
|
108 |
+
|
109 |
+
## Uses
|
110 |
+
|
111 |
+
### Direct Use for Inference
|
112 |
+
|
113 |
+
First install the SetFit library:
|
114 |
+
|
115 |
+
```bash
|
116 |
+
pip install setfit
|
117 |
+
```
|
118 |
+
|
119 |
+
Then you can load this model and run inference.
|
120 |
+
|
121 |
+
```python
|
122 |
+
from setfit import SetFitModel
|
123 |
+
|
124 |
+
# Download from the 🤗 Hub
|
125 |
+
model = SetFitModel.from_pretrained("dendimaki/few-shots-apeiron-model-v3")
|
126 |
+
# Run inference
|
127 |
+
preds = model("merged with experience")
|
128 |
+
```
|
129 |
+
|
130 |
+
<!--
|
131 |
+
### Downstream Use
|
132 |
+
|
133 |
+
*List how someone could finetune this model on their own dataset.*
|
134 |
+
-->
|
135 |
+
|
136 |
+
<!--
|
137 |
+
### Out-of-Scope Use
|
138 |
+
|
139 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
140 |
+
-->
|
141 |
+
|
142 |
+
<!--
|
143 |
+
## Bias, Risks and Limitations
|
144 |
+
|
145 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
146 |
+
-->
|
147 |
+
|
148 |
+
<!--
|
149 |
+
### Recommendations
|
150 |
+
|
151 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
152 |
+
-->
|
153 |
+
|
154 |
+
## Training Details
|
155 |
+
|
156 |
+
### Training Set Metrics
|
157 |
+
| Training set | Min | Mean    | Max |
|
158 |
+
|:-------------|:----|:--------|:----|
|
159 |
+
| Word count | 1 | 23.8579 | 246 |
|
160 |
+
|
161 |
+
| Label | Training Sample Count |
|
162 |
+
|:------|:----------------------|
|
163 |
+
| 0 | 20 |
|
164 |
+
| 1 | 20 |
|
165 |
+
| 2 | 11 |
|
166 |
+
| 3 | 20 |
|
167 |
+
| 4 | 20 |
|
168 |
+
| 5 | 20 |
|
169 |
+
| 6 | 20 |
|
170 |
+
| 7 | 19 |
|
171 |
+
| 8 | 20 |
|
172 |
+
| 9 | 15 |
|
173 |
+
| 10 | 20 |
|
174 |
+
| 11 | 18 |
|
175 |
+
| 12 | 20 |
|
176 |
+
| 13 | 20 |
|
177 |
+
| 14 | 20 |
|
178 |
+
| 15 | 20 |
|
179 |
+
| 16 | 20 |
|
180 |
+
| 17 | 20 |
|
181 |
+
| 18 | 20 |
|
182 |
+
| 19 | 20 |
|
183 |
+
| 20 | 20 |
|
184 |
+
| 21 | 20 |
|
185 |
+
| 22 | 20 |
|
186 |
+
| 23 | 20 |
|
187 |
+
| 24 | 20 |
|
188 |
+
| 25 | 20 |
|
189 |
+
| 26 | 20 |
|
190 |
+
| 27 | 20 |
|
191 |
+
| 28 | 20 |
|
192 |
+
|
193 |
+
### Training Hyperparameters
|
194 |
+
- batch_size: (16, 16)
|
195 |
+
- num_epochs: (3, 3)
|
196 |
+
- max_steps: -1
|
197 |
+
- sampling_strategy: oversampling
|
198 |
+
- num_iterations: 20
|
199 |
+
- body_learning_rate: (2e-05, 2e-05)
|
200 |
+
- head_learning_rate: 2e-05
|
201 |
+
- loss: CosineSimilarityLoss
|
202 |
+
- distance_metric: cosine_distance
|
203 |
+
- margin: 0.25
|
204 |
+
- end_to_end: False
|
205 |
+
- use_amp: False
|
206 |
+
- warmup_proportion: 0.1
|
207 |
+
- seed: 42
|
208 |
+
- eval_max_steps: -1
|
209 |
+
- load_best_model_at_end: False
|
210 |
+
|
211 |
+
### Training Results
|
212 |
+
| Epoch | Step | Training Loss | Validation Loss |
|
213 |
+
|:------:|:----:|:-------------:|:---------------:|
|
214 |
+
| 0.0007 | 1 | 0.2504 | - |
|
215 |
+
| 0.0355 | 50 | 0.2577 | - |
|
216 |
+
| 0.0710 | 100 | 0.2139 | - |
|
217 |
+
| 0.1065 | 150 | 0.1838 | - |
|
218 |
+
| 0.1420 | 200 | 0.2082 | - |
|
219 |
+
| 0.1776 | 250 | 0.2038 | - |
|
220 |
+
| 0.2131 | 300 | 0.186 | - |
|
221 |
+
| 0.2486 | 350 | 0.1733 | - |
|
222 |
+
| 0.2841 | 400 | 0.2002 | - |
|
223 |
+
| 0.3196 | 450 | 0.1911 | - |
|
224 |
+
| 0.3551 | 500 | 0.1404 | - |
|
225 |
+
| 0.3906 | 550 | 0.1506 | - |
|
226 |
+
| 0.4261 | 600 | 0.1356 | - |
|
227 |
+
| 0.4616 | 650 | 0.1458 | - |
|
228 |
+
| 0.4972 | 700 | 0.1048 | - |
|
229 |
+
| 0.5327 | 750 | 0.1125 | - |
|
230 |
+
| 0.5682 | 800 | 0.0842 | - |
|
231 |
+
| 0.6037 | 850 | 0.0391 | - |
|
232 |
+
| 0.6392 | 900 | 0.1178 | - |
|
233 |
+
| 0.6747 | 950 | 0.0856 | - |
|
234 |
+
| 0.7102 | 1000 | 0.0909 | - |
|
235 |
+
| 0.7457 | 1050 | 0.1068 | - |
|
236 |
+
| 0.7812 | 1100 | 0.0535 | - |
|
237 |
+
| 0.8168 | 1150 | 0.0673 | - |
|
238 |
+
| 0.8523 | 1200 | 0.0234 | - |
|
239 |
+
| 0.8878 | 1250 | 0.0625 | - |
|
240 |
+
| 0.9233 | 1300 | 0.0841 | - |
|
241 |
+
| 0.9588 | 1350 | 0.0508 | - |
|
242 |
+
| 0.9943 | 1400 | 0.05 | - |
|
243 |
+
| 1.0298 | 1450 | 0.0494 | - |
|
244 |
+
| 1.0653 | 1500 | 0.0135 | - |
|
245 |
+
| 1.1009 | 1550 | 0.0063 | - |
|
246 |
+
| 1.1364 | 1600 | 0.0938 | - |
|
247 |
+
| 1.1719 | 1650 | 0.0445 | - |
|
248 |
+
| 1.2074 | 1700 | 0.0108 | - |
|
249 |
+
| 1.2429 | 1750 | 0.0236 | - |
|
250 |
+
| 1.2784 | 1800 | 0.0108 | - |
|
251 |
+
| 1.3139 | 1850 | 0.0153 | - |
|
252 |
+
| 1.3494 | 1900 | 0.0051 | - |
|
253 |
+
| 1.3849 | 1950 | 0.0288 | - |
|
254 |
+
| 1.4205 | 2000 | 0.022 | - |
|
255 |
+
| 1.4560 | 2050 | 0.0058 | - |
|
256 |
+
| 1.4915 | 2100 | 0.0092 | - |
|
257 |
+
| 1.5270 | 2150 | 0.0134 | - |
|
258 |
+
| 1.5625 | 2200 | 0.0073 | - |
|
259 |
+
| 1.5980 | 2250 | 0.0103 | - |
|
260 |
+
| 1.6335 | 2300 | 0.0029 | - |
|
261 |
+
| 1.6690 | 2350 | 0.0049 | - |
|
262 |
+
| 1.7045 | 2400 | 0.0586 | - |
|
263 |
+
| 1.7401 | 2450 | 0.0457 | - |
|
264 |
+
| 1.7756 | 2500 | 0.0132 | - |
|
265 |
+
| 1.8111 | 2550 | 0.0585 | - |
|
266 |
+
| 1.8466 | 2600 | 0.0025 | - |
|
267 |
+
| 1.8821 | 2650 | 0.0118 | - |
|
268 |
+
| 1.9176 | 2700 | 0.0012 | - |
|
269 |
+
| 1.9531 | 2750 | 0.0618 | - |
|
270 |
+
| 1.9886 | 2800 | 0.0036 | - |
|
271 |
+
| 2.0241 | 2850 | 0.0079 | - |
|
272 |
+
| 2.0597 | 2900 | 0.0034 | - |
|
273 |
+
| 2.0952 | 2950 | 0.0496 | - |
|
274 |
+
| 2.1307 | 3000 | 0.0041 | - |
|
275 |
+
| 2.1662 | 3050 | 0.0523 | - |
|
276 |
+
| 2.2017 | 3100 | 0.0055 | - |
|
277 |
+
| 2.2372 | 3150 | 0.007 | - |
|
278 |
+
| 2.2727 | 3200 | 0.0493 | - |
|
279 |
+
| 2.3082 | 3250 | 0.0055 | - |
|
280 |
+
| 2.3438 | 3300 | 0.0649 | - |
|
281 |
+
| 2.3793 | 3350 | 0.005 | - |
|
282 |
+
| 2.4148 | 3400 | 0.0028 | - |
|
283 |
+
| 2.4503 | 3450 | 0.0564 | - |
|
284 |
+
| 2.4858 | 3500 | 0.0436 | - |
|
285 |
+
| 2.5213 | 3550 | 0.0018 | - |
|
286 |
+
| 2.5568 | 3600 | 0.0077 | - |
|
287 |
+
| 2.5923 | 3650 | 0.0067 | - |
|
288 |
+
| 2.6278 | 3700 | 0.0029 | - |
|
289 |
+
| 2.6634 | 3750 | 0.0031 | - |
|
290 |
+
| 2.6989 | 3800 | 0.0283 | - |
|
291 |
+
| 2.7344 | 3850 | 0.1042 | - |
|
292 |
+
| 2.7699 | 3900 | 0.055 | - |
|
293 |
+
| 2.8054 | 3950 | 0.0689 | - |
|
294 |
+
| 2.8409 | 4000 | 0.0031 | - |
|
295 |
+
| 2.8764 | 4050 | 0.0014 | - |
|
296 |
+
| 2.9119 | 4100 | 0.0017 | - |
|
297 |
+
| 2.9474 | 4150 | 0.0033 | - |
|
298 |
+
| 2.9830 | 4200 | 0.0634 | - |
|
299 |
+
|
300 |
+
### Framework Versions
|
301 |
+
- Python: 3.10.12
|
302 |
+
- SetFit: 1.0.3
|
303 |
+
- Sentence Transformers: 2.7.0
|
304 |
+
- Transformers: 4.40.1
|
305 |
+
- PyTorch: 2.2.1+cu121
|
306 |
+
- Datasets: 2.19.0
|
307 |
+
- Tokenizers: 0.19.1
|
308 |
+
|
309 |
+
## Citation
|
310 |
+
|
311 |
+
### BibTeX
|
312 |
+
```bibtex
|
313 |
+
@article{https://doi.org/10.48550/arxiv.2209.11055,
|
314 |
+
doi = {10.48550/ARXIV.2209.11055},
|
315 |
+
url = {https://arxiv.org/abs/2209.11055},
|
316 |
+
author = {Tunstall, Lewis and Reimers, Nils and Jo, Unso Eun Seo and Bates, Luke and Korat, Daniel and Wasserblat, Moshe and Pereg, Oren},
|
317 |
+
keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences},
|
318 |
+
title = {Efficient Few-Shot Learning Without Prompts},
|
319 |
+
publisher = {arXiv},
|
320 |
+
year = {2022},
|
321 |
+
copyright = {Creative Commons Attribution 4.0 International}
|
322 |
+
}
|
323 |
+
```
|
324 |
+
|
325 |
+
<!--
|
326 |
+
## Glossary
|
327 |
+
|
328 |
+
*Clearly define terms in order to be accessible across audiences.*
|
329 |
+
-->
|
330 |
+
|
331 |
+
<!--
|
332 |
+
## Model Card Authors
|
333 |
+
|
334 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
335 |
+
-->
|
336 |
+
|
337 |
+
<!--
|
338 |
+
## Model Card Contact
|
339 |
+
|
340 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
341 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "sentence-transformers/paraphrase-mpnet-base-v2",
|
3 |
+
"architectures": [
|
4 |
+
"MPNetModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"bos_token_id": 0,
|
8 |
+
"eos_token_id": 2,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 768,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 3072,
|
14 |
+
"layer_norm_eps": 1e-05,
|
15 |
+
"max_position_embeddings": 514,
|
16 |
+
"model_type": "mpnet",
|
17 |
+
"num_attention_heads": 12,
|
18 |
+
"num_hidden_layers": 12,
|
19 |
+
"pad_token_id": 1,
|
20 |
+
"relative_attention_num_buckets": 32,
|
21 |
+
"torch_dtype": "float32",
|
22 |
+
"transformers_version": "4.40.1",
|
23 |
+
"vocab_size": 30527
|
24 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "2.0.0",
|
4 |
+
"transformers": "4.7.0",
|
5 |
+
"pytorch": "1.9.0+cu102"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null
|
9 |
+
}
|
config_setfit.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"labels": null,
|
3 |
+
"normalize_embeddings": false
|
4 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db6c68393cf6f272aaefa4a5b684d4323129399029cdac1283e7c935c85d9338
|
3 |
+
size 437967672
|
model_head.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f5d9add423fada2569b7b9e4b987c9f260895d5652d03b081e04a6708956566
|
3 |
+
size 179487
|
modules.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
}
|
14 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 512,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"cls_token": {
|
10 |
+
"content": "<s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"eos_token": {
|
17 |
+
"content": "</s>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"mask_token": {
|
24 |
+
"content": "<mask>",
|
25 |
+
"lstrip": true,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"pad_token": {
|
31 |
+
"content": "<pad>",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
},
|
37 |
+
"sep_token": {
|
38 |
+
"content": "</s>",
|
39 |
+
"lstrip": false,
|
40 |
+
"normalized": false,
|
41 |
+
"rstrip": false,
|
42 |
+
"single_word": false
|
43 |
+
},
|
44 |
+
"unk_token": {
|
45 |
+
"content": "[UNK]",
|
46 |
+
"lstrip": false,
|
47 |
+
"normalized": false,
|
48 |
+
"rstrip": false,
|
49 |
+
"single_word": false
|
50 |
+
}
|
51 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "<s>",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"1": {
|
12 |
+
"content": "<pad>",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"2": {
|
20 |
+
"content": "</s>",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"104": {
|
28 |
+
"content": "[UNK]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"30526": {
|
36 |
+
"content": "<mask>",
|
37 |
+
"lstrip": true,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"bos_token": "<s>",
|
45 |
+
"clean_up_tokenization_spaces": true,
|
46 |
+
"cls_token": "<s>",
|
47 |
+
"do_basic_tokenize": true,
|
48 |
+
"do_lower_case": true,
|
49 |
+
"eos_token": "</s>",
|
50 |
+
"mask_token": "<mask>",
|
51 |
+
"model_max_length": 512,
|
52 |
+
"never_split": null,
|
53 |
+
"pad_token": "<pad>",
|
54 |
+
"sep_token": "</s>",
|
55 |
+
"strip_accents": null,
|
56 |
+
"tokenize_chinese_chars": true,
|
57 |
+
"tokenizer_class": "MPNetTokenizer",
|
58 |
+
"unk_token": "[UNK]"
|
59 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|