Add SetFit model
- 1_Pooling/config.json +10 -0
- README.md +282 -0
- config.json +24 -0
- config_sentence_transformers.json +10 -0
- config_setfit.json +7 -0
- model.safetensors +3 -0
- model_head.pkl +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +51 -0
- tokenizer.json +0 -0
- tokenizer_config.json +72 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
{
  "word_embedding_dimension": 768,
  "pooling_mode_cls_token": false,
  "pooling_mode_mean_tokens": true,
  "pooling_mode_max_tokens": false,
  "pooling_mode_mean_sqrt_len_tokens": false,
  "pooling_mode_weightedmean_tokens": false,
  "pooling_mode_lasttoken": false,
  "include_prompt": true
}
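The flags above select masked mean pooling over the token embeddings. A minimal sketch of that computation under standard sentence-transformers semantics (the `mean_pool` helper is illustrative, not part of this repo):

```python
import torch

def mean_pool(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    # Masked mean over the sequence axis, per pooling_mode_mean_tokens = true.
    mask = attention_mask.unsqueeze(-1).float()    # (batch, seq, 1)
    summed = (token_embeddings * mask).sum(dim=1)  # (batch, 768) for this model
    counts = mask.sum(dim=1).clamp(min=1e-9)       # guard against all-padding rows
    return summed / counts
```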
README.md
ADDED
@@ -0,0 +1,282 @@
---
base_model: sentence-transformers/all-mpnet-base-v2
library_name: setfit
metrics:
- accuracy
pipeline_tag: text-classification
tags:
- setfit
- sentence-transformers
- text-classification
- generated_from_setfit_trainer
widget:
- text: 'Having previously lived in D.C., Rochester and Detroit and having made regular
  trips on the thruways and turnpikes in-between, I can truly say that the rest
  stops along the New York Thruway are the least desirable for food offerings. Even
  the NJ Turnpike offers a much better selection, with Ohio striking the best balance
  overall. Delaware has the largest rest stop, which offers a great selection but
  at the cost of having to negotiate a mall-size parking lot. Although I don''t
  begrudge those who like McDonald''s, I can honestly say I''ve never eaten at a
  rest stop or airport McDonalds, even when there were no other options. There''s
  nothing wrong with wanting better food, so long as there are options available
  at reasonable prices.If there''s one thing for which I can give credit to the
  New York Thruway rest stops, it''s in forcing us to seek out roadside alternatives
  in the many communities along the way. As a result, my wife has an extensive collection
  of books on diners that has morphed into somewhat of an obsession over the years.
  Of course with smartphones and apps such as Yelp, finding exceptional food along
  the way has never been easier. Put another way, I see the thruway rest stop as
  a place for an early morning snack or cup of coffee when we''re desperate. Unfortunately,
  the options are at their worst at 2 am, no matter where one stops.

  '
- text: 'Now that Iran is actively funneling missiles, warheads and drones to Russia
  for use in Ukraine, and Russia is funneling technical expertise and supplies to
  Iran to make more weapons, things are quickly heating up and the clock is approaching
  midnight as Iran get closer and closer to weaponizing a nuclear MIRV ICBM.The
  no so cold war between Iran and Israel, Egypt, Saudi Arabia and the UAE is about
  to get very hot and Israel''s efforts to avoid aligning against Russia in Syrian
  airspace (thank you President Obama) is about to fail as the Russo-Nato proxy
  war in Ukraine spills into the Middle East and a heavily armed and nuclear Israel
  gets drawn into a very open conflict with Iran and Russia. The bombing of an
  Iranian plant inside Iran is major escalation and I doubt that the CIA and DIA
  were blindsided by the IDF operation as such a strike was likely meant to cripple
  Iranian efforts to resupply Russia as much as Iranian efforts to resupply Hizbollah
  in Lebanon. With the Turks waging war in Syria, the air space over Syria is clearly
  going to become very crowded and very dangerous very quickly as Russia is stumbling
  into a second war with Israel through its Iranian proxy and Israel unlike Ukraine
  can take out both Russian and Iranian offensive capabilities. We just witnessed
  the opening salvo of a hot war which is why the DIA, CIA have been in Tel Aviv
  and Cairo recently - it is not really about the Palestinian territories.

  '
- text: 'It''s the year of our Lord, 2023; it''s hard to believe that we are having
  this conversation about the urgent necessity of ammo and lethal weapons. WWI,
  WWII, the Korean War, Gulf Wars I & II, Afghanistan, ISIS, etc., have come and
  gone. This does not include the multitude of conflicts in Africa, Georgia, and
  other hot spots. Mankind has not changed a bit. We are still driven by fear,
  greed, and the curse of the ego and its lust for power. Another article in today''s
  edition discusses the Doomsday Clock and its relentless ticking toward oblivion. It''s
  just a matter of time -and Boom!

  '
- text: 'i''d go further than the correct interpretation that putin''s "cease fire"
  was nothing more than "propaganda."i suggest that the russian attack on kramatorsk
  on january 7, which russia falsely claimed killed 600 ukrainian soldiers, reveals
  the expectation that a cease fire would gather ukrainians in a rest area where
  they could be killed en masse. the headline was preplanned before the event.i
  point readers to the Institute for the Study of War (ISW) as an excellent daily
  summary of open source information by highly skilled military analysts. they point
  out that putin is using a "grievance-revenge" framing of russian military activities
  (e.g., kramatorsk was revenge for the grievance of russians killed in makiivka).
  the ISW points out that this has only worsened the antagonism toward the kremlin
  and military from pro-invasion russian commentators, who ask why any "grievance
  event" was allowed to occur in the first place.

  '
- text: 'I cannot entirely agree with this. If there''s a disconnect between what''s
  being taught, and what the student really wants to learn, that can be a problem.
  I, for example, learned a _LOT_ about computers, back in ''84 -- and a fair bit
  of other stuff, too. (I speak what I''ll term "conversational" Spanish; I can''t
  claim to be fluent, but I can absolutely carry on modest conversations and express
  myself.)But the teachers in my core subjects were uninspired or flatly failed
  me (e.g., the CompSci prof who lost my test, and gave me a zero; that really took
  the wind out of my sails, considering I thought I nailed it). So I was having
  far more fun at 11:00 p.m. in the computer lab than I was doing school work. Bombed
  out of college, but I''ve now worked at four Fortune 500 companies, and am currently
  a senior cloud admin. Students _do_ need to have a desire to learn, yes, but
  teachers need to be equipped properly to teach them, too.

  '
inference: true
model-index:
- name: SetFit with sentence-transformers/all-mpnet-base-v2
  results:
  - task:
      type: text-classification
      name: Text Classification
    dataset:
      name: Unknown
      type: unknown
      split: test
    metrics:
    - type: accuracy
      value: 0.9
      name: Accuracy
---

# SetFit with sentence-transformers/all-mpnet-base-v2

This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification.

The model has been trained using an efficient few-shot learning technique that involves:

1. Fine-tuning a [Sentence Transformer](https://www.sbert.net) with contrastive learning.
2. Training a classification head with features from the fine-tuned Sentence Transformer.

## Model Details

### Model Description
- **Model Type:** SetFit
- **Sentence Transformer body:** [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2)
- **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
- **Maximum Sequence Length:** 384 tokens
- **Number of Classes:** 2 classes
<!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Repository:** [SetFit on GitHub](https://github.com/huggingface/setfit)
- **Paper:** [Efficient Few-Shot Learning Without Prompts](https://arxiv.org/abs/2209.11055)
- **Blogpost:** [SetFit: Efficient Few-Shot Learning Without Prompts](https://huggingface.co/blog/setfit)

### Model Labels
| Label | Examples |
|:------|:---------|
| yes   | <ul><li>'TIME Magazine prediction for 2023 (3Jan2023)"A cornered Russia will turn from global player into the world’s most dangerous rogue state, posing a serious and pervasive danger to Europe, the U.S., and beyond. Bogged down in Ukraine, with little to lose from further isolation and Western retaliation, and facing intense domestic pressure to show strength, Russia will turn to asymmetric warfare against the West to inflict damage through a thousand \'paper cuts\' rather than by overt aggression that depends on military and economic power that Russia no longer has.Putin’s nuclear saber-rattling will escalate. Kremlin-affiliated hackers will ramp up increasingly sophisticated cyberattacks on Western firms, governments, and infrastructure. Russia will intensify its offensive against Western elections by systematically supporting and funding disinformation and extremism. Attacks on Ukrainian infrastructure will continue.In short, Rogue Russia is a threat to global security, Western political systems, the cybersphere, and food security. Not to mention every Ukrainian civilian."\n'</li><li>"Bulletin of the Atomic Scientists advanced the Doomsday Clock, now to 90 seconds due to increasing nuclear risk.The rulers are putting humans in peril, an unconscionable and unethical danger since we haven't consented to such risk.In view of the fact that, over millennia, the rulers have killed hundreds of millions of innocent people, we can question their claimed legitimacy, and reject their bogus claim.\n"</li><li>'This article explains the bad political rusults although rulers might be acting rationally within their ideological frameworks.It is based on plausible speculation of Biden and Putin\'s ideologies, yet other plausible facts could be animating the escalations. For instance, some describe \'getting ukrained\' as "what happens to you if you ally with the U.S. government," and Joe Biden might be escalating to avoid such observations.Notice that these types of explanations do not rely on free will, but that rulers are prisoner to the constraints and incentives facing them, even if this ends with humanity being nuked again.Bulletin of Atomic Scientists advancing the Doomsday Clock is largely in line with rulers vs humanity framework, but as Douthat explains, this is different than the logic of the rulers.Another view, that of Prof. Mearshimer\'s presents a pessimistic view of this Ukraine War, while being remarkably prescient providing yet another framework to understand what\'s likely to happen; let\'s hope that he\'s wrong, althought lacking evidence for this optimism.\n'</li></ul> |
| no    | <ul><li>"M Martínez - Doubtful. The US has been conducting virtually Perpetual War (mostly against smaller, weaker, brown-skinned nations) since day one and that hasn't dulled the Chickenhawk politicians (see: Bush the Lesser, George) from happily pushing us into the next one.Starting wars that are fought by Other Mother's Children and are profitable for the war-mongers will never cease.\n"</li><li>"I know it is easy to blame America always, but we are largely blameless. We opened trade with China and this allowed China to industrialize and build its economy. We in the west believe in Free markets and free people. Chinese state adopted a version of capitalism but instead of liberalizing like South Korea and Taiwan decided to become more insular. They restricted access to western products for their citizens. Movies, TV shows had to be censored. American social media companies cannot do business in China. Chinese citizens are not masters of their own destiny as the state dictates every aspect of their lives. Many of us in the west enjoy the benefits of western liberalism, namely - Free markets, Rule of law ( including contract enforcement) and individual rights. In the cold war era, we had to actively defend these values from Soviets. Now, we must brace ourselves to defend them from China. Liberal order will prevail because once people know the values of western liberal order, like Hongkongers, Taiwanese etc they will defend it. We in US, must help them, become the arsenal of democracy, supply planes, ships, munitions to Taiwan to defend themselves. Help Hong Kong citizens by giving the persecuted asylum in the west. We are not responsible for confrontation with China, Chinese state's disregard for Taiwanese and Hongkong citizens aspirations is responsible for this.\n"</li><li>'We probably have male, transient cougars moving through the area more frequently than wildlife experts and state officials document. My neighbors woke to a partially eaten deer carcass in their backyard, but heard no coyotes the night before. We hadn\'t heard this story yet, when a week later, my husband had a very large animal run in front of his car. It had a very long tail, short hair of all tan color and bounded as tall as the hood of his sedan. I posted this on a local wildlife FB page, and a man replied his daughter saw it while walking one their 2 dogs, and reported it was as big as their mastiff. A week later, my neighbor was walking her dog at 7 am, and saw it in a neighboring yard, at the top of a hill, "sitting like a sphinx" under a large blue juniper bush. My neighbor clearly saw a broad feline face and large white torso. Several months later, I heard a jogger in another part of my town also saw it early in the morning, and and went to FB posting a stock picture of a cougar with the comment, \'\'This is what I saw." An email sent to CTDEEP with all this information wasn\'t taken seriously, with their reply stating reports are usually confusing other animals. It\'s hard to know what CTDEEP might think we are confused about, since coyote, fox, fisher, black bear and deer have all been sighted in our yard or near us, frequently.\n'</li></ul> |

## Evaluation

### Metrics
| Label   | Accuracy |
|:--------|:---------|
| **all** | 0.9      |
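
A minimal sketch of how an accuracy figure like this can be recomputed, assuming a labeled held-out split; `test_texts` and `test_labels` are illustrative stand-ins, since the actual test data is not published with this card:

```python
from setfit import SetFitModel

model = SetFitModel.from_pretrained("davidadamczyk/setfit-model-4")

# Hypothetical held-out examples with gold labels.
test_texts = ["First held-out comment ...", "Second held-out comment ..."]
test_labels = ["yes", "no"]

preds = model.predict(test_texts)
accuracy = sum(p == y for p, y in zip(preds, test_labels)) / len(test_labels)
print(f"accuracy = {accuracy:.2f}")
```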

## Uses

### Direct Use for Inference

First install the SetFit library:

```bash
pip install setfit
```

Then you can load this model and run inference.

```python
from setfit import SetFitModel

# Download from the 🤗 Hub
model = SetFitModel.from_pretrained("davidadamczyk/setfit-model-4")
# Run inference
preds = model("It's the year of our Lord, 2023; it's hard to believe that we are having this conversation about the urgent necessity of ammo and lethal weapons. WWI, WWII, the Korean War, Gulf Wars I & II, Afghanistan, ISIS, etc., have come and gone. This does not include the multitude of conflicts in Africa, Georgia, and other hot spots. Mankind has not changed a bit. We are still driven by fear, greed, and the curse of the ego and its lust for power. Another article in today's edition discusses the Doomsday Clock and its relentless ticking toward oblivion. It's just a matter of time -and Boom!
")
```
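
`model.predict` also accepts a batch of texts; with this model's label configuration (see `config_setfit.json`), predictions come back as the strings `"no"` or `"yes"`. A small sketch with illustrative inputs:

```python
# Batch inference; each entry maps to "no" or "yes".
preds = model.predict([
    "An illustrative comment about geopolitics.",
    "An illustrative comment about local wildlife.",
])
```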

<!--
### Downstream Use

*List how someone could finetune this model on their own dataset.*
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Set Metrics
| Training set | Min | Median  | Max |
|:-------------|:----|:--------|:----|
| Word count   | 18  | 133.075 | 255 |

| Label | Training Sample Count |
|:------|:----------------------|
| no    | 18                    |
| yes   | 22                    |

### Training Hyperparameters
- batch_size: (16, 16)
- num_epochs: (1, 1)
- max_steps: -1
- sampling_strategy: oversampling
- num_iterations: 120
- body_learning_rate: (2e-05, 2e-05)
- head_learning_rate: 2e-05
- loss: CosineSimilarityLoss
- distance_metric: cosine_distance
- margin: 0.25
- end_to_end: False
- use_amp: False
- warmup_proportion: 0.1
- l2_weight: 0.01
- seed: 42
- eval_max_steps: -1
- load_best_model_at_end: False
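
A sketch of a training run consistent with the hyperparameters above, using the SetFit 1.x `Trainer` API. The two-example `train_dataset` is a stand-in; the real 40-example training set (18 `no`, 22 `yes`) is not published with this card:

```python
from datasets import Dataset
from setfit import SetFitModel, Trainer, TrainingArguments

# Stand-in for the actual (unpublished) training split.
train_dataset = Dataset.from_dict({
    "text": ["an illustrative comment ...", "another illustrative comment ..."],
    "label": ["yes", "no"],
})

model = SetFitModel.from_pretrained(
    "sentence-transformers/all-mpnet-base-v2",
    labels=["no", "yes"],  # default head is a scikit-learn LogisticRegression
)
args = TrainingArguments(
    batch_size=(16, 16),
    num_epochs=1,
    num_iterations=120,
    body_learning_rate=(2e-05, 2e-05),
    head_learning_rate=2e-05,
    sampling_strategy="oversampling",
    warmup_proportion=0.1,
    l2_weight=0.01,
    seed=42,
)
trainer = Trainer(model=model, args=args, train_dataset=train_dataset)
trainer.train()
```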

### Training Results
| Epoch  | Step | Training Loss | Validation Loss |
|:------:|:----:|:-------------:|:---------------:|
| 0.0017 | 1    | 0.4133        | -               |
| 0.0833 | 50   | 0.188         | -               |
| 0.1667 | 100  | 0.0071        | -               |
| 0.25   | 150  | 0.0002        | -               |
| 0.3333 | 200  | 0.0001        | -               |
| 0.4167 | 250  | 0.0001        | -               |
| 0.5    | 300  | 0.0001        | -               |
| 0.5833 | 350  | 0.0001        | -               |
| 0.6667 | 400  | 0.0001        | -               |
| 0.75   | 450  | 0.0001        | -               |
| 0.8333 | 500  | 0.0001        | -               |
| 0.9167 | 550  | 0.0001        | -               |
| 1.0    | 600  | 0.0001        | -               |

### Framework Versions
- Python: 3.10.13
- SetFit: 1.1.0
- Sentence Transformers: 3.0.1
- Transformers: 4.45.2
- PyTorch: 2.4.0+cu124
- Datasets: 2.21.0
- Tokenizers: 0.20.0

## Citation

### BibTeX
```bibtex
@article{https://doi.org/10.48550/arxiv.2209.11055,
  doi = {10.48550/ARXIV.2209.11055},
  url = {https://arxiv.org/abs/2209.11055},
  author = {Tunstall, Lewis and Reimers, Nils and Jo, Unso Eun Seo and Bates, Luke and Korat, Daniel and Wasserblat, Moshe and Pereg, Oren},
  keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences, FOS: Computer and information sciences},
  title = {Efficient Few-Shot Learning Without Prompts},
  publisher = {arXiv},
  year = {2022},
  copyright = {Creative Commons Attribution 4.0 International}
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
config.json
ADDED
@@ -0,0 +1,24 @@
{
  "_name_or_path": "sentence-transformers/all-mpnet-base-v2",
  "architectures": [
    "MPNetModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "mpnet",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "relative_attention_num_buckets": 32,
  "torch_dtype": "float32",
  "transformers_version": "4.45.2",
  "vocab_size": 30527
}
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
{
  "__version__": {
    "sentence_transformers": "3.0.1",
    "transformers": "4.45.2",
    "pytorch": "2.4.0+cu124"
  },
  "prompts": {},
  "default_prompt_name": null,
  "similarity_fn_name": null
}
config_setfit.json
ADDED
@@ -0,0 +1,7 @@
{
  "normalize_embeddings": false,
  "labels": [
    "no",
    "yes"
  ]
}
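
The `labels` list above is what maps the classifier head's class indices back to strings at predict time, which is why the README examples return `"no"`/`"yes"`. Class probabilities follow the same order; a small sketch with an illustrative input:

```python
from setfit import SetFitModel

model = SetFitModel.from_pretrained("davidadamczyk/setfit-model-4")
probs = model.predict_proba(["An illustrative comment."])
# probs[0] holds [P(no), P(yes)], in the label order declared above.
```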
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:247c538f7b4e2b3f64fee6c0673fa2098b0bb3a754e1b7831b502465754b4b0f
size 437967672
model_head.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:50f6842dac0d0a5adbbc39640ca8f9fc2c3b73f6ceaa2621820c30c30d57db95
size 7023
modules.json
ADDED
@@ -0,0 +1,20 @@
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  },
  {
    "idx": 2,
    "name": "2",
    "path": "2_Normalize",
    "type": "sentence_transformers.models.Normalize"
  }
]
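
The three modules chain the MPNet transformer, the mean pooling configured in `1_Pooling/config.json`, and L2 normalization. Because this is a complete sentence-transformers pipeline, the repo should also load directly as an embedding model; a minimal sketch assuming the hosted repo id from the README:

```python
from sentence_transformers import SentenceTransformer

body = SentenceTransformer("davidadamczyk/setfit-model-4")
embeddings = body.encode(["An illustrative sentence."])
print(embeddings.shape)  # (1, 768); unit-length vectors thanks to the Normalize module
```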
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
{
  "max_seq_length": 384,
  "do_lower_case": false
}
special_tokens_map.json
ADDED
@@ -0,0 +1,51 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "cls_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "<mask>",
    "lstrip": true,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "[UNK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
tokenizer_config.json
ADDED
@@ -0,0 +1,72 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "104": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "30526": {
      "content": "<mask>",
      "lstrip": true,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": false,
  "cls_token": "<s>",
  "do_lower_case": true,
  "eos_token": "</s>",
  "mask_token": "<mask>",
  "max_length": 128,
  "model_max_length": 384,
  "pad_to_multiple_of": null,
  "pad_token": "<pad>",
  "pad_token_type_id": 0,
  "padding_side": "right",
  "sep_token": "</s>",
  "stride": 0,
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "MPNetTokenizer",
  "truncation_side": "right",
  "truncation_strategy": "longest_first",
  "unk_token": "[UNK]"
}
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff