Upload 14 files
Browse files
- model-20240117T042438Z-001/model/1_Pooling/config.json +7 -0
- model-20240117T042438Z-001/model/README.md +251 -0
- model-20240117T042438Z-001/model/config.json +24 -0
- model-20240117T042438Z-001/model/config_sentence_transformers.json +7 -0
- model-20240117T042438Z-001/model/config_setfit.json +4 -0
- model-20240117T042438Z-001/model/model.safetensors +3 -0
- model-20240117T042438Z-001/model/model_head.pkl +3 -0
- model-20240117T042438Z-001/model/modules.json +20 -0
- model-20240117T042438Z-001/model/pytorch_model.bin +3 -0
- model-20240117T042438Z-001/model/sentence_bert_config.json +4 -0
- model-20240117T042438Z-001/model/special_tokens_map.json +51 -0
- model-20240117T042438Z-001/model/tokenizer.json +0 -0
- model-20240117T042438Z-001/model/tokenizer_config.json +72 -0
- model-20240117T042438Z-001/model/vocab.txt +0 -0
model-20240117T042438Z-001/model/1_Pooling/config.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 768,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false
|
7 |
+
}
|
model-20240117T042438Z-001/model/README.md
ADDED
@@ -0,0 +1,251 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: setfit
|
3 |
+
tags:
|
4 |
+
- setfit
|
5 |
+
- sentence-transformers
|
6 |
+
- text-classification
|
7 |
+
- generated_from_setfit_trainer
|
8 |
+
metrics:
|
9 |
+
- accuracy
|
10 |
+
widget:
|
11 |
+
- text: Board Meeting Outcome for Scheme Of Merger By Absorption Of Mahindra Heavy
|
12 |
+
Engines Limited And Mahindra Two Wheelers Limited And Trringo.Com Limited With
|
13 |
+
The Company And Their Respective Shareholders
|
14 |
+
- text: Unaudited Financial Results (Standalone And Consolidated) For The Third Quarter
|
15 |
+
And Nine Months Ended 31St December 2022.
|
16 |
+
- text: Announcement under Regulation 30 (LODR)-Updates on Acquisition
|
17 |
+
- text: Results For The Quarter And Year Ended March 31, 2023
|
18 |
+
- text: Board Meeting Outcome for Unaudited Standalone & Consolidated Financial Results
|
19 |
+
And Limited Review Reports Of The Statutory Auditors For The First Quarter Ended
|
20 |
+
June 30, 2023
|
21 |
+
pipeline_tag: text-classification
|
22 |
+
inference: true
|
23 |
+
base_model: sentence-transformers/all-mpnet-base-v2
|
24 |
+
model-index:
|
25 |
+
- name: SetFit with sentence-transformers/all-mpnet-base-v2
|
26 |
+
results:
|
27 |
+
- task:
|
28 |
+
type: text-classification
|
29 |
+
name: Text Classification
|
30 |
+
dataset:
|
31 |
+
name: Unknown
|
32 |
+
type: unknown
|
33 |
+
split: test
|
34 |
+
metrics:
|
35 |
+
- type: accuracy
|
36 |
+
value: 0.926605504587156
|
37 |
+
name: Accuracy
|
38 |
+
---
|
39 |
+
|
40 |
+
# SetFit with sentence-transformers/all-mpnet-base-v2
|
41 |
+
|
42 |
+
This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification.
|
43 |
+
|
44 |
+
The model has been trained using an efficient few-shot learning technique that involves:
|
45 |
+
|
46 |
+
1. Fine-tuning a [Sentence Transformer](https://www.sbert.net) with contrastive learning.
|
47 |
+
2. Training a classification head with features from the fine-tuned Sentence Transformer.
|
48 |
+
|
49 |
+
## Model Details
|
50 |
+
|
51 |
+
### Model Description
|
52 |
+
- **Model Type:** SetFit
|
53 |
+
- **Sentence Transformer body:** [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2)
|
54 |
+
- **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
|
55 |
+
- **Maximum Sequence Length:** 384 tokens
|
56 |
+
- **Number of Classes:** 9 classes
|
57 |
+
<!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
|
58 |
+
<!-- - **Language:** Unknown -->
|
59 |
+
<!-- - **License:** Unknown -->
|
60 |
+
|
61 |
+
### Model Sources
|
62 |
+
|
63 |
+
- **Repository:** [SetFit on GitHub](https://github.com/huggingface/setfit)
|
64 |
+
- **Paper:** [Efficient Few-Shot Learning Without Prompts](https://arxiv.org/abs/2209.11055)
|
65 |
+
- **Blogpost:** [SetFit: Efficient Few-Shot Learning Without Prompts](https://huggingface.co/blog/setfit)
|
66 |
+
|
67 |
+
### Model Labels
|
68 |
+
| Label | Examples |
|
69 |
+
|:------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
70 |
+
| 5 | <ul><li>'Regulation 30 Of The SEBI (Listing Obligations And Disclosure Requirements) Regulations 2015: Disclosure Of Appointment of Key Managerial Personnel'</li><li>'Disclosure Under Regulation 30 Of SEBI LODR Regulations (Violation of Securities Regulations)'</li><li>'Regulation 30 Of The SEBI (Listing Obligations And Disclosure Requirements) Regulations'</li></ul> |
|
71 |
+
| 6 | <ul><li>'Audited Standalone & Consolidated Financial Results Of The Company For The First Quarter And Financial Year Ended June 30 2030 Along With Audit Reports'</li><li>"Updated Independent Auditor's Report On The Consolidated Financial Statements As At And For The Year Ended March 31 2023 Prepared Under Indian Accounting Standards"</li><li>'Unaudited Financial Results Of The Company For The Quarter Ended 30Th June, 2023.'</li></ul> |
|
72 |
+
| 1 | <ul><li>"Order Passed By The Hon'Ble National Company Law Tribunal, Mumbai Bench, Sanctioning The Scheme Of Arrangement Between Reliance Projects & Property Management Services Limited And Its Shareholders And Creditors & Reliance Industries Limited And Its Shareholders And Creditors ('Scheme') - Further Update"</li><li>'Update On Disinvestment Of Non-core Assets'</li><li>'Announcement Of New Manufacturing Facility'</li></ul> |
|
73 |
+
| 2 | <ul><li>"Board Meeting Outcome for Board Meeting Outcome For Intimation Under Regulation 30 Of SEBI (Listing Obligations And Disclosure Requirements) Regulations, 2015 (''Listing Regulations'') For Declaration Of Interim Dividend"</li><li>'Board Meeting Outcome for Capital Infusion Through Rights Issue Of Equity Shares'</li><li>'Outcome Of The Meeting Of The Board Of Directors Of Indusind Bank Limited (The Bank) For Approval Of Audited Financial Results Of The Bank (Standalone And Consolidated) For The Quarter And Financial Year Ended March 31, 2023 And Payment Of Dividend, For The Financial Year 2022-23.'</li></ul> |
|
74 |
+
| 0 | <ul><li>'Transcripts of Town Hall Meeting with Stakeholders'</li><li>'Clarification on Market Rumors Regarding Product Recall'</li><li>'Media Release By Reliance Jio Infocomm Limited'</li></ul> |
|
75 |
+
| 3 | <ul><li>'Earnings Call - Intimation'</li><li>'Audio / Video Recording - Earnings Call - Technology and Innovation Highlights'</li><li>'Audio Recording - Earnings Call'</li></ul> |
|
76 |
+
| 7 | <ul><li>'R&D'</li><li>'Infosys Collaborates with Leading Universities for Research and Development'</li><li>'Cloud For Organizational Growth And Transformation Is Three Times More Important Than Cloud For Cost Optimization: Infosys Research'</li></ul> |
|
77 |
+
| 8 | <ul><li>'Resignation Of Shri Rajesh B. Ambani From The Board Of The Company - Disclosure Dated September 5'</li><li>'Announcement under Regulation 30 (LODR)-Resignation of Chief Legal Officer (CLO)'</li><li>'Announcement under Regulation 30 (LODR)-Resignation of Chief Risk Officer (CRO)'</li></ul> |
|
78 |
+
| 4 | <ul><li>'The Employee Stock Option Plan of FutureTech Ventures has been authorized for the issuance of stock options for the fiscal year 2023.'</li><li>'Approval for the grant of stock options for the year 2024 has been obtained under the Employee Stock Option Scheme of QuantumTech Industries.'</li><li>'Stock options have been officially granted under the Employee Stock Option Scheme of InnovateTech Corporation for the fiscal year 2024.'</li></ul> |
|
79 |
+
|
80 |
+
## Evaluation
|
81 |
+
|
82 |
+
### Metrics
|
83 |
+
| Label | Accuracy |
|
84 |
+
|:--------|:---------|
|
85 |
+
| **all** | 0.9266 |
|
86 |
+
|
87 |
+
## Uses
|
88 |
+
|
89 |
+
### Direct Use for Inference
|
90 |
+
|
91 |
+
First install the SetFit library:
|
92 |
+
|
93 |
+
```bash
|
94 |
+
pip install setfit
|
95 |
+
```
|
96 |
+
|
97 |
+
Then you can load this model and run inference.
|
98 |
+
|
99 |
+
```python
|
100 |
+
from setfit import SetFitModel
|
101 |
+
|
102 |
+
# Download from the 🤗 Hub
|
103 |
+
model = SetFitModel.from_pretrained("setfit_model_id")
|
104 |
+
# Run inference
|
105 |
+
preds = model("Results For The Quarter And Year Ended March 31, 2023")
|
106 |
+
```
|
107 |
+
|
108 |
+
<!--
|
109 |
+
### Downstream Use
|
110 |
+
|
111 |
+
*List how someone could finetune this model on their own dataset.*
|
112 |
+
-->
|
113 |
+
|
114 |
+
<!--
|
115 |
+
### Out-of-Scope Use
|
116 |
+
|
117 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
118 |
+
-->
|
119 |
+
|
120 |
+
<!--
|
121 |
+
## Bias, Risks and Limitations
|
122 |
+
|
123 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
124 |
+
-->
|
125 |
+
|
126 |
+
<!--
|
127 |
+
### Recommendations
|
128 |
+
|
129 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
130 |
+
-->
|
131 |
+
|
132 |
+
## Training Details
|
133 |
+
|
134 |
+
### Training Set Metrics
|
135 |
+
| Training set | Min | Mean | Max |
|
136 |
+
|:-------------|:----|:--------|:----|
|
137 |
+
| Word count | 1 | 14.7204 | 70 |
|
138 |
+
|
139 |
+
| Label | Training Sample Count |
|
140 |
+
|:------|:----------------------|
|
141 |
+
| 0 | 143 |
|
142 |
+
| 1 | 138 |
|
143 |
+
| 2 | 299 |
|
144 |
+
| 3 | 62 |
|
145 |
+
| 4 | 42 |
|
146 |
+
| 5 | 60 |
|
147 |
+
| 6 | 192 |
|
148 |
+
| 7 | 7 |
|
149 |
+
| 8 | 37 |
|
150 |
+
|
151 |
+
### Training Hyperparameters
|
152 |
+
- batch_size: (64, 64)
|
153 |
+
- num_epochs: (2, 2)
|
154 |
+
- max_steps: -1
|
155 |
+
- sampling_strategy: oversampling
|
156 |
+
- num_iterations: 30
|
157 |
+
- body_learning_rate: (2e-05, 2e-05)
|
158 |
+
- head_learning_rate: 2e-05
|
159 |
+
- loss: CosineSimilarityLoss
|
160 |
+
- distance_metric: cosine_distance
|
161 |
+
- margin: 0.25
|
162 |
+
- end_to_end: False
|
163 |
+
- use_amp: False
|
164 |
+
- warmup_proportion: 0.1
|
165 |
+
- seed: 42
|
166 |
+
- eval_max_steps: -1
|
167 |
+
- load_best_model_at_end: False
|
168 |
+
|
169 |
+
### Training Results
|
170 |
+
| Epoch | Step | Training Loss | Validation Loss |
|
171 |
+
|:------:|:----:|:-------------:|:---------------:|
|
172 |
+
| 0.0011 | 1 | 0.1926 | - |
|
173 |
+
| 0.0544 | 50 | 0.1512 | - |
|
174 |
+
| 0.1088 | 100 | 0.07 | - |
|
175 |
+
| 0.1632 | 150 | 0.0327 | - |
|
176 |
+
| 0.2176 | 200 | 0.0192 | - |
|
177 |
+
| 0.2720 | 250 | 0.0109 | - |
|
178 |
+
| 0.3264 | 300 | 0.0129 | - |
|
179 |
+
| 0.3808 | 350 | 0.0124 | - |
|
180 |
+
| 0.4353 | 400 | 0.0056 | - |
|
181 |
+
| 0.4897 | 450 | 0.021 | - |
|
182 |
+
| 0.5441 | 500 | 0.0392 | - |
|
183 |
+
| 0.5985 | 550 | 0.0127 | - |
|
184 |
+
| 0.6529 | 600 | 0.0211 | - |
|
185 |
+
| 0.7073 | 650 | 0.0031 | - |
|
186 |
+
| 0.7617 | 700 | 0.0054 | - |
|
187 |
+
| 0.8161 | 750 | 0.0046 | - |
|
188 |
+
| 0.8705 | 800 | 0.027 | - |
|
189 |
+
| 0.9249 | 850 | 0.0229 | - |
|
190 |
+
| 0.9793 | 900 | 0.0065 | - |
|
191 |
+
| 1.0337 | 950 | 0.0058 | - |
|
192 |
+
| 1.0881 | 1000 | 0.0134 | - |
|
193 |
+
| 1.1425 | 1050 | 0.0319 | - |
|
194 |
+
| 1.1970 | 1100 | 0.0042 | - |
|
195 |
+
| 1.2514 | 1150 | 0.0065 | - |
|
196 |
+
| 1.3058 | 1200 | 0.0016 | - |
|
197 |
+
| 1.3602 | 1250 | 0.0094 | - |
|
198 |
+
| 1.4146 | 1300 | 0.0173 | - |
|
199 |
+
| 1.4690 | 1350 | 0.0042 | - |
|
200 |
+
| 1.5234 | 1400 | 0.0083 | - |
|
201 |
+
| 1.5778 | 1450 | 0.0011 | - |
|
202 |
+
| 1.6322 | 1500 | 0.0092 | - |
|
203 |
+
| 1.6866 | 1550 | 0.0184 | - |
|
204 |
+
| 1.7410 | 1600 | 0.0073 | - |
|
205 |
+
| 1.7954 | 1650 | 0.0188 | - |
|
206 |
+
| 1.8498 | 1700 | 0.0211 | - |
|
207 |
+
| 1.9042 | 1750 | 0.0016 | - |
|
208 |
+
| 1.9587 | 1800 | 0.0118 | - |
|
209 |
+
|
210 |
+
### Framework Versions
|
211 |
+
- Python: 3.10.12
|
212 |
+
- SetFit: 1.0.1
|
213 |
+
- Sentence Transformers: 2.2.2
|
214 |
+
- Transformers: 4.35.2
|
215 |
+
- PyTorch: 2.1.0+cu121
|
216 |
+
- Datasets: 2.15.0
|
217 |
+
- Tokenizers: 0.15.0
|
218 |
+
|
219 |
+
## Citation
|
220 |
+
|
221 |
+
### BibTeX
|
222 |
+
```bibtex
|
223 |
+
@article{https://doi.org/10.48550/arxiv.2209.11055,
|
224 |
+
doi = {10.48550/ARXIV.2209.11055},
|
225 |
+
url = {https://arxiv.org/abs/2209.11055},
|
226 |
+
author = {Tunstall, Lewis and Reimers, Nils and Jo, Unso Eun Seo and Bates, Luke and Korat, Daniel and Wasserblat, Moshe and Pereg, Oren},
|
227 |
+
keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences},
|
228 |
+
title = {Efficient Few-Shot Learning Without Prompts},
|
229 |
+
publisher = {arXiv},
|
230 |
+
year = {2022},
|
231 |
+
copyright = {Creative Commons Attribution 4.0 International}
|
232 |
+
}
|
233 |
+
```
|
234 |
+
|
235 |
+
<!--
|
236 |
+
## Glossary
|
237 |
+
|
238 |
+
*Clearly define terms in order to be accessible across audiences.*
|
239 |
+
-->
|
240 |
+
|
241 |
+
<!--
|
242 |
+
## Model Card Authors
|
243 |
+
|
244 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
245 |
+
-->
|
246 |
+
|
247 |
+
<!--
|
248 |
+
## Model Card Contact
|
249 |
+
|
250 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
251 |
+
-->
|
model-20240117T042438Z-001/model/config.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/root/.cache/torch/sentence_transformers/sentence-transformers_all-mpnet-base-v2/",
|
3 |
+
"architectures": [
|
4 |
+
"MPNetModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"bos_token_id": 0,
|
8 |
+
"eos_token_id": 2,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 768,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 3072,
|
14 |
+
"layer_norm_eps": 1e-05,
|
15 |
+
"max_position_embeddings": 514,
|
16 |
+
"model_type": "mpnet",
|
17 |
+
"num_attention_heads": 12,
|
18 |
+
"num_hidden_layers": 12,
|
19 |
+
"pad_token_id": 1,
|
20 |
+
"relative_attention_num_buckets": 32,
|
21 |
+
"torch_dtype": "float32",
|
22 |
+
"transformers_version": "4.35.2",
|
23 |
+
"vocab_size": 30527
|
24 |
+
}
|
model-20240117T042438Z-001/model/config_sentence_transformers.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "2.0.0",
|
4 |
+
"transformers": "4.6.1",
|
5 |
+
"pytorch": "1.8.1"
|
6 |
+
}
|
7 |
+
}
|
model-20240117T042438Z-001/model/config_setfit.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"labels": null,
|
3 |
+
"normalize_embeddings": false
|
4 |
+
}
|
model-20240117T042438Z-001/model/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b7330e8bbe7e73b9f53ead679beb787e26f915d7c7a91882f0f07b43a6a33033
|
3 |
+
size 437967672
|
model-20240117T042438Z-001/model/model_head.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b129c69d249d8d6793157b40402992156c2536383760370df178f98e9fafb21
|
3 |
+
size 56271
|
model-20240117T042438Z-001/model/modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
model-20240117T042438Z-001/model/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8fd120b1a0032e70ff3d4b8ab8e46a6d01c2cb08ffe7c007a021c1788928146
|
3 |
+
size 438011953
|
model-20240117T042438Z-001/model/sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 384,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
model-20240117T042438Z-001/model/special_tokens_map.json
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"cls_token": {
|
10 |
+
"content": "<s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": true,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"eos_token": {
|
17 |
+
"content": "</s>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"mask_token": {
|
24 |
+
"content": "<mask>",
|
25 |
+
"lstrip": true,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"pad_token": {
|
31 |
+
"content": "<pad>",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
},
|
37 |
+
"sep_token": {
|
38 |
+
"content": "</s>",
|
39 |
+
"lstrip": false,
|
40 |
+
"normalized": true,
|
41 |
+
"rstrip": false,
|
42 |
+
"single_word": false
|
43 |
+
},
|
44 |
+
"unk_token": {
|
45 |
+
"content": "[UNK]",
|
46 |
+
"lstrip": false,
|
47 |
+
"normalized": false,
|
48 |
+
"rstrip": false,
|
49 |
+
"single_word": false
|
50 |
+
}
|
51 |
+
}
|
model-20240117T042438Z-001/model/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
model-20240117T042438Z-001/model/tokenizer_config.json
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "<s>",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"1": {
|
12 |
+
"content": "<pad>",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"2": {
|
20 |
+
"content": "</s>",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"3": {
|
28 |
+
"content": "<unk>",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": true,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"104": {
|
36 |
+
"content": "[UNK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
},
|
43 |
+
"30526": {
|
44 |
+
"content": "<mask>",
|
45 |
+
"lstrip": true,
|
46 |
+
"normalized": false,
|
47 |
+
"rstrip": false,
|
48 |
+
"single_word": false,
|
49 |
+
"special": true
|
50 |
+
}
|
51 |
+
},
|
52 |
+
"bos_token": "<s>",
|
53 |
+
"clean_up_tokenization_spaces": true,
|
54 |
+
"cls_token": "<s>",
|
55 |
+
"do_lower_case": true,
|
56 |
+
"eos_token": "</s>",
|
57 |
+
"mask_token": "<mask>",
|
58 |
+
"max_length": 128,
|
59 |
+
"model_max_length": 512,
|
60 |
+
"pad_to_multiple_of": null,
|
61 |
+
"pad_token": "<pad>",
|
62 |
+
"pad_token_type_id": 0,
|
63 |
+
"padding_side": "right",
|
64 |
+
"sep_token": "</s>",
|
65 |
+
"stride": 0,
|
66 |
+
"strip_accents": null,
|
67 |
+
"tokenize_chinese_chars": true,
|
68 |
+
"tokenizer_class": "MPNetTokenizer",
|
69 |
+
"truncation_side": "right",
|
70 |
+
"truncation_strategy": "longest_first",
|
71 |
+
"unk_token": "[UNK]"
|
72 |
+
}
|
model-20240117T042438Z-001/model/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|