vishwask committed on
Commit
1eddeb0
1 Parent(s): 85d1e2f

Upload 27 files

Browse files
Llama-2-13B-chat-GPTQ/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Llama-2-13B-chat-GPTQ/LICENSE ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ LLAMA 2 COMMUNITY LICENSE AGREEMENT
2
+ Llama 2 Version Release Date: July 18, 2023
3
+
4
+ "Agreement" means the terms and conditions for use, reproduction, distribution and
5
+ modification of the Llama Materials set forth herein.
6
+
7
+ "Documentation" means the specifications, manuals and documentation
8
+ accompanying Llama 2 distributed by Meta at ai.meta.com/resources/models-and-
9
+ libraries/llama-downloads/.
10
+
11
+ "Licensee" or "you" means you, or your employer or any other person or entity (if
12
+ you are entering into this Agreement on such person or entity's behalf), of the age
13
+ required under applicable laws, rules or regulations to provide legal consent and that
14
+ has legal authority to bind your employer or such other person or entity if you are
15
+ entering in this Agreement on their behalf.
16
+
17
+ "Llama 2" means the foundational large language models and software and
18
+ algorithms, including machine-learning model code, trained model weights,
19
+ inference-enabling code, training-enabling code, fine-tuning enabling code and other
20
+ elements of the foregoing distributed by Meta at ai.meta.com/resources/models-and-
21
+ libraries/llama-downloads/.
22
+
23
+ "Llama Materials" means, collectively, Meta's proprietary Llama 2 and
24
+ Documentation (and any portion thereof) made available under this Agreement.
25
+
26
+ "Meta" or "we" means Meta Platforms Ireland Limited (if you are located in or, if you
27
+ are an entity, your principal place of business is in the EEA or Switzerland) and Meta
28
+ Platforms, Inc. (if you are located outside of the EEA or Switzerland).
29
+
30
+ By clicking "I Accept" below or by using or distributing any portion or element of the
31
+ Llama Materials, you agree to be bound by this Agreement.
32
+
33
+ 1. License Rights and Redistribution.
34
+
35
+ a. Grant of Rights. You are granted a non-exclusive, worldwide, non-
36
+ transferable and royalty-free limited license under Meta's intellectual property or
37
+ other rights owned by Meta embodied in the Llama Materials to use, reproduce,
38
+ distribute, copy, create derivative works of, and make modifications to the Llama
39
+ Materials.
40
+
41
+ b. Redistribution and Use.
42
+
43
+ i. If you distribute or make the Llama Materials, or any derivative works
44
+ thereof, available to a third party, you shall provide a copy of this Agreement to such
45
+ third party.
46
+ ii. If you receive Llama Materials, or any derivative works thereof, from
47
+ a Licensee as part of an integrated end user product, then Section 2 of this
48
+ Agreement will not apply to you.
49
+
50
+ iii. You must retain in all copies of the Llama Materials that you
51
+ distribute the following attribution notice within a "Notice" text file distributed as a
52
+ part of such copies: "Llama 2 is licensed under the LLAMA 2 Community License,
53
+ Copyright (c) Meta Platforms, Inc. All Rights Reserved."
54
+
55
+ iv. Your use of the Llama Materials must comply with applicable laws
56
+ and regulations (including trade compliance laws and regulations) and adhere to the
57
+ Acceptable Use Policy for the Llama Materials (available at
58
+ https://ai.meta.com/llama/use-policy), which is hereby incorporated by reference into
59
+ this Agreement.
60
+
61
+ v. You will not use the Llama Materials or any output or results of the
62
+ Llama Materials to improve any other large language model (excluding Llama 2 or
63
+ derivative works thereof).
64
+
65
+ 2. Additional Commercial Terms. If, on the Llama 2 version release date, the
66
+ monthly active users of the products or services made available by or for Licensee,
67
+ or Licensee's affiliates, is greater than 700 million monthly active users in the
68
+ preceding calendar month, you must request a license from Meta, which Meta may
69
+ grant to you in its sole discretion, and you are not authorized to exercise any of the
70
+ rights under this Agreement unless or until Meta otherwise expressly grants you
71
+ such rights.
72
+
73
+ 3. Disclaimer of Warranty. UNLESS REQUIRED BY APPLICABLE LAW, THE
74
+ LLAMA MATERIALS AND ANY OUTPUT AND RESULTS THEREFROM ARE
75
+ PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
76
+ EITHER EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY
77
+ WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY, OR
78
+ FITNESS FOR A PARTICULAR PURPOSE. YOU ARE SOLELY RESPONSIBLE
79
+ FOR DETERMINING THE APPROPRIATENESS OF USING OR REDISTRIBUTING
80
+ THE LLAMA MATERIALS AND ASSUME ANY RISKS ASSOCIATED WITH YOUR
81
+ USE OF THE LLAMA MATERIALS AND ANY OUTPUT AND RESULTS.
82
+
83
+ 4. Limitation of Liability. IN NO EVENT WILL META OR ITS AFFILIATES BE
84
+ LIABLE UNDER ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, TORT,
85
+ NEGLIGENCE, PRODUCTS LIABILITY, OR OTHERWISE, ARISING OUT OF THIS
86
+ AGREEMENT, FOR ANY LOST PROFITS OR ANY INDIRECT, SPECIAL,
87
+ CONSEQUENTIAL, INCIDENTAL, EXEMPLARY OR PUNITIVE DAMAGES, EVEN
88
+ IF META OR ITS AFFILIATES HAVE BEEN ADVISED OF THE POSSIBILITY OF
89
+ ANY OF THE FOREGOING.
90
+
91
+ 5. Intellectual Property.
92
+
93
+ a. No trademark licenses are granted under this Agreement, and in
94
+ connection with the Llama Materials, neither Meta nor Licensee may use any name
95
+ or mark owned by or associated with the other or any of its affiliates, except as
96
+ required for reasonable and customary use in describing and redistributing the
97
+ Llama Materials.
98
+
99
+ b. Subject to Meta's ownership of Llama Materials and derivatives made by or
100
+ for Meta, with respect to any derivative works and modifications of the Llama
101
+ Materials that are made by you, as between you and Meta, you are and will be the
102
+ owner of such derivative works and modifications.
103
+
104
+ c. If you institute litigation or other proceedings against Meta or any entity
105
+ (including a cross-claim or counterclaim in a lawsuit) alleging that the Llama
106
+ Materials or Llama 2 outputs or results, or any portion of any of the foregoing,
107
+ constitutes infringement of intellectual property or other rights owned or licensable
108
+ by you, then any licenses granted to you under this Agreement shall terminate as of
109
+ the date such litigation or claim is filed or instituted. You will indemnify and hold
110
+ harmless Meta from and against any claim by any third party arising out of or related
111
+ to your use or distribution of the Llama Materials.
112
+
113
+ 6. Term and Termination. The term of this Agreement will commence upon your
114
+ acceptance of this Agreement or access to the Llama Materials and will continue in
115
+ full force and effect until terminated in accordance with the terms and conditions
116
+ herein. Meta may terminate this Agreement if you are in breach of any term or
117
+ condition of this Agreement. Upon termination of this Agreement, you shall delete
118
+ and cease use of the Llama Materials. Sections 3, 4 and 7 shall survive the
119
+ termination of this Agreement.
120
+
121
+ 7. Governing Law and Jurisdiction. This Agreement will be governed and
122
+ construed under the laws of the State of California without regard to choice of law
123
+ principles, and the UN Convention on Contracts for the International Sale of Goods
124
+ does not apply to this Agreement. The courts of California shall have exclusive
125
+ jurisdiction of any dispute arising out of this Agreement.
126
+
Llama-2-13B-chat-GPTQ/Notice ADDED
@@ -0,0 +1 @@
 
 
1
+ Llama 2 is licensed under the LLAMA 2 Community License, Copyright © Meta Platforms, Inc. All Rights Reserved.
Llama-2-13B-chat-GPTQ/README.md ADDED
@@ -0,0 +1,389 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: https://huggingface.co/meta-llama/Llama-2-13b-chat-hf
3
+ inference: false
4
+ language:
5
+ - en
6
+ license: llama2
7
+ model_creator: Meta Llama 2
8
+ model_name: Llama 2 13B Chat
9
+ model_type: llama
10
+ pipeline_tag: text-generation
11
+ prompt_template: '[INST] <<SYS>>
12
+
13
+ You are a helpful, respectful and honest assistant. Always answer as helpfully as
14
+ possible, while being safe. Your answers should not include any harmful, unethical,
15
+ racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses
16
+ are socially unbiased and positive in nature. If a question does not make any sense,
17
+ or is not factually coherent, explain why instead of answering something not correct.
18
+ If you don''t know the answer to a question, please don''t share false information.
19
+
20
+ <</SYS>>
21
+
22
+ {prompt}[/INST]
23
+
24
+ '
25
+ quantized_by: TheBloke
26
+ tags:
27
+ - facebook
28
+ - meta
29
+ - pytorch
30
+ - llama
31
+ - llama-2
32
+ ---
33
+
34
+ <!-- header start -->
35
+ <!-- 200823 -->
36
+ <div style="width: auto; margin-left: auto; margin-right: auto">
37
+ <img src="https://i.imgur.com/EBdldam.jpg" alt="TheBlokeAI" style="width: 100%; min-width: 400px; display: block; margin: auto;">
38
+ </div>
39
+ <div style="display: flex; justify-content: space-between; width: 100%;">
40
+ <div style="display: flex; flex-direction: column; align-items: flex-start;">
41
+ <p style="margin-top: 0.5em; margin-bottom: 0em;"><a href="https://discord.gg/theblokeai">Chat & support: TheBloke's Discord server</a></p>
42
+ </div>
43
+ <div style="display: flex; flex-direction: column; align-items: flex-end;">
44
+ <p style="margin-top: 0.5em; margin-bottom: 0em;"><a href="https://www.patreon.com/TheBlokeAI">Want to contribute? TheBloke's Patreon page</a></p>
45
+ </div>
46
+ </div>
47
+ <div style="text-align:center; margin-top: 0em; margin-bottom: 0em"><p style="margin-top: 0.25em; margin-bottom: 0em;">TheBloke's LLM work is generously supported by a grant from <a href="https://a16z.com">andreessen horowitz (a16z)</a></p></div>
48
+ <hr style="margin-top: 1.0em; margin-bottom: 1.0em;">
49
+ <!-- header end -->
50
+
51
+ # Llama 2 13B Chat - GPTQ
52
+ - Model creator: [Meta Llama 2](https://huggingface.co/meta-llama)
53
+ - Original model: [Llama 2 13B Chat](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf)
54
+
55
+ <!-- description start -->
56
+ ## Description
57
+
58
+ This repo contains GPTQ model files for [Meta's Llama 2 13B-chat](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf).
59
+
60
+ Multiple GPTQ parameter permutations are provided; see Provided Files below for details of the options provided, their parameters, and the software used to create them.
61
+
62
+ <!-- description end -->
63
+ <!-- repositories-available start -->
64
+ ## Repositories available
65
+
66
+ * [AWQ model(s) for GPU inference.](https://huggingface.co/TheBloke/Llama-2-13B-chat-AWQ)
67
+ * [GPTQ models for GPU inference, with multiple quantisation parameter options.](https://huggingface.co/TheBloke/Llama-2-13B-chat-GPTQ)
68
+ * [2, 3, 4, 5, 6 and 8-bit GGUF models for CPU+GPU inference](https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF)
69
+ * [Meta Llama 2's original unquantised fp16 model in pytorch format, for GPU inference and for further conversions](https://huggingface.co/meta-llama/Llama-2-13B-chat-hf)
70
+ <!-- repositories-available end -->
71
+
72
+ <!-- prompt-template start -->
73
+ ## Prompt template: Llama-2-Chat
74
+
75
+ ```
76
+ [INST] <<SYS>>
77
+ You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
78
+ <</SYS>>
79
+ {prompt}[/INST]
80
+
81
+ ```
82
+
83
+ <!-- prompt-template end -->
84
+
85
+
86
+ <!-- README_GPTQ.md-provided-files start -->
87
+ ## Provided files and GPTQ parameters
88
+
89
+ Multiple quantisation parameters are provided, to allow you to choose the best one for your hardware and requirements.
90
+
91
+ Each separate quant is in a different branch. See below for instructions on fetching from different branches.
92
+
93
+ All recent GPTQ files are made with AutoGPTQ, and all files in non-main branches are made with AutoGPTQ. Files in the `main` branch which were uploaded before August 2023 were made with GPTQ-for-LLaMa.
94
+
95
+ <details>
96
+ <summary>Explanation of GPTQ parameters</summary>
97
+
98
+ - Bits: The bit size of the quantised model.
99
+ - GS: GPTQ group size. Higher numbers use less VRAM, but have lower quantisation accuracy. "None" is the lowest possible value.
100
+ - Act Order: True or False. Also known as `desc_act`. True results in better quantisation accuracy. Some GPTQ clients have had issues with models that use Act Order plus Group Size, but this is generally resolved now.
101
+ - Damp %: A GPTQ parameter that affects how samples are processed for quantisation. 0.01 is default, but 0.1 results in slightly better accuracy.
102
+ - GPTQ dataset: The dataset used for quantisation. Using a dataset more appropriate to the model's training can improve quantisation accuracy. Note that the GPTQ dataset is not the same as the dataset used to train the model - please refer to the original model repo for details of the training dataset(s).
103
+ - Sequence Length: The length of the dataset sequences used for quantisation. Ideally this is the same as the model sequence length. For some very long sequence models (16+K), a lower sequence length may have to be used. Note that a lower sequence length does not limit the sequence length of the quantised model. It only impacts the quantisation accuracy on longer inference sequences.
104
+ - ExLlama Compatibility: Whether this file can be loaded with ExLlama, which currently only supports Llama models in 4-bit.
105
+
106
+ </details>
107
+
108
+ | Branch | Bits | GS | Act Order | Damp % | GPTQ Dataset | Seq Len | Size | ExLlama | Desc |
109
+ | ------ | ---- | -- | --------- | ------ | ------------ | ------- | ---- | ------- | ---- |
110
+ | [main](https://huggingface.co/TheBloke/Llama-2-13B-chat-GPTQ/tree/main) | 4 | 128 | No | 0.01 | [wikitext](https://huggingface.co/datasets/wikitext/viewer/wikitext-2-v1/test) | 4096 | 7.26 GB | Yes | 4-bit, without Act Order and group size 128g. |
111
+ | [gptq-4bit-32g-actorder_True](https://huggingface.co/TheBloke/Llama-2-13B-chat-GPTQ/tree/gptq-4bit-32g-actorder_True) | 4 | 32 | Yes | 0.01 | [wikitext](https://huggingface.co/datasets/wikitext/viewer/wikitext-2-v1/test) | 4096 | 8.00 GB | Yes | 4-bit, with Act Order and group size 32g. Gives highest possible inference quality, with maximum VRAM usage. |
112
+ | [gptq-4bit-64g-actorder_True](https://huggingface.co/TheBloke/Llama-2-13B-chat-GPTQ/tree/gptq-4bit-64g-actorder_True) | 4 | 64 | Yes | 0.01 | [wikitext](https://huggingface.co/datasets/wikitext/viewer/wikitext-2-v1/test) | 4096 | 7.51 GB | Yes | 4-bit, with Act Order and group size 64g. Uses less VRAM than 32g, but with slightly lower accuracy. |
113
+ | [gptq-4bit-128g-actorder_True](https://huggingface.co/TheBloke/Llama-2-13B-chat-GPTQ/tree/gptq-4bit-128g-actorder_True) | 4 | 128 | Yes | 0.01 | [wikitext](https://huggingface.co/datasets/wikitext/viewer/wikitext-2-v1/test) | 4096 | 7.26 GB | Yes | 4-bit, with Act Order and group size 128g. Uses even less VRAM than 64g, but with slightly lower accuracy. |
114
+ | [gptq-8bit-128g-actorder_True](https://huggingface.co/TheBloke/Llama-2-13B-chat-GPTQ/tree/gptq-8bit-128g-actorder_True) | 8 | 128 | Yes | 0.01 | [wikitext](https://huggingface.co/datasets/wikitext/viewer/wikitext-2-v1/test) | 4096 | 13.65 GB | No | 8-bit, with group size 128g for higher inference quality and with Act Order for even higher accuracy. |
115
+ | [gptq-8bit-64g-actorder_True](https://huggingface.co/TheBloke/Llama-2-13B-chat-GPTQ/tree/gptq-8bit-64g-actorder_True) | 8 | 64 | Yes | 0.01 | [wikitext](https://huggingface.co/datasets/wikitext/viewer/wikitext-2-v1/test) | 4096 | 13.95 GB | No | 8-bit, with group size 64g and Act Order for even higher inference quality. Poor AutoGPTQ CUDA speed. |
116
+ | [gptq-8bit-128g-actorder_False](https://huggingface.co/TheBloke/Llama-2-13B-chat-GPTQ/tree/gptq-8bit-128g-actorder_False) | 8 | 128 | No | 0.01 | [wikitext](https://huggingface.co/datasets/wikitext/viewer/wikitext-2-v1/test) | 4096 | 13.65 GB | No | 8-bit, with group size 128g for higher inference quality and without Act Order to improve AutoGPTQ speed. |
117
+ | [gptq-8bit--1g-actorder_True](https://huggingface.co/TheBloke/Llama-2-13B-chat-GPTQ/tree/gptq-8bit--1g-actorder_True) | 8 | None | Yes | 0.01 | [wikitext](https://huggingface.co/datasets/wikitext/viewer/wikitext-2-v1/test) | 4096 | 13.36 GB | No | 8-bit, with Act Order. No group size, to lower VRAM requirements. |
118
+
119
+ <!-- README_GPTQ.md-provided-files end -->
120
+
121
+ <!-- README_GPTQ.md-download-from-branches start -->
122
+ ## How to download from branches
123
+
124
+ - In text-generation-webui, you can add `:branch` to the end of the download name, eg `TheBloke/Llama-2-13B-chat-GPTQ:main`
125
+ - With Git, you can clone a branch with:
126
+ ```
127
+ git clone --single-branch --branch main https://huggingface.co/TheBloke/Llama-2-13B-chat-GPTQ
128
+ ```
129
+ - In Python Transformers code, the branch is the `revision` parameter; see below.
130
+ <!-- README_GPTQ.md-download-from-branches end -->
131
+ <!-- README_GPTQ.md-text-generation-webui start -->
132
+ ## How to easily download and use this model in [text-generation-webui](https://github.com/oobabooga/text-generation-webui).
133
+
134
+ Please make sure you're using the latest version of [text-generation-webui](https://github.com/oobabooga/text-generation-webui).
135
+
136
+ It is strongly recommended to use the text-generation-webui one-click-installers unless you're sure you know how to make a manual install.
137
+
138
+ 1. Click the **Model tab**.
139
+ 2. Under **Download custom model or LoRA**, enter `TheBloke/Llama-2-13B-chat-GPTQ`.
140
+ - To download from a specific branch, enter for example `TheBloke/Llama-2-13B-chat-GPTQ:main`
141
+ - see Provided Files above for the list of branches for each option.
142
+ 3. Click **Download**.
143
+ 4. The model will start downloading. Once it's finished it will say "Done".
144
+ 5. In the top left, click the refresh icon next to **Model**.
145
+ 6. In the **Model** dropdown, choose the model you just downloaded: `Llama-2-13B-chat-GPTQ`
146
+ 7. The model will automatically load, and is now ready for use!
147
+ 8. If you want any custom settings, set them and then click **Save settings for this model** followed by **Reload the Model** in the top right.
148
+ * Note that you do not need to and should not set manual GPTQ parameters any more. These are set automatically from the file `quantize_config.json`.
149
+ 9. Once you're ready, click the **Text Generation tab** and enter a prompt to get started!
150
+ <!-- README_GPTQ.md-text-generation-webui end -->
151
+
152
+ <!-- README_GPTQ.md-use-from-python start -->
153
+ ## How to use this GPTQ model from Python code
154
+
155
+ ### Install the necessary packages
156
+
157
+ Requires: Transformers 4.32.0 or later, Optimum 1.12.0 or later, and AutoGPTQ 0.4.2 or later.
158
+
159
+ ```shell
160
+ pip3 install "transformers>=4.32.0" "optimum>=1.12.0"
161
+ pip3 install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/ # Use cu117 if on CUDA 11.7
162
+ ```
163
+
164
+ If you have problems installing AutoGPTQ using the pre-built wheels, install it from source instead:
165
+
166
+ ```shell
167
+ pip3 uninstall -y auto-gptq
168
+ git clone https://github.com/PanQiWei/AutoGPTQ
169
+ cd AutoGPTQ
170
+ pip3 install .
171
+ ```
172
+
173
+ ### For CodeLlama models only: you must use Transformers 4.33.0 or later.
174
+
175
+ If 4.33.0 is not yet released when you read this, you will need to install Transformers from source:
176
+ ```shell
177
+ pip3 uninstall -y transformers
178
+ pip3 install git+https://github.com/huggingface/transformers.git
179
+ ```
180
+
181
+ ### You can then use the following code
182
+
183
+ ```python
184
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
185
+
186
+ model_name_or_path = "TheBloke/Llama-2-13B-chat-GPTQ"
187
+ # To use a different branch, change revision
188
+ # For example: revision="main"
189
+ model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
190
+ device_map="auto",
191
+ trust_remote_code=False,
192
+ revision="main")
193
+
194
+ tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
195
+
196
+ prompt = "Tell me about AI"
197
+ prompt_template=f'''[INST] <<SYS>>
198
+ You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
199
+ <</SYS>>
200
+ {prompt}[/INST]
201
+
202
+ '''
203
+
204
+ print("\n\n*** Generate:")
205
+
206
+ input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
207
+ output = model.generate(inputs=input_ids, temperature=0.7, do_sample=True, top_p=0.95, top_k=40, max_new_tokens=512)
208
+ print(tokenizer.decode(output[0]))
209
+
210
+ # Inference can also be done using transformers' pipeline
211
+
212
+ print("*** Pipeline:")
213
+ pipe = pipeline(
214
+ "text-generation",
215
+ model=model,
216
+ tokenizer=tokenizer,
217
+ max_new_tokens=512,
218
+ do_sample=True,
219
+ temperature=0.7,
220
+ top_p=0.95,
221
+ top_k=40,
222
+ repetition_penalty=1.1
223
+ )
224
+
225
+ print(pipe(prompt_template)[0]['generated_text'])
226
+ ```
227
+ <!-- README_GPTQ.md-use-from-python end -->
228
+
229
+ <!-- README_GPTQ.md-compatibility start -->
230
+ ## Compatibility
231
+
232
+ The files provided are tested to work with AutoGPTQ, both via Transformers and using AutoGPTQ directly. They should also work with [0cc4m's GPTQ-for-LLaMa fork](https://github.com/0cc4m/KoboldAI).
233
+
234
+ [ExLlama](https://github.com/turboderp/exllama) is compatible with Llama models in 4-bit. Please see the Provided Files table above for per-file compatibility.
235
+
236
+ [Huggingface Text Generation Inference (TGI)](https://github.com/huggingface/text-generation-inference) is compatible with all GPTQ models.
237
+ <!-- README_GPTQ.md-compatibility end -->
238
+
239
+ <!-- footer start -->
240
+ <!-- 200823 -->
241
+ ## Discord
242
+
243
+ For further support, and discussions on these models and AI in general, join us at:
244
+
245
+ [TheBloke AI's Discord server](https://discord.gg/theblokeai)
246
+
247
+ ## Thanks, and how to contribute
248
+
249
+ Thanks to the [chirper.ai](https://chirper.ai) team!
250
+
251
+ Thanks to Clay from [gpus.llm-utils.org](https://gpus.llm-utils.org)!
252
+
253
+ I've had a lot of people ask if they can contribute. I enjoy providing models and helping people, and would love to be able to spend even more time doing it, as well as expanding into new projects like fine tuning/training.
254
+
255
+ If you're able and willing to contribute it will be most gratefully received and will help me to keep providing more models, and to start work on new AI projects.
256
+
257
+ Donaters will get priority support on any and all AI/LLM/model questions and requests, access to a private Discord room, plus other benefits.
258
+
259
+ * Patreon: https://patreon.com/TheBlokeAI
260
+ * Ko-Fi: https://ko-fi.com/TheBlokeAI
261
+
262
+ **Special thanks to**: Aemon Algiz.
263
+
264
+ **Patreon special mentions**: Alicia Loh, Stephen Murray, K, Ajan Kanaga, RoA, Magnesian, Deo Leter, Olakabola, Eugene Pentland, zynix, Deep Realms, Raymond Fosdick, Elijah Stavena, Iucharbius, Erik Bjäreholt, Luis Javier Navarrete Lozano, Nicholas, theTransient, John Detwiler, alfie_i, knownsqashed, Mano Prime, Willem Michiel, Enrico Ros, LangChain4j, OG, Michael Dempsey, Pierre Kircher, Pedro Madruga, James Bentley, Thomas Belote, Luke @flexchar, Leonard Tan, Johann-Peter Hartmann, Illia Dulskyi, Fen Risland, Chadd, S_X, Jeff Scroggin, Ken Nordquist, Sean Connelly, Artur Olbinski, Swaroop Kallakuri, Jack West, Ai Maven, David Ziegler, Russ Johnson, transmissions 11, John Villwock, Alps Aficionado, Clay Pascal, Viktor Bowallius, Subspace Studios, Rainer Wilmers, Trenton Dambrowitz, vamX, Michael Levine, 준교 김, Brandon Frisco, Kalila, Trailburnt, Randy H, Talal Aujan, Nathan Dryer, Vadim, 阿明, ReadyPlayerEmma, Tiffany J. Kim, George Stoitzev, Spencer Kim, Jerry Meng, Gabriel Tamborski, Cory Kujawski, Jeffrey Morgan, Spiking Neurons AB, Edmond Seymore, Alexandros Triantafyllidis, Lone Striker, Cap'n Zoog, Nikolai Manek, danny, ya boyyy, Derek Yates, usrbinkat, Mandus, TL, Nathan LeClaire, subjectnull, Imad Khwaja, webtim, Raven Klaugh, Asp the Wyvern, Gabriel Puliatti, Caitlyn Gatomon, Joseph William Delisle, Jonathan Leane, Luke Pendergrass, SuperWojo, Sebastain Graf, Will Dee, Fred von Graf, Andrey, Dan Guido, Daniel P. Andersen, Nitin Borwankar, Elle, Vitor Caleffi, biorpg, jjj, NimbleBox.ai, Pieter, Matthew Berman, terasurfer, Michael Davis, Alex, Stanislav Ovsiannikov
265
+
266
+
267
+ Thank you to all my generous patrons and donaters!
268
+
269
+ And thank you again to a16z for their generous grant.
270
+
271
+ <!-- footer end -->
272
+
273
+ # Original model card: Meta's Llama 2 13B-chat
274
+
275
+ # **Llama 2**
276
+ Llama 2 is a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 70 billion parameters. This is the repository for the 13B fine-tuned model, optimized for dialogue use cases and converted for the Hugging Face Transformers format. Links to other models can be found in the index at the bottom.
277
+
278
+ ## Model Details
279
+ *Note: Use of this model is governed by the Meta license. In order to download the model weights and tokenizer, please visit the [website](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) and accept our License before requesting access here.*
280
+
281
+ Meta developed and publicly released the Llama 2 family of large language models (LLMs), a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 70 billion parameters. Our fine-tuned LLMs, called Llama-2-Chat, are optimized for dialogue use cases. Llama-2-Chat models outperform open-source chat models on most benchmarks we tested, and in our human evaluations for helpfulness and safety, are on par with some popular closed-source models like ChatGPT and PaLM.
282
+
283
+ **Model Developers** Meta
284
+
285
+ **Variations** Llama 2 comes in a range of parameter sizes — 7B, 13B, and 70B — as well as pretrained and fine-tuned variations.
286
+
287
+ **Input** Models input text only.
288
+
289
+ **Output** Models generate text only.
290
+
291
+ **Model Architecture** Llama 2 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align to human preferences for helpfulness and safety.
292
+
293
+
294
+ ||Training Data|Params|Content Length|GQA|Tokens|LR|
295
+ |---|---|---|---|---|---|---|
296
+ |Llama 2|*A new mix of publicly available online data*|7B|4k|&#10007;|2.0T|3.0 x 10<sup>-4</sup>|
297
+ |Llama 2|*A new mix of publicly available online data*|13B|4k|&#10007;|2.0T|3.0 x 10<sup>-4</sup>|
298
+ |Llama 2|*A new mix of publicly available online data*|70B|4k|&#10004;|2.0T|1.5 x 10<sup>-4</sup>|
299
+
300
+ *Llama 2 family of models.* Token counts refer to pretraining data only. All models are trained with a global batch-size of 4M tokens. Bigger models — 70B — use Grouped-Query Attention (GQA) for improved inference scalability.
301
+
302
+ **Model Dates** Llama 2 was trained between January 2023 and July 2023.
303
+
304
+ **Status** This is a static model trained on an offline dataset. Future versions of the tuned models will be released as we improve model safety with community feedback.
305
+
306
+ **License** A custom commercial license is available at: [https://ai.meta.com/resources/models-and-libraries/llama-downloads/](https://ai.meta.com/resources/models-and-libraries/llama-downloads/)
307
+
308
+ **Research Paper** ["Llama-2: Open Foundation and Fine-tuned Chat Models"](https://arxiv.org/abs/2307.09288)
309
+
310
+ ## Intended Use
311
+ **Intended Use Cases** Llama 2 is intended for commercial and research use in English. Tuned models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks.
312
+
313
+ To get the expected features and performance for the chat versions, a specific formatting needs to be followed, including the `INST` and `<<SYS>>` tags, `BOS` and `EOS` tokens, and the whitespaces and breaklines in between (we recommend calling `strip()` on inputs to avoid double-spaces). See our reference code in github for details: [`chat_completion`](https://github.com/facebookresearch/llama/blob/main/llama/generation.py#L212).
314
+
315
+ **Out-of-scope Uses** Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in languages other than English. Use in any other way that is prohibited by the Acceptable Use Policy and Licensing Agreement for Llama 2.
316
+
317
+ ## Hardware and Software
318
+ **Training Factors** We used custom training libraries, Meta's Research Super Cluster, and production clusters for pretraining. Fine-tuning, annotation, and evaluation were also performed on third-party cloud compute.
319
+
320
+ **Carbon Footprint** Pretraining utilized a cumulative 3.3M GPU hours of computation on hardware of type A100-80GB (TDP of 350-400W). Estimated total emissions were 539 tCO2eq, 100% of which were offset by Meta’s sustainability program.
321
+
322
+ ||Time (GPU hours)|Power Consumption (W)|Carbon Emitted (tCO<sub>2</sub>eq)|
323
+ |---|---|---|---|
324
+ |Llama 2 7B|184320|400|31.22|
325
+ |Llama 2 13B|368640|400|62.44|
326
+ |Llama 2 70B|1720320|400|291.42|
327
+ |Total|3311616||539.00|
328
+
329
+ **CO<sub>2</sub> emissions during pretraining.** Time: total GPU time required for training each model. Power Consumption: peak power capacity per GPU device for the GPUs used adjusted for power usage efficiency. 100% of the emissions are directly offset by Meta's sustainability program, and because we are openly releasing these models, the pretraining costs do not need to be incurred by others.
330
+
331
+ ## Training Data
332
+ **Overview** Llama 2 was pretrained on 2 trillion tokens of data from publicly available sources. The fine-tuning data includes publicly available instruction datasets, as well as over one million new human-annotated examples. Neither the pretraining nor the fine-tuning datasets include Meta user data.
333
+
334
+ **Data Freshness** The pretraining data has a cutoff of September 2022, but some tuning data is more recent, up to July 2023.
335
+
336
+ ## Evaluation Results
337
+
338
+ In this section, we report the results for the Llama 1 and Llama 2 models on standard academic benchmarks. For all the evaluations, we use our internal evaluations library.
339
+
340
+ |Model|Size|Code|Commonsense Reasoning|World Knowledge|Reading Comprehension|Math|MMLU|BBH|AGI Eval|
341
+ |---|---|---|---|---|---|---|---|---|---|
342
+ |Llama 1|7B|14.1|60.8|46.2|58.5|6.95|35.1|30.3|23.9|
343
+ |Llama 1|13B|18.9|66.1|52.6|62.3|10.9|46.9|37.0|33.9|
344
+ |Llama 1|33B|26.0|70.0|58.4|67.6|21.4|57.8|39.8|41.7|
345
+ |Llama 1|65B|30.7|70.7|60.5|68.6|30.8|63.4|43.5|47.6|
346
+ |Llama 2|7B|16.8|63.9|48.9|61.3|14.6|45.3|32.6|29.3|
347
+ |Llama 2|13B|24.5|66.9|55.4|65.8|28.7|54.8|39.4|39.1|
348
+ |Llama 2|70B|**37.5**|**71.9**|**63.6**|**69.4**|**35.2**|**68.9**|**51.2**|**54.2**|
349
+
350
+ **Overall performance on grouped academic benchmarks.** *Code:* We report the average pass@1 scores of our models on HumanEval and MBPP. *Commonsense Reasoning:* We report the average of PIQA, SIQA, HellaSwag, WinoGrande, ARC easy and challenge, OpenBookQA, and CommonsenseQA. We report 7-shot results for CommonsenseQA and 0-shot results for all other benchmarks. *World Knowledge:* We evaluate the 5-shot performance on NaturalQuestions and TriviaQA and report the average. *Reading Comprehension:* For reading comprehension, we report the 0-shot average on SQuAD, QuAC, and BoolQ. *Math:* We report the average of the GSM8K (8 shot) and MATH (4 shot) benchmarks at top 1.
351
+
352
+ |||TruthfulQA|Toxigen|
353
+ |---|---|---|---|
354
+ |Llama 1|7B|27.42|23.00|
355
+ |Llama 1|13B|41.74|23.08|
356
+ |Llama 1|33B|44.19|22.57|
357
+ |Llama 1|65B|48.71|21.77|
358
+ |Llama 2|7B|33.29|**21.25**|
359
+ |Llama 2|13B|41.86|26.10|
360
+ |Llama 2|70B|**50.18**|24.60|
361
+
362
+ **Evaluation of pretrained LLMs on automatic safety benchmarks.** For TruthfulQA, we present the percentage of generations that are both truthful and informative (the higher the better). For ToxiGen, we present the percentage of toxic generations (the smaller the better).
363
+
364
+
365
+ |||TruthfulQA|Toxigen|
366
+ |---|---|---|---|
367
+ |Llama-2-Chat|7B|57.04|**0.00**|
368
+ |Llama-2-Chat|13B|62.18|**0.00**|
369
+ |Llama-2-Chat|70B|**64.14**|0.01|
370
+
371
+ **Evaluation of fine-tuned LLMs on different safety datasets.** Same metric definitions as above.
372
+
373
+ ## Ethical Considerations and Limitations
374
+ Llama 2 is a new technology that carries risks with use. Testing conducted to date has been in English, and has not covered, nor could it cover, all scenarios. For these reasons, as with all LLMs, Llama 2’s potential outputs cannot be predicted in advance, and the model may in some instances produce inaccurate, biased or other objectionable responses to user prompts. Therefore, before deploying any applications of Llama 2, developers should perform safety testing and tuning tailored to their specific applications of the model.
375
+
376
+ Please see the Responsible Use Guide available at [https://ai.meta.com/llama/responsible-use-guide/](https://ai.meta.com/llama/responsible-use-guide)
377
+
378
+ ## Reporting Issues
379
+ Please report any software “bug,” or other problems with the models through one of the following means:
380
+ - Reporting issues with the model: [github.com/facebookresearch/llama](http://github.com/facebookresearch/llama)
381
+ - Reporting problematic content generated by the model: [developers.facebook.com/llama_output_feedback](http://developers.facebook.com/llama_output_feedback)
382
+ - Reporting bugs and security concerns: [facebook.com/whitehat/info](http://facebook.com/whitehat/info)
383
+
384
+ ## Llama Model Index
385
+ |Model|Llama2|Llama2-hf|Llama2-chat|Llama2-chat-hf|
386
+ |---|---|---|---|---|
387
+ |7B| [Link](https://huggingface.co/llamaste/Llama-2-7b) | [Link](https://huggingface.co/llamaste/Llama-2-7b-hf) | [Link](https://huggingface.co/llamaste/Llama-2-7b-chat) | [Link](https://huggingface.co/llamaste/Llama-2-7b-chat-hf)|
388
+ |13B| [Link](https://huggingface.co/llamaste/Llama-2-13b) | [Link](https://huggingface.co/llamaste/Llama-2-13b-hf) | [Link](https://huggingface.co/llamaste/Llama-2-13b-chat) | [Link](https://huggingface.co/llamaste/Llama-2-13b-chat-hf)|
389
+ |70B| [Link](https://huggingface.co/llamaste/Llama-2-70b) | [Link](https://huggingface.co/llamaste/Llama-2-70b-hf) | [Link](https://huggingface.co/llamaste/Llama-2-70b-chat) | [Link](https://huggingface.co/llamaste/Llama-2-70b-chat-hf)|
Llama-2-13B-chat-GPTQ/USE_POLICY.md ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Llama 2 Acceptable Use Policy
2
+
3
+ Meta is committed to promoting safe and fair use of its tools and features, including Llama 2. If you access or use Llama 2, you agree to this Acceptable Use Policy (“Policy”). The most recent copy of this policy can be found at [ai.meta.com/llama/use-policy](http://ai.meta.com/llama/use-policy).
4
+
5
+ ## Prohibited Uses
6
+ We want everyone to use Llama 2 safely and responsibly. You agree you will not use, or allow others to use, Llama 2 to:
7
+
8
+ 1. Violate the law or others’ rights, including to:
9
+ 1. Engage in, promote, generate, contribute to, encourage, plan, incite, or further illegal or unlawful activity or content, such as:
10
+ 1. Violence or terrorism
11
+ 2. Exploitation or harm to children, including the solicitation, creation, acquisition, or dissemination of child exploitative content or failure to report Child Sexual Abuse Material
12
+ 3. Human trafficking, exploitation, and sexual violence
13
+ 4. The illegal distribution of information or materials to minors, including obscene materials, or failure to employ legally required age-gating in connection with such information or materials.
14
+ 5. Sexual solicitation
15
+ 6. Any other criminal activity
16
+ 2. Engage in, promote, incite, or facilitate the harassment, abuse, threatening, or bullying of individuals or groups of individuals
17
+ 3. Engage in, promote, incite, or facilitate discrimination or other unlawful or harmful conduct in the provision of employment, employment benefits, credit, housing, other economic benefits, or other essential goods and services
18
+ 4. Engage in the unauthorized or unlicensed practice of any profession including, but not limited to, financial, legal, medical/health, or related professional practices
19
+ 5. Collect, process, disclose, generate, or infer health, demographic, or other sensitive personal or private information about individuals without rights and consents required by applicable laws
20
+ 6. Engage in or facilitate any action or generate any content that infringes, misappropriates, or otherwise violates any third-party rights, including the outputs or results of any products or services using the Llama 2 Materials
21
+ 7. Create, generate, or facilitate the creation of malicious code, malware, computer viruses or do anything else that could disable, overburden, interfere with or impair the proper working, integrity, operation or appearance of a website or computer system
22
+
23
+
24
+
25
+ 2. Engage in, promote, incite, facilitate, or assist in the planning or development of activities that present a risk of death or bodily harm to individuals, including use of Llama 2 related to the following:
26
+ 1. Military, warfare, nuclear industries or applications, espionage, use for materials or activities that are subject to the International Traffic in Arms Regulations (ITAR) maintained by the United States Department of State
27
+ 2. Guns and illegal weapons (including weapon development)
28
+ 3. Illegal drugs and regulated/controlled substances
29
+ 4. Operation of critical infrastructure, transportation technologies, or heavy machinery
30
+ 5. Self-harm or harm to others, including suicide, cutting, and eating disorders
31
+ 6. Any content intended to incite or promote violence, abuse, or any infliction of bodily harm to an individual
32
+
33
+
34
+
35
+ 3. Intentionally deceive or mislead others, including use of Llama 2 related to the following:
36
+ 1. Generating, promoting, or furthering fraud or the creation or promotion of disinformation
37
+ 2. Generating, promoting, or furthering defamatory content, including the creation of defamatory statements, images, or other content
38
+ 3. Generating, promoting, or further distributing spam
39
+ 4. Impersonating another individual without consent, authorization, or legal right
40
+ 5. Representing that the use of Llama 2 or outputs are human-generated
41
+ 6. Generating or facilitating false online engagement, including fake reviews and other means of fake online engagement
42
+ 4. Fail to appropriately disclose to end users any known dangers of your AI system
43
+
44
+ Please report any violation of this Policy, software “bug,” or other problems that could lead to a violation of this Policy through one of the following means:
45
+
46
+ * Reporting issues with the model: [github.com/facebookresearch/llama](http://github.com/facebookresearch/llama)
47
+ * Reporting risky content generated by the model: [developers.facebook.com/llama_output_feedback](http://developers.facebook.com/llama_output_feedback)
48
+ * Reporting bugs and security concerns: [facebook.com/whitehat/info](http://facebook.com/whitehat/info)
49
+ * Reporting violations of the Acceptable Use Policy or unlicensed uses of Llama: [LlamaUseReport@meta.com](mailto:LlamaUseReport@meta.com)
50
+
Llama-2-13B-chat-GPTQ/config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "bos_token_id": 1,
6
+ "eos_token_id": 2,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 5120,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 13824,
11
+ "max_length": 4096,
12
+ "max_position_embeddings": 4096,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 40,
15
+ "num_hidden_layers": 40,
16
+ "num_key_value_heads": 40,
17
+ "pad_token_id": 0,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_scaling": null,
20
+ "tie_word_embeddings": false,
21
+ "torch_dtype": "float16",
22
+ "transformers_version": "4.30.2",
23
+ "use_cache": true,
24
+ "vocab_size": 32000,
25
+ "quantization_config": {
26
+ "bits": 4,
27
+ "group_size": 128,
28
+ "damp_percent": 0.01,
29
+ "desc_act": false,
30
+ "sym": true,
31
+ "true_sequential": true,
32
+ "model_name_or_path": null,
33
+ "model_file_base_name": "model",
34
+ "quant_method": "gptq"
35
+ }
36
+ }
Llama-2-13B-chat-GPTQ/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.30.2"
7
+ }
Llama-2-13B-chat-GPTQ/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:586f4d59cbaa1fc83e34898cd7a650b4d87ab69ef1560a9153628367c96e2dbc
3
+ size 7259449480
Llama-2-13B-chat-GPTQ/quantize_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bits": 4,
3
+ "group_size": 128,
4
+ "damp_percent": 0.01,
5
+ "desc_act": false,
6
+ "sym": true,
7
+ "true_sequential": true,
8
+ "model_name_or_path": null,
9
+ "model_file_base_name": "model"
10
+ }
Llama-2-13B-chat-GPTQ/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
Llama-2-13B-chat-GPTQ/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
Llama-2-13B-chat-GPTQ/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
Llama-2-13B-chat-GPTQ/tokenizer_config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<s>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "clean_up_tokenization_spaces": false,
13
+ "eos_token": {
14
+ "__type": "AddedToken",
15
+ "content": "</s>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "model_max_length": 1000000000000000019884624838656,
22
+ "pad_token": null,
23
+ "sp_model_kwargs": {},
24
+ "tokenizer_class": "LlamaTokenizer",
25
+ "unk_token": {
26
+ "__type": "AddedToken",
27
+ "content": "<unk>",
28
+ "lstrip": false,
29
+ "normalized": true,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ }
33
+ }
instructor-large/.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
instructor-large/1_Pooling/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false
9
+ }
instructor-large/2_Dense/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"in_features": 1024, "out_features": 768, "bias": false, "activation_function": "torch.nn.modules.linear.Identity"}
instructor-large/2_Dense/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6e82cba0876dacbadccdea565c9e19e29848d994d23968bd1343b8f0f762bdc
3
+ size 3146603
instructor-large/README.md ADDED
@@ -0,0 +1,2610 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ pipeline_tag: sentence-similarity
3
+ tags:
4
+ - text-embedding
5
+ - embeddings
6
+ - information-retrieval
7
+ - beir
8
+ - text-classification
9
+ - language-model
10
+ - text-clustering
11
+ - text-semantic-similarity
12
+ - text-evaluation
13
+ - prompt-retrieval
14
+ - text-reranking
15
+ - sentence-transformers
16
+ - feature-extraction
17
+ - sentence-similarity
18
+ - transformers
19
+ - t5
20
+ - English
21
+ - Sentence Similarity
22
+ - natural_questions
23
+ - ms_marco
24
+ - fever
25
+ - hotpot_qa
26
+ - mteb
27
+ language: en
28
+ inference: false
29
+ license: apache-2.0
30
+ model-index:
31
+ - name: INSTRUCTOR
32
+ results:
33
+ - task:
34
+ type: Classification
35
+ dataset:
36
+ type: mteb/amazon_counterfactual
37
+ name: MTEB AmazonCounterfactualClassification (en)
38
+ config: en
39
+ split: test
40
+ revision: e8379541af4e31359cca9fbcf4b00f2671dba205
41
+ metrics:
42
+ - type: accuracy
43
+ value: 88.13432835820896
44
+ - type: ap
45
+ value: 59.298209334395665
46
+ - type: f1
47
+ value: 83.31769058643586
48
+ - task:
49
+ type: Classification
50
+ dataset:
51
+ type: mteb/amazon_polarity
52
+ name: MTEB AmazonPolarityClassification
53
+ config: default
54
+ split: test
55
+ revision: e2d317d38cd51312af73b3d32a06d1a08b442046
56
+ metrics:
57
+ - type: accuracy
58
+ value: 91.526375
59
+ - type: ap
60
+ value: 88.16327709705504
61
+ - type: f1
62
+ value: 91.51095801287843
63
+ - task:
64
+ type: Classification
65
+ dataset:
66
+ type: mteb/amazon_reviews_multi
67
+ name: MTEB AmazonReviewsClassification (en)
68
+ config: en
69
+ split: test
70
+ revision: 1399c76144fd37290681b995c656ef9b2e06e26d
71
+ metrics:
72
+ - type: accuracy
73
+ value: 47.856
74
+ - type: f1
75
+ value: 45.41490917650942
76
+ - task:
77
+ type: Retrieval
78
+ dataset:
79
+ type: arguana
80
+ name: MTEB ArguAna
81
+ config: default
82
+ split: test
83
+ revision: None
84
+ metrics:
85
+ - type: map_at_1
86
+ value: 31.223
87
+ - type: map_at_10
88
+ value: 47.947
89
+ - type: map_at_100
90
+ value: 48.742000000000004
91
+ - type: map_at_1000
92
+ value: 48.745
93
+ - type: map_at_3
94
+ value: 43.137
95
+ - type: map_at_5
96
+ value: 45.992
97
+ - type: mrr_at_1
98
+ value: 32.432
99
+ - type: mrr_at_10
100
+ value: 48.4
101
+ - type: mrr_at_100
102
+ value: 49.202
103
+ - type: mrr_at_1000
104
+ value: 49.205
105
+ - type: mrr_at_3
106
+ value: 43.551
107
+ - type: mrr_at_5
108
+ value: 46.467999999999996
109
+ - type: ndcg_at_1
110
+ value: 31.223
111
+ - type: ndcg_at_10
112
+ value: 57.045
113
+ - type: ndcg_at_100
114
+ value: 60.175
115
+ - type: ndcg_at_1000
116
+ value: 60.233000000000004
117
+ - type: ndcg_at_3
118
+ value: 47.171
119
+ - type: ndcg_at_5
120
+ value: 52.322
121
+ - type: precision_at_1
122
+ value: 31.223
123
+ - type: precision_at_10
124
+ value: 8.599
125
+ - type: precision_at_100
126
+ value: 0.991
127
+ - type: precision_at_1000
128
+ value: 0.1
129
+ - type: precision_at_3
130
+ value: 19.63
131
+ - type: precision_at_5
132
+ value: 14.282
133
+ - type: recall_at_1
134
+ value: 31.223
135
+ - type: recall_at_10
136
+ value: 85.989
137
+ - type: recall_at_100
138
+ value: 99.075
139
+ - type: recall_at_1000
140
+ value: 99.502
141
+ - type: recall_at_3
142
+ value: 58.89
143
+ - type: recall_at_5
144
+ value: 71.408
145
+ - task:
146
+ type: Clustering
147
+ dataset:
148
+ type: mteb/arxiv-clustering-p2p
149
+ name: MTEB ArxivClusteringP2P
150
+ config: default
151
+ split: test
152
+ revision: a122ad7f3f0291bf49cc6f4d32aa80929df69d5d
153
+ metrics:
154
+ - type: v_measure
155
+ value: 43.1621946393635
156
+ - task:
157
+ type: Clustering
158
+ dataset:
159
+ type: mteb/arxiv-clustering-s2s
160
+ name: MTEB ArxivClusteringS2S
161
+ config: default
162
+ split: test
163
+ revision: f910caf1a6075f7329cdf8c1a6135696f37dbd53
164
+ metrics:
165
+ - type: v_measure
166
+ value: 32.56417132407894
167
+ - task:
168
+ type: Reranking
169
+ dataset:
170
+ type: mteb/askubuntudupquestions-reranking
171
+ name: MTEB AskUbuntuDupQuestions
172
+ config: default
173
+ split: test
174
+ revision: 2000358ca161889fa9c082cb41daa8dcfb161a54
175
+ metrics:
176
+ - type: map
177
+ value: 64.29539304390207
178
+ - type: mrr
179
+ value: 76.44484017060196
180
+ - task:
181
+ type: STS
182
+ dataset:
183
+ type: mteb/biosses-sts
184
+ name: MTEB BIOSSES
185
+ config: default
186
+ split: test
187
+ revision: d3fb88f8f02e40887cd149695127462bbcf29b4a
188
+ metrics:
189
+ - type: cos_sim_spearman
190
+ value: 84.38746499431112
191
+ - task:
192
+ type: Classification
193
+ dataset:
194
+ type: mteb/banking77
195
+ name: MTEB Banking77Classification
196
+ config: default
197
+ split: test
198
+ revision: 0fd18e25b25c072e09e0d92ab615fda904d66300
199
+ metrics:
200
+ - type: accuracy
201
+ value: 78.51298701298701
202
+ - type: f1
203
+ value: 77.49041754069235
204
+ - task:
205
+ type: Clustering
206
+ dataset:
207
+ type: mteb/biorxiv-clustering-p2p
208
+ name: MTEB BiorxivClusteringP2P
209
+ config: default
210
+ split: test
211
+ revision: 65b79d1d13f80053f67aca9498d9402c2d9f1f40
212
+ metrics:
213
+ - type: v_measure
214
+ value: 37.61848554098577
215
+ - task:
216
+ type: Clustering
217
+ dataset:
218
+ type: mteb/biorxiv-clustering-s2s
219
+ name: MTEB BiorxivClusteringS2S
220
+ config: default
221
+ split: test
222
+ revision: 258694dd0231531bc1fd9de6ceb52a0853c6d908
223
+ metrics:
224
+ - type: v_measure
225
+ value: 31.32623280148178
226
+ - task:
227
+ type: Retrieval
228
+ dataset:
229
+ type: BeIR/cqadupstack
230
+ name: MTEB CQADupstackAndroidRetrieval
231
+ config: default
232
+ split: test
233
+ revision: None
234
+ metrics:
235
+ - type: map_at_1
236
+ value: 35.803000000000004
237
+ - type: map_at_10
238
+ value: 48.848
239
+ - type: map_at_100
240
+ value: 50.5
241
+ - type: map_at_1000
242
+ value: 50.602999999999994
243
+ - type: map_at_3
244
+ value: 45.111000000000004
245
+ - type: map_at_5
246
+ value: 47.202
247
+ - type: mrr_at_1
248
+ value: 44.635000000000005
249
+ - type: mrr_at_10
250
+ value: 55.593
251
+ - type: mrr_at_100
252
+ value: 56.169999999999995
253
+ - type: mrr_at_1000
254
+ value: 56.19499999999999
255
+ - type: mrr_at_3
256
+ value: 53.361999999999995
257
+ - type: mrr_at_5
258
+ value: 54.806999999999995
259
+ - type: ndcg_at_1
260
+ value: 44.635000000000005
261
+ - type: ndcg_at_10
262
+ value: 55.899
263
+ - type: ndcg_at_100
264
+ value: 60.958
265
+ - type: ndcg_at_1000
266
+ value: 62.302
267
+ - type: ndcg_at_3
268
+ value: 51.051
269
+ - type: ndcg_at_5
270
+ value: 53.351000000000006
271
+ - type: precision_at_1
272
+ value: 44.635000000000005
273
+ - type: precision_at_10
274
+ value: 10.786999999999999
275
+ - type: precision_at_100
276
+ value: 1.6580000000000001
277
+ - type: precision_at_1000
278
+ value: 0.213
279
+ - type: precision_at_3
280
+ value: 24.893
281
+ - type: precision_at_5
282
+ value: 17.740000000000002
283
+ - type: recall_at_1
284
+ value: 35.803000000000004
285
+ - type: recall_at_10
286
+ value: 68.657
287
+ - type: recall_at_100
288
+ value: 89.77199999999999
289
+ - type: recall_at_1000
290
+ value: 97.67
291
+ - type: recall_at_3
292
+ value: 54.066
293
+ - type: recall_at_5
294
+ value: 60.788
295
+ - task:
296
+ type: Retrieval
297
+ dataset:
298
+ type: BeIR/cqadupstack
299
+ name: MTEB CQADupstackEnglishRetrieval
300
+ config: default
301
+ split: test
302
+ revision: None
303
+ metrics:
304
+ - type: map_at_1
305
+ value: 33.706
306
+ - type: map_at_10
307
+ value: 44.896
308
+ - type: map_at_100
309
+ value: 46.299
310
+ - type: map_at_1000
311
+ value: 46.44
312
+ - type: map_at_3
313
+ value: 41.721000000000004
314
+ - type: map_at_5
315
+ value: 43.486000000000004
316
+ - type: mrr_at_1
317
+ value: 41.592
318
+ - type: mrr_at_10
319
+ value: 50.529
320
+ - type: mrr_at_100
321
+ value: 51.22
322
+ - type: mrr_at_1000
323
+ value: 51.258
324
+ - type: mrr_at_3
325
+ value: 48.205999999999996
326
+ - type: mrr_at_5
327
+ value: 49.528
328
+ - type: ndcg_at_1
329
+ value: 41.592
330
+ - type: ndcg_at_10
331
+ value: 50.77199999999999
332
+ - type: ndcg_at_100
333
+ value: 55.383
334
+ - type: ndcg_at_1000
335
+ value: 57.288
336
+ - type: ndcg_at_3
337
+ value: 46.324
338
+ - type: ndcg_at_5
339
+ value: 48.346000000000004
340
+ - type: precision_at_1
341
+ value: 41.592
342
+ - type: precision_at_10
343
+ value: 9.516
344
+ - type: precision_at_100
345
+ value: 1.541
346
+ - type: precision_at_1000
347
+ value: 0.2
348
+ - type: precision_at_3
349
+ value: 22.399
350
+ - type: precision_at_5
351
+ value: 15.770999999999999
352
+ - type: recall_at_1
353
+ value: 33.706
354
+ - type: recall_at_10
355
+ value: 61.353
356
+ - type: recall_at_100
357
+ value: 80.182
358
+ - type: recall_at_1000
359
+ value: 91.896
360
+ - type: recall_at_3
361
+ value: 48.204
362
+ - type: recall_at_5
363
+ value: 53.89699999999999
364
+ - task:
365
+ type: Retrieval
366
+ dataset:
367
+ type: BeIR/cqadupstack
368
+ name: MTEB CQADupstackGamingRetrieval
369
+ config: default
370
+ split: test
371
+ revision: None
372
+ metrics:
373
+ - type: map_at_1
374
+ value: 44.424
375
+ - type: map_at_10
376
+ value: 57.169000000000004
377
+ - type: map_at_100
378
+ value: 58.202
379
+ - type: map_at_1000
380
+ value: 58.242000000000004
381
+ - type: map_at_3
382
+ value: 53.825
383
+ - type: map_at_5
384
+ value: 55.714
385
+ - type: mrr_at_1
386
+ value: 50.470000000000006
387
+ - type: mrr_at_10
388
+ value: 60.489000000000004
389
+ - type: mrr_at_100
390
+ value: 61.096
391
+ - type: mrr_at_1000
392
+ value: 61.112
393
+ - type: mrr_at_3
394
+ value: 58.192
395
+ - type: mrr_at_5
396
+ value: 59.611999999999995
397
+ - type: ndcg_at_1
398
+ value: 50.470000000000006
399
+ - type: ndcg_at_10
400
+ value: 63.071999999999996
401
+ - type: ndcg_at_100
402
+ value: 66.964
403
+ - type: ndcg_at_1000
404
+ value: 67.659
405
+ - type: ndcg_at_3
406
+ value: 57.74399999999999
407
+ - type: ndcg_at_5
408
+ value: 60.367000000000004
409
+ - type: precision_at_1
410
+ value: 50.470000000000006
411
+ - type: precision_at_10
412
+ value: 10.019
413
+ - type: precision_at_100
414
+ value: 1.29
415
+ - type: precision_at_1000
416
+ value: 0.13899999999999998
417
+ - type: precision_at_3
418
+ value: 25.558999999999997
419
+ - type: precision_at_5
420
+ value: 17.467
421
+ - type: recall_at_1
422
+ value: 44.424
423
+ - type: recall_at_10
424
+ value: 77.02
425
+ - type: recall_at_100
426
+ value: 93.738
427
+ - type: recall_at_1000
428
+ value: 98.451
429
+ - type: recall_at_3
430
+ value: 62.888
431
+ - type: recall_at_5
432
+ value: 69.138
433
+ - task:
434
+ type: Retrieval
435
+ dataset:
436
+ type: BeIR/cqadupstack
437
+ name: MTEB CQADupstackGisRetrieval
438
+ config: default
439
+ split: test
440
+ revision: None
441
+ metrics:
442
+ - type: map_at_1
443
+ value: 26.294
444
+ - type: map_at_10
445
+ value: 34.503
446
+ - type: map_at_100
447
+ value: 35.641
448
+ - type: map_at_1000
449
+ value: 35.724000000000004
450
+ - type: map_at_3
451
+ value: 31.753999999999998
452
+ - type: map_at_5
453
+ value: 33.190999999999995
454
+ - type: mrr_at_1
455
+ value: 28.362
456
+ - type: mrr_at_10
457
+ value: 36.53
458
+ - type: mrr_at_100
459
+ value: 37.541000000000004
460
+ - type: mrr_at_1000
461
+ value: 37.602000000000004
462
+ - type: mrr_at_3
463
+ value: 33.917
464
+ - type: mrr_at_5
465
+ value: 35.358000000000004
466
+ - type: ndcg_at_1
467
+ value: 28.362
468
+ - type: ndcg_at_10
469
+ value: 39.513999999999996
470
+ - type: ndcg_at_100
471
+ value: 44.815
472
+ - type: ndcg_at_1000
473
+ value: 46.839
474
+ - type: ndcg_at_3
475
+ value: 34.02
476
+ - type: ndcg_at_5
477
+ value: 36.522
478
+ - type: precision_at_1
479
+ value: 28.362
480
+ - type: precision_at_10
481
+ value: 6.101999999999999
482
+ - type: precision_at_100
483
+ value: 0.9129999999999999
484
+ - type: precision_at_1000
485
+ value: 0.11399999999999999
486
+ - type: precision_at_3
487
+ value: 14.161999999999999
488
+ - type: precision_at_5
489
+ value: 9.966
490
+ - type: recall_at_1
491
+ value: 26.294
492
+ - type: recall_at_10
493
+ value: 53.098
494
+ - type: recall_at_100
495
+ value: 76.877
496
+ - type: recall_at_1000
497
+ value: 91.834
498
+ - type: recall_at_3
499
+ value: 38.266
500
+ - type: recall_at_5
501
+ value: 44.287
502
+ - task:
503
+ type: Retrieval
504
+ dataset:
505
+ type: BeIR/cqadupstack
506
+ name: MTEB CQADupstackMathematicaRetrieval
507
+ config: default
508
+ split: test
509
+ revision: None
510
+ metrics:
511
+ - type: map_at_1
512
+ value: 16.407
513
+ - type: map_at_10
514
+ value: 25.185999999999996
515
+ - type: map_at_100
516
+ value: 26.533
517
+ - type: map_at_1000
518
+ value: 26.657999999999998
519
+ - type: map_at_3
520
+ value: 22.201999999999998
521
+ - type: map_at_5
522
+ value: 23.923
523
+ - type: mrr_at_1
524
+ value: 20.522000000000002
525
+ - type: mrr_at_10
526
+ value: 29.522
527
+ - type: mrr_at_100
528
+ value: 30.644
529
+ - type: mrr_at_1000
530
+ value: 30.713
531
+ - type: mrr_at_3
532
+ value: 26.679000000000002
533
+ - type: mrr_at_5
534
+ value: 28.483000000000004
535
+ - type: ndcg_at_1
536
+ value: 20.522000000000002
537
+ - type: ndcg_at_10
538
+ value: 30.656
539
+ - type: ndcg_at_100
540
+ value: 36.864999999999995
541
+ - type: ndcg_at_1000
542
+ value: 39.675
543
+ - type: ndcg_at_3
544
+ value: 25.319000000000003
545
+ - type: ndcg_at_5
546
+ value: 27.992
547
+ - type: precision_at_1
548
+ value: 20.522000000000002
549
+ - type: precision_at_10
550
+ value: 5.795999999999999
551
+ - type: precision_at_100
552
+ value: 1.027
553
+ - type: precision_at_1000
554
+ value: 0.13999999999999999
555
+ - type: precision_at_3
556
+ value: 12.396
557
+ - type: precision_at_5
558
+ value: 9.328
559
+ - type: recall_at_1
560
+ value: 16.407
561
+ - type: recall_at_10
562
+ value: 43.164
563
+ - type: recall_at_100
564
+ value: 69.695
565
+ - type: recall_at_1000
566
+ value: 89.41900000000001
567
+ - type: recall_at_3
568
+ value: 28.634999999999998
569
+ - type: recall_at_5
570
+ value: 35.308
571
+ - task:
572
+ type: Retrieval
573
+ dataset:
574
+ type: BeIR/cqadupstack
575
+ name: MTEB CQADupstackPhysicsRetrieval
576
+ config: default
577
+ split: test
578
+ revision: None
579
+ metrics:
580
+ - type: map_at_1
581
+ value: 30.473
582
+ - type: map_at_10
583
+ value: 41.676
584
+ - type: map_at_100
585
+ value: 43.120999999999995
586
+ - type: map_at_1000
587
+ value: 43.230000000000004
588
+ - type: map_at_3
589
+ value: 38.306000000000004
590
+ - type: map_at_5
591
+ value: 40.355999999999995
592
+ - type: mrr_at_1
593
+ value: 37.536
594
+ - type: mrr_at_10
595
+ value: 47.643
596
+ - type: mrr_at_100
597
+ value: 48.508
598
+ - type: mrr_at_1000
599
+ value: 48.551
600
+ - type: mrr_at_3
601
+ value: 45.348
602
+ - type: mrr_at_5
603
+ value: 46.744
604
+ - type: ndcg_at_1
605
+ value: 37.536
606
+ - type: ndcg_at_10
607
+ value: 47.823
608
+ - type: ndcg_at_100
609
+ value: 53.395
610
+ - type: ndcg_at_1000
611
+ value: 55.271
612
+ - type: ndcg_at_3
613
+ value: 42.768
614
+ - type: ndcg_at_5
615
+ value: 45.373000000000005
616
+ - type: precision_at_1
617
+ value: 37.536
618
+ - type: precision_at_10
619
+ value: 8.681
620
+ - type: precision_at_100
621
+ value: 1.34
622
+ - type: precision_at_1000
623
+ value: 0.165
624
+ - type: precision_at_3
625
+ value: 20.468
626
+ - type: precision_at_5
627
+ value: 14.495
628
+ - type: recall_at_1
629
+ value: 30.473
630
+ - type: recall_at_10
631
+ value: 60.092999999999996
632
+ - type: recall_at_100
633
+ value: 82.733
634
+ - type: recall_at_1000
635
+ value: 94.875
636
+ - type: recall_at_3
637
+ value: 45.734
638
+ - type: recall_at_5
639
+ value: 52.691
640
+ - task:
641
+ type: Retrieval
642
+ dataset:
643
+ type: BeIR/cqadupstack
644
+ name: MTEB CQADupstackProgrammersRetrieval
645
+ config: default
646
+ split: test
647
+ revision: None
648
+ metrics:
649
+ - type: map_at_1
650
+ value: 29.976000000000003
651
+ - type: map_at_10
652
+ value: 41.097
653
+ - type: map_at_100
654
+ value: 42.547000000000004
655
+ - type: map_at_1000
656
+ value: 42.659000000000006
657
+ - type: map_at_3
658
+ value: 37.251
659
+ - type: map_at_5
660
+ value: 39.493
661
+ - type: mrr_at_1
662
+ value: 37.557
663
+ - type: mrr_at_10
664
+ value: 46.605000000000004
665
+ - type: mrr_at_100
666
+ value: 47.487
667
+ - type: mrr_at_1000
668
+ value: 47.54
669
+ - type: mrr_at_3
670
+ value: 43.721
671
+ - type: mrr_at_5
672
+ value: 45.411
673
+ - type: ndcg_at_1
674
+ value: 37.557
675
+ - type: ndcg_at_10
676
+ value: 47.449000000000005
677
+ - type: ndcg_at_100
678
+ value: 53.052
679
+ - type: ndcg_at_1000
680
+ value: 55.010999999999996
681
+ - type: ndcg_at_3
682
+ value: 41.439
683
+ - type: ndcg_at_5
684
+ value: 44.292
685
+ - type: precision_at_1
686
+ value: 37.557
687
+ - type: precision_at_10
688
+ value: 8.847
689
+ - type: precision_at_100
690
+ value: 1.357
691
+ - type: precision_at_1000
692
+ value: 0.16999999999999998
693
+ - type: precision_at_3
694
+ value: 20.091
695
+ - type: precision_at_5
696
+ value: 14.384
697
+ - type: recall_at_1
698
+ value: 29.976000000000003
699
+ - type: recall_at_10
700
+ value: 60.99099999999999
701
+ - type: recall_at_100
702
+ value: 84.245
703
+ - type: recall_at_1000
704
+ value: 96.97200000000001
705
+ - type: recall_at_3
706
+ value: 43.794
707
+ - type: recall_at_5
708
+ value: 51.778999999999996
709
+ - task:
710
+ type: Retrieval
711
+ dataset:
712
+ type: BeIR/cqadupstack
713
+ name: MTEB CQADupstackRetrieval
714
+ config: default
715
+ split: test
716
+ revision: None
717
+ metrics:
718
+ - type: map_at_1
719
+ value: 28.099166666666665
720
+ - type: map_at_10
721
+ value: 38.1365
722
+ - type: map_at_100
723
+ value: 39.44491666666667
724
+ - type: map_at_1000
725
+ value: 39.55858333333334
726
+ - type: map_at_3
727
+ value: 35.03641666666666
728
+ - type: map_at_5
729
+ value: 36.79833333333334
730
+ - type: mrr_at_1
731
+ value: 33.39966666666667
732
+ - type: mrr_at_10
733
+ value: 42.42583333333333
734
+ - type: mrr_at_100
735
+ value: 43.28575
736
+ - type: mrr_at_1000
737
+ value: 43.33741666666667
738
+ - type: mrr_at_3
739
+ value: 39.94975
740
+ - type: mrr_at_5
741
+ value: 41.41633333333334
742
+ - type: ndcg_at_1
743
+ value: 33.39966666666667
744
+ - type: ndcg_at_10
745
+ value: 43.81741666666667
746
+ - type: ndcg_at_100
747
+ value: 49.08166666666667
748
+ - type: ndcg_at_1000
749
+ value: 51.121166666666674
750
+ - type: ndcg_at_3
751
+ value: 38.73575
752
+ - type: ndcg_at_5
753
+ value: 41.18158333333333
754
+ - type: precision_at_1
755
+ value: 33.39966666666667
756
+ - type: precision_at_10
757
+ value: 7.738916666666667
758
+ - type: precision_at_100
759
+ value: 1.2265833333333331
760
+ - type: precision_at_1000
761
+ value: 0.15983333333333336
762
+ - type: precision_at_3
763
+ value: 17.967416666666665
764
+ - type: precision_at_5
765
+ value: 12.78675
766
+ - type: recall_at_1
767
+ value: 28.099166666666665
768
+ - type: recall_at_10
769
+ value: 56.27049999999999
770
+ - type: recall_at_100
771
+ value: 78.93291666666667
772
+ - type: recall_at_1000
773
+ value: 92.81608333333334
774
+ - type: recall_at_3
775
+ value: 42.09775
776
+ - type: recall_at_5
777
+ value: 48.42533333333334
778
+ - task:
779
+ type: Retrieval
780
+ dataset:
781
+ type: BeIR/cqadupstack
782
+ name: MTEB CQADupstackStatsRetrieval
783
+ config: default
784
+ split: test
785
+ revision: None
786
+ metrics:
787
+ - type: map_at_1
788
+ value: 23.663
789
+ - type: map_at_10
790
+ value: 30.377
791
+ - type: map_at_100
792
+ value: 31.426
793
+ - type: map_at_1000
794
+ value: 31.519000000000002
795
+ - type: map_at_3
796
+ value: 28.069
797
+ - type: map_at_5
798
+ value: 29.256999999999998
799
+ - type: mrr_at_1
800
+ value: 26.687
801
+ - type: mrr_at_10
802
+ value: 33.107
803
+ - type: mrr_at_100
804
+ value: 34.055
805
+ - type: mrr_at_1000
806
+ value: 34.117999999999995
807
+ - type: mrr_at_3
808
+ value: 31.058000000000003
809
+ - type: mrr_at_5
810
+ value: 32.14
811
+ - type: ndcg_at_1
812
+ value: 26.687
813
+ - type: ndcg_at_10
814
+ value: 34.615
815
+ - type: ndcg_at_100
816
+ value: 39.776
817
+ - type: ndcg_at_1000
818
+ value: 42.05
819
+ - type: ndcg_at_3
820
+ value: 30.322
821
+ - type: ndcg_at_5
822
+ value: 32.157000000000004
823
+ - type: precision_at_1
824
+ value: 26.687
825
+ - type: precision_at_10
826
+ value: 5.491
827
+ - type: precision_at_100
828
+ value: 0.877
829
+ - type: precision_at_1000
830
+ value: 0.11499999999999999
831
+ - type: precision_at_3
832
+ value: 13.139000000000001
833
+ - type: precision_at_5
834
+ value: 9.049
835
+ - type: recall_at_1
836
+ value: 23.663
837
+ - type: recall_at_10
838
+ value: 45.035
839
+ - type: recall_at_100
840
+ value: 68.554
841
+ - type: recall_at_1000
842
+ value: 85.077
843
+ - type: recall_at_3
844
+ value: 32.982
845
+ - type: recall_at_5
846
+ value: 37.688
847
+ - task:
848
+ type: Retrieval
849
+ dataset:
850
+ type: BeIR/cqadupstack
851
+ name: MTEB CQADupstackTexRetrieval
852
+ config: default
853
+ split: test
854
+ revision: None
855
+ metrics:
856
+ - type: map_at_1
857
+ value: 17.403
858
+ - type: map_at_10
859
+ value: 25.197000000000003
860
+ - type: map_at_100
861
+ value: 26.355
862
+ - type: map_at_1000
863
+ value: 26.487
864
+ - type: map_at_3
865
+ value: 22.733
866
+ - type: map_at_5
867
+ value: 24.114
868
+ - type: mrr_at_1
869
+ value: 21.37
870
+ - type: mrr_at_10
871
+ value: 29.091
872
+ - type: mrr_at_100
873
+ value: 30.018
874
+ - type: mrr_at_1000
875
+ value: 30.096
876
+ - type: mrr_at_3
877
+ value: 26.887
878
+ - type: mrr_at_5
879
+ value: 28.157
880
+ - type: ndcg_at_1
881
+ value: 21.37
882
+ - type: ndcg_at_10
883
+ value: 30.026000000000003
884
+ - type: ndcg_at_100
885
+ value: 35.416
886
+ - type: ndcg_at_1000
887
+ value: 38.45
888
+ - type: ndcg_at_3
889
+ value: 25.764
890
+ - type: ndcg_at_5
891
+ value: 27.742
892
+ - type: precision_at_1
893
+ value: 21.37
894
+ - type: precision_at_10
895
+ value: 5.609
896
+ - type: precision_at_100
897
+ value: 0.9860000000000001
898
+ - type: precision_at_1000
899
+ value: 0.14300000000000002
900
+ - type: precision_at_3
901
+ value: 12.423
902
+ - type: precision_at_5
903
+ value: 9.009
904
+ - type: recall_at_1
905
+ value: 17.403
906
+ - type: recall_at_10
907
+ value: 40.573
908
+ - type: recall_at_100
909
+ value: 64.818
910
+ - type: recall_at_1000
911
+ value: 86.53699999999999
912
+ - type: recall_at_3
913
+ value: 28.493000000000002
914
+ - type: recall_at_5
915
+ value: 33.660000000000004
916
+ - task:
917
+ type: Retrieval
918
+ dataset:
919
+ type: BeIR/cqadupstack
920
+ name: MTEB CQADupstackUnixRetrieval
921
+ config: default
922
+ split: test
923
+ revision: None
924
+ metrics:
925
+ - type: map_at_1
926
+ value: 28.639
927
+ - type: map_at_10
928
+ value: 38.951
929
+ - type: map_at_100
930
+ value: 40.238
931
+ - type: map_at_1000
932
+ value: 40.327
933
+ - type: map_at_3
934
+ value: 35.842
935
+ - type: map_at_5
936
+ value: 37.617
937
+ - type: mrr_at_1
938
+ value: 33.769
939
+ - type: mrr_at_10
940
+ value: 43.088
941
+ - type: mrr_at_100
942
+ value: 44.03
943
+ - type: mrr_at_1000
944
+ value: 44.072
945
+ - type: mrr_at_3
946
+ value: 40.656
947
+ - type: mrr_at_5
948
+ value: 42.138999999999996
949
+ - type: ndcg_at_1
950
+ value: 33.769
951
+ - type: ndcg_at_10
952
+ value: 44.676
953
+ - type: ndcg_at_100
954
+ value: 50.416000000000004
955
+ - type: ndcg_at_1000
956
+ value: 52.227999999999994
957
+ - type: ndcg_at_3
958
+ value: 39.494
959
+ - type: ndcg_at_5
960
+ value: 42.013
961
+ - type: precision_at_1
962
+ value: 33.769
963
+ - type: precision_at_10
964
+ value: 7.668
965
+ - type: precision_at_100
966
+ value: 1.18
967
+ - type: precision_at_1000
968
+ value: 0.145
969
+ - type: precision_at_3
970
+ value: 18.221
971
+ - type: precision_at_5
972
+ value: 12.966
973
+ - type: recall_at_1
974
+ value: 28.639
975
+ - type: recall_at_10
976
+ value: 57.687999999999995
977
+ - type: recall_at_100
978
+ value: 82.541
979
+ - type: recall_at_1000
980
+ value: 94.896
981
+ - type: recall_at_3
982
+ value: 43.651
983
+ - type: recall_at_5
984
+ value: 49.925999999999995
985
+ - task:
986
+ type: Retrieval
987
+ dataset:
988
+ type: BeIR/cqadupstack
989
+ name: MTEB CQADupstackWebmastersRetrieval
990
+ config: default
991
+ split: test
992
+ revision: None
993
+ metrics:
994
+ - type: map_at_1
995
+ value: 29.57
996
+ - type: map_at_10
997
+ value: 40.004
998
+ - type: map_at_100
999
+ value: 41.75
1000
+ - type: map_at_1000
1001
+ value: 41.97
1002
+ - type: map_at_3
1003
+ value: 36.788
1004
+ - type: map_at_5
1005
+ value: 38.671
1006
+ - type: mrr_at_1
1007
+ value: 35.375
1008
+ - type: mrr_at_10
1009
+ value: 45.121
1010
+ - type: mrr_at_100
1011
+ value: 45.994
1012
+ - type: mrr_at_1000
1013
+ value: 46.04
1014
+ - type: mrr_at_3
1015
+ value: 42.227
1016
+ - type: mrr_at_5
1017
+ value: 43.995
1018
+ - type: ndcg_at_1
1019
+ value: 35.375
1020
+ - type: ndcg_at_10
1021
+ value: 46.392
1022
+ - type: ndcg_at_100
1023
+ value: 52.196
1024
+ - type: ndcg_at_1000
1025
+ value: 54.274
1026
+ - type: ndcg_at_3
1027
+ value: 41.163
1028
+ - type: ndcg_at_5
1029
+ value: 43.813
1030
+ - type: precision_at_1
1031
+ value: 35.375
1032
+ - type: precision_at_10
1033
+ value: 8.676
1034
+ - type: precision_at_100
1035
+ value: 1.678
1036
+ - type: precision_at_1000
1037
+ value: 0.253
1038
+ - type: precision_at_3
1039
+ value: 19.104
1040
+ - type: precision_at_5
1041
+ value: 13.913
1042
+ - type: recall_at_1
1043
+ value: 29.57
1044
+ - type: recall_at_10
1045
+ value: 58.779
1046
+ - type: recall_at_100
1047
+ value: 83.337
1048
+ - type: recall_at_1000
1049
+ value: 95.979
1050
+ - type: recall_at_3
1051
+ value: 44.005
1052
+ - type: recall_at_5
1053
+ value: 50.975
1054
+ - task:
1055
+ type: Retrieval
1056
+ dataset:
1057
+ type: BeIR/cqadupstack
1058
+ name: MTEB CQADupstackWordpressRetrieval
1059
+ config: default
1060
+ split: test
1061
+ revision: None
1062
+ metrics:
1063
+ - type: map_at_1
1064
+ value: 20.832
1065
+ - type: map_at_10
1066
+ value: 29.733999999999998
1067
+ - type: map_at_100
1068
+ value: 30.727
1069
+ - type: map_at_1000
1070
+ value: 30.843999999999998
1071
+ - type: map_at_3
1072
+ value: 26.834999999999997
1073
+ - type: map_at_5
1074
+ value: 28.555999999999997
1075
+ - type: mrr_at_1
1076
+ value: 22.921
1077
+ - type: mrr_at_10
1078
+ value: 31.791999999999998
1079
+ - type: mrr_at_100
1080
+ value: 32.666000000000004
1081
+ - type: mrr_at_1000
1082
+ value: 32.751999999999995
1083
+ - type: mrr_at_3
1084
+ value: 29.144
1085
+ - type: mrr_at_5
1086
+ value: 30.622
1087
+ - type: ndcg_at_1
1088
+ value: 22.921
1089
+ - type: ndcg_at_10
1090
+ value: 34.915
1091
+ - type: ndcg_at_100
1092
+ value: 39.744
1093
+ - type: ndcg_at_1000
1094
+ value: 42.407000000000004
1095
+ - type: ndcg_at_3
1096
+ value: 29.421000000000003
1097
+ - type: ndcg_at_5
1098
+ value: 32.211
1099
+ - type: precision_at_1
1100
+ value: 22.921
1101
+ - type: precision_at_10
1102
+ value: 5.675
1103
+ - type: precision_at_100
1104
+ value: 0.872
1105
+ - type: precision_at_1000
1106
+ value: 0.121
1107
+ - type: precision_at_3
1108
+ value: 12.753999999999998
1109
+ - type: precision_at_5
1110
+ value: 9.353
1111
+ - type: recall_at_1
1112
+ value: 20.832
1113
+ - type: recall_at_10
1114
+ value: 48.795
1115
+ - type: recall_at_100
1116
+ value: 70.703
1117
+ - type: recall_at_1000
1118
+ value: 90.187
1119
+ - type: recall_at_3
1120
+ value: 34.455000000000005
1121
+ - type: recall_at_5
1122
+ value: 40.967
1123
+ - task:
1124
+ type: Retrieval
1125
+ dataset:
1126
+ type: climate-fever
1127
+ name: MTEB ClimateFEVER
1128
+ config: default
1129
+ split: test
1130
+ revision: None
1131
+ metrics:
1132
+ - type: map_at_1
1133
+ value: 10.334
1134
+ - type: map_at_10
1135
+ value: 19.009999999999998
1136
+ - type: map_at_100
1137
+ value: 21.129
1138
+ - type: map_at_1000
1139
+ value: 21.328
1140
+ - type: map_at_3
1141
+ value: 15.152
1142
+ - type: map_at_5
1143
+ value: 17.084
1144
+ - type: mrr_at_1
1145
+ value: 23.453
1146
+ - type: mrr_at_10
1147
+ value: 36.099
1148
+ - type: mrr_at_100
1149
+ value: 37.069
1150
+ - type: mrr_at_1000
1151
+ value: 37.104
1152
+ - type: mrr_at_3
1153
+ value: 32.096000000000004
1154
+ - type: mrr_at_5
1155
+ value: 34.451
1156
+ - type: ndcg_at_1
1157
+ value: 23.453
1158
+ - type: ndcg_at_10
1159
+ value: 27.739000000000004
1160
+ - type: ndcg_at_100
1161
+ value: 35.836
1162
+ - type: ndcg_at_1000
1163
+ value: 39.242
1164
+ - type: ndcg_at_3
1165
+ value: 21.263
1166
+ - type: ndcg_at_5
1167
+ value: 23.677
1168
+ - type: precision_at_1
1169
+ value: 23.453
1170
+ - type: precision_at_10
1171
+ value: 9.199
1172
+ - type: precision_at_100
1173
+ value: 1.791
1174
+ - type: precision_at_1000
1175
+ value: 0.242
1176
+ - type: precision_at_3
1177
+ value: 16.2
1178
+ - type: precision_at_5
1179
+ value: 13.147
1180
+ - type: recall_at_1
1181
+ value: 10.334
1182
+ - type: recall_at_10
1183
+ value: 35.177
1184
+ - type: recall_at_100
1185
+ value: 63.009
1186
+ - type: recall_at_1000
1187
+ value: 81.938
1188
+ - type: recall_at_3
1189
+ value: 19.914
1190
+ - type: recall_at_5
1191
+ value: 26.077
1192
+ - task:
1193
+ type: Retrieval
1194
+ dataset:
1195
+ type: dbpedia-entity
1196
+ name: MTEB DBPedia
1197
+ config: default
1198
+ split: test
1199
+ revision: None
1200
+ metrics:
1201
+ - type: map_at_1
1202
+ value: 8.212
1203
+ - type: map_at_10
1204
+ value: 17.386
1205
+ - type: map_at_100
1206
+ value: 24.234
1207
+ - type: map_at_1000
1208
+ value: 25.724999999999998
1209
+ - type: map_at_3
1210
+ value: 12.727
1211
+ - type: map_at_5
1212
+ value: 14.785
1213
+ - type: mrr_at_1
1214
+ value: 59.25
1215
+ - type: mrr_at_10
1216
+ value: 68.687
1217
+ - type: mrr_at_100
1218
+ value: 69.133
1219
+ - type: mrr_at_1000
1220
+ value: 69.14099999999999
1221
+ - type: mrr_at_3
1222
+ value: 66.917
1223
+ - type: mrr_at_5
1224
+ value: 67.742
1225
+ - type: ndcg_at_1
1226
+ value: 48.625
1227
+ - type: ndcg_at_10
1228
+ value: 36.675999999999995
1229
+ - type: ndcg_at_100
1230
+ value: 41.543
1231
+ - type: ndcg_at_1000
1232
+ value: 49.241
1233
+ - type: ndcg_at_3
1234
+ value: 41.373
1235
+ - type: ndcg_at_5
1236
+ value: 38.707
1237
+ - type: precision_at_1
1238
+ value: 59.25
1239
+ - type: precision_at_10
1240
+ value: 28.525
1241
+ - type: precision_at_100
1242
+ value: 9.027000000000001
1243
+ - type: precision_at_1000
1244
+ value: 1.8339999999999999
1245
+ - type: precision_at_3
1246
+ value: 44.833
1247
+ - type: precision_at_5
1248
+ value: 37.35
1249
+ - type: recall_at_1
1250
+ value: 8.212
1251
+ - type: recall_at_10
1252
+ value: 23.188
1253
+ - type: recall_at_100
1254
+ value: 48.613
1255
+ - type: recall_at_1000
1256
+ value: 73.093
1257
+ - type: recall_at_3
1258
+ value: 14.419
1259
+ - type: recall_at_5
1260
+ value: 17.798
1261
+ - task:
1262
+ type: Classification
1263
+ dataset:
1264
+ type: mteb/emotion
1265
+ name: MTEB EmotionClassification
1266
+ config: default
1267
+ split: test
1268
+ revision: 4f58c6b202a23cf9a4da393831edf4f9183cad37
1269
+ metrics:
1270
+ - type: accuracy
1271
+ value: 52.725
1272
+ - type: f1
1273
+ value: 46.50743309855908
1274
+ - task:
1275
+ type: Retrieval
1276
+ dataset:
1277
+ type: fever
1278
+ name: MTEB FEVER
1279
+ config: default
1280
+ split: test
1281
+ revision: None
1282
+ metrics:
1283
+ - type: map_at_1
1284
+ value: 55.086
1285
+ - type: map_at_10
1286
+ value: 66.914
1287
+ - type: map_at_100
1288
+ value: 67.321
1289
+ - type: map_at_1000
1290
+ value: 67.341
1291
+ - type: map_at_3
1292
+ value: 64.75800000000001
1293
+ - type: map_at_5
1294
+ value: 66.189
1295
+ - type: mrr_at_1
1296
+ value: 59.28600000000001
1297
+ - type: mrr_at_10
1298
+ value: 71.005
1299
+ - type: mrr_at_100
1300
+ value: 71.304
1301
+ - type: mrr_at_1000
1302
+ value: 71.313
1303
+ - type: mrr_at_3
1304
+ value: 69.037
1305
+ - type: mrr_at_5
1306
+ value: 70.35
1307
+ - type: ndcg_at_1
1308
+ value: 59.28600000000001
1309
+ - type: ndcg_at_10
1310
+ value: 72.695
1311
+ - type: ndcg_at_100
1312
+ value: 74.432
1313
+ - type: ndcg_at_1000
1314
+ value: 74.868
1315
+ - type: ndcg_at_3
1316
+ value: 68.72200000000001
1317
+ - type: ndcg_at_5
1318
+ value: 71.081
1319
+ - type: precision_at_1
1320
+ value: 59.28600000000001
1321
+ - type: precision_at_10
1322
+ value: 9.499
1323
+ - type: precision_at_100
1324
+ value: 1.052
1325
+ - type: precision_at_1000
1326
+ value: 0.11100000000000002
1327
+ - type: precision_at_3
1328
+ value: 27.503
1329
+ - type: precision_at_5
1330
+ value: 17.854999999999997
1331
+ - type: recall_at_1
1332
+ value: 55.086
1333
+ - type: recall_at_10
1334
+ value: 86.453
1335
+ - type: recall_at_100
1336
+ value: 94.028
1337
+ - type: recall_at_1000
1338
+ value: 97.052
1339
+ - type: recall_at_3
1340
+ value: 75.821
1341
+ - type: recall_at_5
1342
+ value: 81.6
1343
+ - task:
1344
+ type: Retrieval
1345
+ dataset:
1346
+ type: fiqa
1347
+ name: MTEB FiQA2018
1348
+ config: default
1349
+ split: test
1350
+ revision: None
1351
+ metrics:
1352
+ - type: map_at_1
1353
+ value: 22.262999999999998
1354
+ - type: map_at_10
1355
+ value: 37.488
1356
+ - type: map_at_100
1357
+ value: 39.498
1358
+ - type: map_at_1000
1359
+ value: 39.687
1360
+ - type: map_at_3
1361
+ value: 32.529
1362
+ - type: map_at_5
1363
+ value: 35.455
1364
+ - type: mrr_at_1
1365
+ value: 44.907000000000004
1366
+ - type: mrr_at_10
1367
+ value: 53.239000000000004
1368
+ - type: mrr_at_100
1369
+ value: 54.086
1370
+ - type: mrr_at_1000
1371
+ value: 54.122
1372
+ - type: mrr_at_3
1373
+ value: 51.235
1374
+ - type: mrr_at_5
1375
+ value: 52.415
1376
+ - type: ndcg_at_1
1377
+ value: 44.907000000000004
1378
+ - type: ndcg_at_10
1379
+ value: 45.446
1380
+ - type: ndcg_at_100
1381
+ value: 52.429
1382
+ - type: ndcg_at_1000
1383
+ value: 55.169000000000004
1384
+ - type: ndcg_at_3
1385
+ value: 41.882000000000005
1386
+ - type: ndcg_at_5
1387
+ value: 43.178
1388
+ - type: precision_at_1
1389
+ value: 44.907000000000004
1390
+ - type: precision_at_10
1391
+ value: 12.931999999999999
1392
+ - type: precision_at_100
1393
+ value: 2.025
1394
+ - type: precision_at_1000
1395
+ value: 0.248
1396
+ - type: precision_at_3
1397
+ value: 28.652
1398
+ - type: precision_at_5
1399
+ value: 21.204
1400
+ - type: recall_at_1
1401
+ value: 22.262999999999998
1402
+ - type: recall_at_10
1403
+ value: 52.447
1404
+ - type: recall_at_100
1405
+ value: 78.045
1406
+ - type: recall_at_1000
1407
+ value: 94.419
1408
+ - type: recall_at_3
1409
+ value: 38.064
1410
+ - type: recall_at_5
1411
+ value: 44.769
1412
+ - task:
1413
+ type: Retrieval
1414
+ dataset:
1415
+ type: hotpotqa
1416
+ name: MTEB HotpotQA
1417
+ config: default
1418
+ split: test
1419
+ revision: None
1420
+ metrics:
1421
+ - type: map_at_1
1422
+ value: 32.519
1423
+ - type: map_at_10
1424
+ value: 45.831
1425
+ - type: map_at_100
1426
+ value: 46.815
1427
+ - type: map_at_1000
1428
+ value: 46.899
1429
+ - type: map_at_3
1430
+ value: 42.836
1431
+ - type: map_at_5
1432
+ value: 44.65
1433
+ - type: mrr_at_1
1434
+ value: 65.037
1435
+ - type: mrr_at_10
1436
+ value: 72.16
1437
+ - type: mrr_at_100
1438
+ value: 72.51100000000001
1439
+ - type: mrr_at_1000
1440
+ value: 72.53
1441
+ - type: mrr_at_3
1442
+ value: 70.682
1443
+ - type: mrr_at_5
1444
+ value: 71.54599999999999
1445
+ - type: ndcg_at_1
1446
+ value: 65.037
1447
+ - type: ndcg_at_10
1448
+ value: 55.17999999999999
1449
+ - type: ndcg_at_100
1450
+ value: 58.888
1451
+ - type: ndcg_at_1000
1452
+ value: 60.648
1453
+ - type: ndcg_at_3
1454
+ value: 50.501
1455
+ - type: ndcg_at_5
1456
+ value: 52.977
1457
+ - type: precision_at_1
1458
+ value: 65.037
1459
+ - type: precision_at_10
1460
+ value: 11.530999999999999
1461
+ - type: precision_at_100
1462
+ value: 1.4460000000000002
1463
+ - type: precision_at_1000
1464
+ value: 0.168
1465
+ - type: precision_at_3
1466
+ value: 31.483
1467
+ - type: precision_at_5
1468
+ value: 20.845
1469
+ - type: recall_at_1
1470
+ value: 32.519
1471
+ - type: recall_at_10
1472
+ value: 57.657000000000004
1473
+ - type: recall_at_100
1474
+ value: 72.30199999999999
1475
+ - type: recall_at_1000
1476
+ value: 84.024
1477
+ - type: recall_at_3
1478
+ value: 47.225
1479
+ - type: recall_at_5
1480
+ value: 52.113
1481
+ - task:
1482
+ type: Classification
1483
+ dataset:
1484
+ type: mteb/imdb
1485
+ name: MTEB ImdbClassification
1486
+ config: default
1487
+ split: test
1488
+ revision: 3d86128a09e091d6018b6d26cad27f2739fc2db7
1489
+ metrics:
1490
+ - type: accuracy
1491
+ value: 88.3168
1492
+ - type: ap
1493
+ value: 83.80165516037135
1494
+ - type: f1
1495
+ value: 88.29942471066407
1496
+ - task:
1497
+ type: Retrieval
1498
+ dataset:
1499
+ type: msmarco
1500
+ name: MTEB MSMARCO
1501
+ config: default
1502
+ split: dev
1503
+ revision: None
1504
+ metrics:
1505
+ - type: map_at_1
1506
+ value: 20.724999999999998
1507
+ - type: map_at_10
1508
+ value: 32.736
1509
+ - type: map_at_100
1510
+ value: 33.938
1511
+ - type: map_at_1000
1512
+ value: 33.991
1513
+ - type: map_at_3
1514
+ value: 28.788000000000004
1515
+ - type: map_at_5
1516
+ value: 31.016
1517
+ - type: mrr_at_1
1518
+ value: 21.361
1519
+ - type: mrr_at_10
1520
+ value: 33.323
1521
+ - type: mrr_at_100
1522
+ value: 34.471000000000004
1523
+ - type: mrr_at_1000
1524
+ value: 34.518
1525
+ - type: mrr_at_3
1526
+ value: 29.453000000000003
1527
+ - type: mrr_at_5
1528
+ value: 31.629
1529
+ - type: ndcg_at_1
1530
+ value: 21.361
1531
+ - type: ndcg_at_10
1532
+ value: 39.649
1533
+ - type: ndcg_at_100
1534
+ value: 45.481
1535
+ - type: ndcg_at_1000
1536
+ value: 46.775
1537
+ - type: ndcg_at_3
1538
+ value: 31.594
1539
+ - type: ndcg_at_5
1540
+ value: 35.543
1541
+ - type: precision_at_1
1542
+ value: 21.361
1543
+ - type: precision_at_10
1544
+ value: 6.3740000000000006
1545
+ - type: precision_at_100
1546
+ value: 0.931
1547
+ - type: precision_at_1000
1548
+ value: 0.104
1549
+ - type: precision_at_3
1550
+ value: 13.514999999999999
1551
+ - type: precision_at_5
1552
+ value: 10.100000000000001
1553
+ - type: recall_at_1
1554
+ value: 20.724999999999998
1555
+ - type: recall_at_10
1556
+ value: 61.034
1557
+ - type: recall_at_100
1558
+ value: 88.062
1559
+ - type: recall_at_1000
1560
+ value: 97.86399999999999
1561
+ - type: recall_at_3
1562
+ value: 39.072
1563
+ - type: recall_at_5
1564
+ value: 48.53
1565
+ - task:
1566
+ type: Classification
1567
+ dataset:
1568
+ type: mteb/mtop_domain
1569
+ name: MTEB MTOPDomainClassification (en)
1570
+ config: en
1571
+ split: test
1572
+ revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
1573
+ metrics:
1574
+ - type: accuracy
1575
+ value: 93.8919288645691
1576
+ - type: f1
1577
+ value: 93.57059586398059
1578
+ - task:
1579
+ type: Classification
1580
+ dataset:
1581
+ type: mteb/mtop_intent
1582
+ name: MTEB MTOPIntentClassification (en)
1583
+ config: en
1584
+ split: test
1585
+ revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
1586
+ metrics:
1587
+ - type: accuracy
1588
+ value: 67.97993616051072
1589
+ - type: f1
1590
+ value: 48.244319183606535
1591
+ - task:
1592
+ type: Classification
1593
+ dataset:
1594
+ type: mteb/amazon_massive_intent
1595
+ name: MTEB MassiveIntentClassification (en)
1596
+ config: en
1597
+ split: test
1598
+ revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
1599
+ metrics:
1600
+ - type: accuracy
1601
+ value: 68.90047074646941
1602
+ - type: f1
1603
+ value: 66.48999056063725
1604
+ - task:
1605
+ type: Classification
1606
+ dataset:
1607
+ type: mteb/amazon_massive_scenario
1608
+ name: MTEB MassiveScenarioClassification (en)
1609
+ config: en
1610
+ split: test
1611
+ revision: 7d571f92784cd94a019292a1f45445077d0ef634
1612
+ metrics:
1613
+ - type: accuracy
1614
+ value: 73.34566240753195
1615
+ - type: f1
1616
+ value: 73.54164154290658
1617
+ - task:
1618
+ type: Clustering
1619
+ dataset:
1620
+ type: mteb/medrxiv-clustering-p2p
1621
+ name: MTEB MedrxivClusteringP2P
1622
+ config: default
1623
+ split: test
1624
+ revision: e7a26af6f3ae46b30dde8737f02c07b1505bcc73
1625
+ metrics:
1626
+ - type: v_measure
1627
+ value: 34.21866934757011
1628
+ - task:
1629
+ type: Clustering
1630
+ dataset:
1631
+ type: mteb/medrxiv-clustering-s2s
1632
+ name: MTEB MedrxivClusteringS2S
1633
+ config: default
1634
+ split: test
1635
+ revision: 35191c8c0dca72d8ff3efcd72aa802307d469663
1636
+ metrics:
1637
+ - type: v_measure
1638
+ value: 32.000936217235534
1639
+ - task:
1640
+ type: Reranking
1641
+ dataset:
1642
+ type: mteb/mind_small
1643
+ name: MTEB MindSmallReranking
1644
+ config: default
1645
+ split: test
1646
+ revision: 3bdac13927fdc888b903db93b2ffdbd90b295a69
1647
+ metrics:
1648
+ - type: map
1649
+ value: 31.68189362520352
1650
+ - type: mrr
1651
+ value: 32.69603637784303
1652
+ - task:
1653
+ type: Retrieval
1654
+ dataset:
1655
+ type: nfcorpus
1656
+ name: MTEB NFCorpus
1657
+ config: default
1658
+ split: test
1659
+ revision: None
1660
+ metrics:
1661
+ - type: map_at_1
1662
+ value: 6.078
1663
+ - type: map_at_10
1664
+ value: 12.671
1665
+ - type: map_at_100
1666
+ value: 16.291
1667
+ - type: map_at_1000
1668
+ value: 17.855999999999998
1669
+ - type: map_at_3
1670
+ value: 9.610000000000001
1671
+ - type: map_at_5
1672
+ value: 11.152
1673
+ - type: mrr_at_1
1674
+ value: 43.963
1675
+ - type: mrr_at_10
1676
+ value: 53.173
1677
+ - type: mrr_at_100
1678
+ value: 53.718999999999994
1679
+ - type: mrr_at_1000
1680
+ value: 53.756
1681
+ - type: mrr_at_3
1682
+ value: 50.980000000000004
1683
+ - type: mrr_at_5
1684
+ value: 52.42
1685
+ - type: ndcg_at_1
1686
+ value: 42.415000000000006
1687
+ - type: ndcg_at_10
1688
+ value: 34.086
1689
+ - type: ndcg_at_100
1690
+ value: 32.545
1691
+ - type: ndcg_at_1000
1692
+ value: 41.144999999999996
1693
+ - type: ndcg_at_3
1694
+ value: 39.434999999999995
1695
+ - type: ndcg_at_5
1696
+ value: 37.888
1697
+ - type: precision_at_1
1698
+ value: 43.653
1699
+ - type: precision_at_10
1700
+ value: 25.014999999999997
1701
+ - type: precision_at_100
1702
+ value: 8.594
1703
+ - type: precision_at_1000
1704
+ value: 2.169
1705
+ - type: precision_at_3
1706
+ value: 37.049
1707
+ - type: precision_at_5
1708
+ value: 33.065
1709
+ - type: recall_at_1
1710
+ value: 6.078
1711
+ - type: recall_at_10
1712
+ value: 16.17
1713
+ - type: recall_at_100
1714
+ value: 34.512
1715
+ - type: recall_at_1000
1716
+ value: 65.447
1717
+ - type: recall_at_3
1718
+ value: 10.706
1719
+ - type: recall_at_5
1720
+ value: 13.158
1721
+ - task:
1722
+ type: Retrieval
1723
+ dataset:
1724
+ type: nq
1725
+ name: MTEB NQ
1726
+ config: default
1727
+ split: test
1728
+ revision: None
1729
+ metrics:
1730
+ - type: map_at_1
1731
+ value: 27.378000000000004
1732
+ - type: map_at_10
1733
+ value: 42.178
1734
+ - type: map_at_100
1735
+ value: 43.32
1736
+ - type: map_at_1000
1737
+ value: 43.358000000000004
1738
+ - type: map_at_3
1739
+ value: 37.474000000000004
1740
+ - type: map_at_5
1741
+ value: 40.333000000000006
1742
+ - type: mrr_at_1
1743
+ value: 30.823
1744
+ - type: mrr_at_10
1745
+ value: 44.626
1746
+ - type: mrr_at_100
1747
+ value: 45.494
1748
+ - type: mrr_at_1000
1749
+ value: 45.519
1750
+ - type: mrr_at_3
1751
+ value: 40.585
1752
+ - type: mrr_at_5
1753
+ value: 43.146
1754
+ - type: ndcg_at_1
1755
+ value: 30.794
1756
+ - type: ndcg_at_10
1757
+ value: 50.099000000000004
1758
+ - type: ndcg_at_100
1759
+ value: 54.900999999999996
1760
+ - type: ndcg_at_1000
1761
+ value: 55.69499999999999
1762
+ - type: ndcg_at_3
1763
+ value: 41.238
1764
+ - type: ndcg_at_5
1765
+ value: 46.081
1766
+ - type: precision_at_1
1767
+ value: 30.794
1768
+ - type: precision_at_10
1769
+ value: 8.549
1770
+ - type: precision_at_100
1771
+ value: 1.124
1772
+ - type: precision_at_1000
1773
+ value: 0.12
1774
+ - type: precision_at_3
1775
+ value: 18.926000000000002
1776
+ - type: precision_at_5
1777
+ value: 14.16
1778
+ - type: recall_at_1
1779
+ value: 27.378000000000004
1780
+ - type: recall_at_10
1781
+ value: 71.842
1782
+ - type: recall_at_100
1783
+ value: 92.565
1784
+ - type: recall_at_1000
1785
+ value: 98.402
1786
+ - type: recall_at_3
1787
+ value: 49.053999999999995
1788
+ - type: recall_at_5
1789
+ value: 60.207
1790
+ - task:
1791
+ type: Retrieval
1792
+ dataset:
1793
+ type: quora
1794
+ name: MTEB QuoraRetrieval
1795
+ config: default
1796
+ split: test
1797
+ revision: None
1798
+ metrics:
1799
+ - type: map_at_1
1800
+ value: 70.557
1801
+ - type: map_at_10
1802
+ value: 84.729
1803
+ - type: map_at_100
1804
+ value: 85.369
1805
+ - type: map_at_1000
1806
+ value: 85.382
1807
+ - type: map_at_3
1808
+ value: 81.72
1809
+ - type: map_at_5
1810
+ value: 83.613
1811
+ - type: mrr_at_1
1812
+ value: 81.3
1813
+ - type: mrr_at_10
1814
+ value: 87.488
1815
+ - type: mrr_at_100
1816
+ value: 87.588
1817
+ - type: mrr_at_1000
1818
+ value: 87.589
1819
+ - type: mrr_at_3
1820
+ value: 86.53
1821
+ - type: mrr_at_5
1822
+ value: 87.18599999999999
1823
+ - type: ndcg_at_1
1824
+ value: 81.28999999999999
1825
+ - type: ndcg_at_10
1826
+ value: 88.442
1827
+ - type: ndcg_at_100
1828
+ value: 89.637
1829
+ - type: ndcg_at_1000
1830
+ value: 89.70700000000001
1831
+ - type: ndcg_at_3
1832
+ value: 85.55199999999999
1833
+ - type: ndcg_at_5
1834
+ value: 87.154
1835
+ - type: precision_at_1
1836
+ value: 81.28999999999999
1837
+ - type: precision_at_10
1838
+ value: 13.489999999999998
1839
+ - type: precision_at_100
1840
+ value: 1.54
1841
+ - type: precision_at_1000
1842
+ value: 0.157
1843
+ - type: precision_at_3
1844
+ value: 37.553
1845
+ - type: precision_at_5
1846
+ value: 24.708
1847
+ - type: recall_at_1
1848
+ value: 70.557
1849
+ - type: recall_at_10
1850
+ value: 95.645
1851
+ - type: recall_at_100
1852
+ value: 99.693
1853
+ - type: recall_at_1000
1854
+ value: 99.995
1855
+ - type: recall_at_3
1856
+ value: 87.359
1857
+ - type: recall_at_5
1858
+ value: 91.89699999999999
1859
+ - task:
1860
+ type: Clustering
1861
+ dataset:
1862
+ type: mteb/reddit-clustering
1863
+ name: MTEB RedditClustering
1864
+ config: default
1865
+ split: test
1866
+ revision: 24640382cdbf8abc73003fb0fa6d111a705499eb
1867
+ metrics:
1868
+ - type: v_measure
1869
+ value: 63.65060114776209
1870
+ - task:
1871
+ type: Clustering
1872
+ dataset:
1873
+ type: mteb/reddit-clustering-p2p
1874
+ name: MTEB RedditClusteringP2P
1875
+ config: default
1876
+ split: test
1877
+ revision: 282350215ef01743dc01b456c7f5241fa8937f16
1878
+ metrics:
1879
+ - type: v_measure
1880
+ value: 64.63271250680617
1881
+ - task:
1882
+ type: Retrieval
1883
+ dataset:
1884
+ type: scidocs
1885
+ name: MTEB SCIDOCS
1886
+ config: default
1887
+ split: test
1888
+ revision: None
1889
+ metrics:
1890
+ - type: map_at_1
1891
+ value: 4.263
1892
+ - type: map_at_10
1893
+ value: 10.801
1894
+ - type: map_at_100
1895
+ value: 12.888
1896
+ - type: map_at_1000
1897
+ value: 13.224
1898
+ - type: map_at_3
1899
+ value: 7.362
1900
+ - type: map_at_5
1901
+ value: 9.149000000000001
1902
+ - type: mrr_at_1
1903
+ value: 21
1904
+ - type: mrr_at_10
1905
+ value: 31.416
1906
+ - type: mrr_at_100
1907
+ value: 32.513
1908
+ - type: mrr_at_1000
1909
+ value: 32.58
1910
+ - type: mrr_at_3
1911
+ value: 28.116999999999997
1912
+ - type: mrr_at_5
1913
+ value: 29.976999999999997
1914
+ - type: ndcg_at_1
1915
+ value: 21
1916
+ - type: ndcg_at_10
1917
+ value: 18.551000000000002
1918
+ - type: ndcg_at_100
1919
+ value: 26.657999999999998
1920
+ - type: ndcg_at_1000
1921
+ value: 32.485
1922
+ - type: ndcg_at_3
1923
+ value: 16.834
1924
+ - type: ndcg_at_5
1925
+ value: 15.204999999999998
1926
+ - type: precision_at_1
1927
+ value: 21
1928
+ - type: precision_at_10
1929
+ value: 9.84
1930
+ - type: precision_at_100
1931
+ value: 2.16
1932
+ - type: precision_at_1000
1933
+ value: 0.35500000000000004
1934
+ - type: precision_at_3
1935
+ value: 15.667
1936
+ - type: precision_at_5
1937
+ value: 13.62
1938
+ - type: recall_at_1
1939
+ value: 4.263
1940
+ - type: recall_at_10
1941
+ value: 19.922
1942
+ - type: recall_at_100
1943
+ value: 43.808
1944
+ - type: recall_at_1000
1945
+ value: 72.14500000000001
1946
+ - type: recall_at_3
1947
+ value: 9.493
1948
+ - type: recall_at_5
1949
+ value: 13.767999999999999
1950
+ - task:
1951
+ type: STS
1952
+ dataset:
1953
+ type: mteb/sickr-sts
1954
+ name: MTEB SICK-R
1955
+ config: default
1956
+ split: test
1957
+ revision: a6ea5a8cab320b040a23452cc28066d9beae2cee
1958
+ metrics:
1959
+ - type: cos_sim_spearman
1960
+ value: 81.27446313317233
1961
+ - task:
1962
+ type: STS
1963
+ dataset:
1964
+ type: mteb/sts12-sts
1965
+ name: MTEB STS12
1966
+ config: default
1967
+ split: test
1968
+ revision: a0d554a64d88156834ff5ae9920b964011b16384
1969
+ metrics:
1970
+ - type: cos_sim_spearman
1971
+ value: 76.27963301217527
1972
+ - task:
1973
+ type: STS
1974
+ dataset:
1975
+ type: mteb/sts13-sts
1976
+ name: MTEB STS13
1977
+ config: default
1978
+ split: test
1979
+ revision: 7e90230a92c190f1bf69ae9002b8cea547a64cca
1980
+ metrics:
1981
+ - type: cos_sim_spearman
1982
+ value: 88.18495048450949
1983
+ - task:
1984
+ type: STS
1985
+ dataset:
1986
+ type: mteb/sts14-sts
1987
+ name: MTEB STS14
1988
+ config: default
1989
+ split: test
1990
+ revision: 6031580fec1f6af667f0bd2da0a551cf4f0b2375
1991
+ metrics:
1992
+ - type: cos_sim_spearman
1993
+ value: 81.91982338692046
1994
+ - task:
1995
+ type: STS
1996
+ dataset:
1997
+ type: mteb/sts15-sts
1998
+ name: MTEB STS15
1999
+ config: default
2000
+ split: test
2001
+ revision: ae752c7c21bf194d8b67fd573edf7ae58183cbe3
2002
+ metrics:
2003
+ - type: cos_sim_spearman
2004
+ value: 89.00896818385291
2005
+ - task:
2006
+ type: STS
2007
+ dataset:
2008
+ type: mteb/sts16-sts
2009
+ name: MTEB STS16
2010
+ config: default
2011
+ split: test
2012
+ revision: 4d8694f8f0e0100860b497b999b3dbed754a0513
2013
+ metrics:
2014
+ - type: cos_sim_spearman
2015
+ value: 85.48814644586132
2016
+ - task:
2017
+ type: STS
2018
+ dataset:
2019
+ type: mteb/sts17-crosslingual-sts
2020
+ name: MTEB STS17 (en-en)
2021
+ config: en-en
2022
+ split: test
2023
+ revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d
2024
+ metrics:
2025
+ - type: cos_sim_spearman
2026
+ value: 90.30116926966582
2027
+ - task:
2028
+ type: STS
2029
+ dataset:
2030
+ type: mteb/sts22-crosslingual-sts
2031
+ name: MTEB STS22 (en)
2032
+ config: en
2033
+ split: test
2034
+ revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
2035
+ metrics:
2036
+ - type: cos_sim_spearman
2037
+ value: 67.74132963032342
2038
+ - task:
2039
+ type: STS
2040
+ dataset:
2041
+ type: mteb/stsbenchmark-sts
2042
+ name: MTEB STSBenchmark
2043
+ config: default
2044
+ split: test
2045
+ revision: b0fddb56ed78048fa8b90373c8a3cfc37b684831
2046
+ metrics:
2047
+ - type: cos_sim_spearman
2048
+ value: 86.87741355780479
2049
+ - task:
2050
+ type: Reranking
2051
+ dataset:
2052
+ type: mteb/scidocs-reranking
2053
+ name: MTEB SciDocsRR
2054
+ config: default
2055
+ split: test
2056
+ revision: d3c5e1fc0b855ab6097bf1cda04dd73947d7caab
2057
+ metrics:
2058
+ - type: map
2059
+ value: 82.0019012295875
2060
+ - type: mrr
2061
+ value: 94.70267024188593
2062
+ - task:
2063
+ type: Retrieval
2064
+ dataset:
2065
+ type: scifact
2066
+ name: MTEB SciFact
2067
+ config: default
2068
+ split: test
2069
+ revision: None
2070
+ metrics:
2071
+ - type: map_at_1
2072
+ value: 50.05
2073
+ - type: map_at_10
2074
+ value: 59.36
2075
+ - type: map_at_100
2076
+ value: 59.967999999999996
2077
+ - type: map_at_1000
2078
+ value: 60.023
2079
+ - type: map_at_3
2080
+ value: 56.515
2081
+ - type: map_at_5
2082
+ value: 58.272999999999996
2083
+ - type: mrr_at_1
2084
+ value: 53
2085
+ - type: mrr_at_10
2086
+ value: 61.102000000000004
2087
+ - type: mrr_at_100
2088
+ value: 61.476
2089
+ - type: mrr_at_1000
2090
+ value: 61.523
2091
+ - type: mrr_at_3
2092
+ value: 58.778
2093
+ - type: mrr_at_5
2094
+ value: 60.128
2095
+ - type: ndcg_at_1
2096
+ value: 53
2097
+ - type: ndcg_at_10
2098
+ value: 64.43100000000001
2099
+ - type: ndcg_at_100
2100
+ value: 66.73599999999999
2101
+ - type: ndcg_at_1000
2102
+ value: 68.027
2103
+ - type: ndcg_at_3
2104
+ value: 59.279
2105
+ - type: ndcg_at_5
2106
+ value: 61.888
2107
+ - type: precision_at_1
2108
+ value: 53
2109
+ - type: precision_at_10
2110
+ value: 8.767
2111
+ - type: precision_at_100
2112
+ value: 1.01
2113
+ - type: precision_at_1000
2114
+ value: 0.11100000000000002
2115
+ - type: precision_at_3
2116
+ value: 23.444000000000003
2117
+ - type: precision_at_5
2118
+ value: 15.667
2119
+ - type: recall_at_1
2120
+ value: 50.05
2121
+ - type: recall_at_10
2122
+ value: 78.511
2123
+ - type: recall_at_100
2124
+ value: 88.5
2125
+ - type: recall_at_1000
2126
+ value: 98.333
2127
+ - type: recall_at_3
2128
+ value: 64.117
2129
+ - type: recall_at_5
2130
+ value: 70.867
2131
+ - task:
2132
+ type: PairClassification
2133
+ dataset:
2134
+ type: mteb/sprintduplicatequestions-pairclassification
2135
+ name: MTEB SprintDuplicateQuestions
2136
+ config: default
2137
+ split: test
2138
+ revision: d66bd1f72af766a5cc4b0ca5e00c162f89e8cc46
2139
+ metrics:
2140
+ - type: cos_sim_accuracy
2141
+ value: 99.72178217821782
2142
+ - type: cos_sim_ap
2143
+ value: 93.0728601593541
2144
+ - type: cos_sim_f1
2145
+ value: 85.6727976766699
2146
+ - type: cos_sim_precision
2147
+ value: 83.02063789868667
2148
+ - type: cos_sim_recall
2149
+ value: 88.5
2150
+ - type: dot_accuracy
2151
+ value: 99.72178217821782
2152
+ - type: dot_ap
2153
+ value: 93.07287396168348
2154
+ - type: dot_f1
2155
+ value: 85.6727976766699
2156
+ - type: dot_precision
2157
+ value: 83.02063789868667
2158
+ - type: dot_recall
2159
+ value: 88.5
2160
+ - type: euclidean_accuracy
2161
+ value: 99.72178217821782
2162
+ - type: euclidean_ap
2163
+ value: 93.07285657982895
2164
+ - type: euclidean_f1
2165
+ value: 85.6727976766699
2166
+ - type: euclidean_precision
2167
+ value: 83.02063789868667
2168
+ - type: euclidean_recall
2169
+ value: 88.5
2170
+ - type: manhattan_accuracy
2171
+ value: 99.72475247524753
2172
+ - type: manhattan_ap
2173
+ value: 93.02792973059809
2174
+ - type: manhattan_f1
2175
+ value: 85.7727737973388
2176
+ - type: manhattan_precision
2177
+ value: 87.84067085953879
2178
+ - type: manhattan_recall
2179
+ value: 83.8
2180
+ - type: max_accuracy
2181
+ value: 99.72475247524753
2182
+ - type: max_ap
2183
+ value: 93.07287396168348
2184
+ - type: max_f1
2185
+ value: 85.7727737973388
2186
+ - task:
2187
+ type: Clustering
2188
+ dataset:
2189
+ type: mteb/stackexchange-clustering
2190
+ name: MTEB StackExchangeClustering
2191
+ config: default
2192
+ split: test
2193
+ revision: 6cbc1f7b2bc0622f2e39d2c77fa502909748c259
2194
+ metrics:
2195
+ - type: v_measure
2196
+ value: 68.77583615550819
2197
+ - task:
2198
+ type: Clustering
2199
+ dataset:
2200
+ type: mteb/stackexchange-clustering-p2p
2201
+ name: MTEB StackExchangeClusteringP2P
2202
+ config: default
2203
+ split: test
2204
+ revision: 815ca46b2622cec33ccafc3735d572c266efdb44
2205
+ metrics:
2206
+ - type: v_measure
2207
+ value: 36.151636938606956
2208
+ - task:
2209
+ type: Reranking
2210
+ dataset:
2211
+ type: mteb/stackoverflowdupquestions-reranking
2212
+ name: MTEB StackOverflowDupQuestions
2213
+ config: default
2214
+ split: test
2215
+ revision: e185fbe320c72810689fc5848eb6114e1ef5ec69
2216
+ metrics:
2217
+ - type: map
2218
+ value: 52.16607939471187
2219
+ - type: mrr
2220
+ value: 52.95172046091163
2221
+ - task:
2222
+ type: Summarization
2223
+ dataset:
2224
+ type: mteb/summeval
2225
+ name: MTEB SummEval
2226
+ config: default
2227
+ split: test
2228
+ revision: cda12ad7615edc362dbf25a00fdd61d3b1eaf93c
2229
+ metrics:
2230
+ - type: cos_sim_pearson
2231
+ value: 31.314646669495666
2232
+ - type: cos_sim_spearman
2233
+ value: 31.83562491439455
2234
+ - type: dot_pearson
2235
+ value: 31.314590842874157
2236
+ - type: dot_spearman
2237
+ value: 31.83363065810437
2238
+ - task:
2239
+ type: Retrieval
2240
+ dataset:
2241
+ type: trec-covid
2242
+ name: MTEB TRECCOVID
2243
+ config: default
2244
+ split: test
2245
+ revision: None
2246
+ metrics:
2247
+ - type: map_at_1
2248
+ value: 0.198
2249
+ - type: map_at_10
2250
+ value: 1.3010000000000002
2251
+ - type: map_at_100
2252
+ value: 7.2139999999999995
2253
+ - type: map_at_1000
2254
+ value: 20.179
2255
+ - type: map_at_3
2256
+ value: 0.528
2257
+ - type: map_at_5
2258
+ value: 0.8019999999999999
2259
+ - type: mrr_at_1
2260
+ value: 72
2261
+ - type: mrr_at_10
2262
+ value: 83.39999999999999
2263
+ - type: mrr_at_100
2264
+ value: 83.39999999999999
2265
+ - type: mrr_at_1000
2266
+ value: 83.39999999999999
2267
+ - type: mrr_at_3
2268
+ value: 81.667
2269
+ - type: mrr_at_5
2270
+ value: 83.06700000000001
2271
+ - type: ndcg_at_1
2272
+ value: 66
2273
+ - type: ndcg_at_10
2274
+ value: 58.059000000000005
2275
+ - type: ndcg_at_100
2276
+ value: 44.316
2277
+ - type: ndcg_at_1000
2278
+ value: 43.147000000000006
2279
+ - type: ndcg_at_3
2280
+ value: 63.815999999999995
2281
+ - type: ndcg_at_5
2282
+ value: 63.005
2283
+ - type: precision_at_1
2284
+ value: 72
2285
+ - type: precision_at_10
2286
+ value: 61.4
2287
+ - type: precision_at_100
2288
+ value: 45.62
2289
+ - type: precision_at_1000
2290
+ value: 19.866
2291
+ - type: precision_at_3
2292
+ value: 70
2293
+ - type: precision_at_5
2294
+ value: 68.8
2295
+ - type: recall_at_1
2296
+ value: 0.198
2297
+ - type: recall_at_10
2298
+ value: 1.517
2299
+ - type: recall_at_100
2300
+ value: 10.587
2301
+ - type: recall_at_1000
2302
+ value: 41.233
2303
+ - type: recall_at_3
2304
+ value: 0.573
2305
+ - type: recall_at_5
2306
+ value: 0.907
2307
+ - task:
2308
+ type: Retrieval
2309
+ dataset:
2310
+ type: webis-touche2020
2311
+ name: MTEB Touche2020
2312
+ config: default
2313
+ split: test
2314
+ revision: None
2315
+ metrics:
2316
+ - type: map_at_1
2317
+ value: 1.894
2318
+ - type: map_at_10
2319
+ value: 8.488999999999999
2320
+ - type: map_at_100
2321
+ value: 14.445
2322
+ - type: map_at_1000
2323
+ value: 16.078
2324
+ - type: map_at_3
2325
+ value: 4.589
2326
+ - type: map_at_5
2327
+ value: 6.019
2328
+ - type: mrr_at_1
2329
+ value: 22.448999999999998
2330
+ - type: mrr_at_10
2331
+ value: 39.82
2332
+ - type: mrr_at_100
2333
+ value: 40.752
2334
+ - type: mrr_at_1000
2335
+ value: 40.771
2336
+ - type: mrr_at_3
2337
+ value: 34.354
2338
+ - type: mrr_at_5
2339
+ value: 37.721
2340
+ - type: ndcg_at_1
2341
+ value: 19.387999999999998
2342
+ - type: ndcg_at_10
2343
+ value: 21.563
2344
+ - type: ndcg_at_100
2345
+ value: 33.857
2346
+ - type: ndcg_at_1000
2347
+ value: 46.199
2348
+ - type: ndcg_at_3
2349
+ value: 22.296
2350
+ - type: ndcg_at_5
2351
+ value: 21.770999999999997
2352
+ - type: precision_at_1
2353
+ value: 22.448999999999998
2354
+ - type: precision_at_10
2355
+ value: 19.796
2356
+ - type: precision_at_100
2357
+ value: 7.142999999999999
2358
+ - type: precision_at_1000
2359
+ value: 1.541
2360
+ - type: precision_at_3
2361
+ value: 24.490000000000002
2362
+ - type: precision_at_5
2363
+ value: 22.448999999999998
2364
+ - type: recall_at_1
2365
+ value: 1.894
2366
+ - type: recall_at_10
2367
+ value: 14.931
2368
+ - type: recall_at_100
2369
+ value: 45.524
2370
+ - type: recall_at_1000
2371
+ value: 83.243
2372
+ - type: recall_at_3
2373
+ value: 5.712
2374
+ - type: recall_at_5
2375
+ value: 8.386000000000001
2376
+ - task:
2377
+ type: Classification
2378
+ dataset:
2379
+ type: mteb/toxic_conversations_50k
2380
+ name: MTEB ToxicConversationsClassification
2381
+ config: default
2382
+ split: test
2383
+ revision: d7c0de2777da35d6aae2200a62c6e0e5af397c4c
2384
+ metrics:
2385
+ - type: accuracy
2386
+ value: 71.049
2387
+ - type: ap
2388
+ value: 13.85116971310922
2389
+ - type: f1
2390
+ value: 54.37504302487686
2391
+ - task:
2392
+ type: Classification
2393
+ dataset:
2394
+ type: mteb/tweet_sentiment_extraction
2395
+ name: MTEB TweetSentimentExtractionClassification
2396
+ config: default
2397
+ split: test
2398
+ revision: d604517c81ca91fe16a244d1248fc021f9ecee7a
2399
+ metrics:
2400
+ - type: accuracy
2401
+ value: 64.1312959818902
2402
+ - type: f1
2403
+ value: 64.11413877009383
2404
+ - task:
2405
+ type: Clustering
2406
+ dataset:
2407
+ type: mteb/twentynewsgroups-clustering
2408
+ name: MTEB TwentyNewsgroupsClustering
2409
+ config: default
2410
+ split: test
2411
+ revision: 6125ec4e24fa026cec8a478383ee943acfbd5449
2412
+ metrics:
2413
+ - type: v_measure
2414
+ value: 54.13103431861502
2415
+ - task:
2416
+ type: PairClassification
2417
+ dataset:
2418
+ type: mteb/twittersemeval2015-pairclassification
2419
+ name: MTEB TwitterSemEval2015
2420
+ config: default
2421
+ split: test
2422
+ revision: 70970daeab8776df92f5ea462b6173c0b46fd2d1
2423
+ metrics:
2424
+ - type: cos_sim_accuracy
2425
+ value: 87.327889372355
2426
+ - type: cos_sim_ap
2427
+ value: 77.42059895975699
2428
+ - type: cos_sim_f1
2429
+ value: 71.02706903250873
2430
+ - type: cos_sim_precision
2431
+ value: 69.75324344950394
2432
+ - type: cos_sim_recall
2433
+ value: 72.34828496042216
2434
+ - type: dot_accuracy
2435
+ value: 87.327889372355
2436
+ - type: dot_ap
2437
+ value: 77.4209479346677
2438
+ - type: dot_f1
2439
+ value: 71.02706903250873
2440
+ - type: dot_precision
2441
+ value: 69.75324344950394
2442
+ - type: dot_recall
2443
+ value: 72.34828496042216
2444
+ - type: euclidean_accuracy
2445
+ value: 87.327889372355
2446
+ - type: euclidean_ap
2447
+ value: 77.42096495861037
2448
+ - type: euclidean_f1
2449
+ value: 71.02706903250873
2450
+ - type: euclidean_precision
2451
+ value: 69.75324344950394
2452
+ - type: euclidean_recall
2453
+ value: 72.34828496042216
2454
+ - type: manhattan_accuracy
2455
+ value: 87.31000774870358
2456
+ - type: manhattan_ap
2457
+ value: 77.38930750711619
2458
+ - type: manhattan_f1
2459
+ value: 71.07935314027831
2460
+ - type: manhattan_precision
2461
+ value: 67.70957726295677
2462
+ - type: manhattan_recall
2463
+ value: 74.80211081794195
2464
+ - type: max_accuracy
2465
+ value: 87.327889372355
2466
+ - type: max_ap
2467
+ value: 77.42096495861037
2468
+ - type: max_f1
2469
+ value: 71.07935314027831
2470
+ - task:
2471
+ type: PairClassification
2472
+ dataset:
2473
+ type: mteb/twitterurlcorpus-pairclassification
2474
+ name: MTEB TwitterURLCorpus
2475
+ config: default
2476
+ split: test
2477
+ revision: 8b6510b0b1fa4e4c4f879467980e9be563ec1cdf
2478
+ metrics:
2479
+ - type: cos_sim_accuracy
2480
+ value: 89.58939729110878
2481
+ - type: cos_sim_ap
2482
+ value: 87.17594155025475
2483
+ - type: cos_sim_f1
2484
+ value: 79.21146953405018
2485
+ - type: cos_sim_precision
2486
+ value: 76.8918527109307
2487
+ - type: cos_sim_recall
2488
+ value: 81.67539267015707
2489
+ - type: dot_accuracy
2490
+ value: 89.58939729110878
2491
+ - type: dot_ap
2492
+ value: 87.17593963273593
2493
+ - type: dot_f1
2494
+ value: 79.21146953405018
2495
+ - type: dot_precision
2496
+ value: 76.8918527109307
2497
+ - type: dot_recall
2498
+ value: 81.67539267015707
2499
+ - type: euclidean_accuracy
2500
+ value: 89.58939729110878
2501
+ - type: euclidean_ap
2502
+ value: 87.17592466925834
2503
+ - type: euclidean_f1
2504
+ value: 79.21146953405018
2505
+ - type: euclidean_precision
2506
+ value: 76.8918527109307
2507
+ - type: euclidean_recall
2508
+ value: 81.67539267015707
2509
+ - type: manhattan_accuracy
2510
+ value: 89.62626615438352
2511
+ - type: manhattan_ap
2512
+ value: 87.16589873161546
2513
+ - type: manhattan_f1
2514
+ value: 79.25143598295348
2515
+ - type: manhattan_precision
2516
+ value: 76.39494177323712
2517
+ - type: manhattan_recall
2518
+ value: 82.32984293193716
2519
+ - type: max_accuracy
2520
+ value: 89.62626615438352
2521
+ - type: max_ap
2522
+ value: 87.17594155025475
2523
+ - type: max_f1
2524
+ value: 79.25143598295348
2525
+ ---
2526
+
2527
+ # hkunlp/instructor-large
2528
+ We introduce **Instructor**👨‍🏫, an instruction-finetuned text embedding model that can generate text embeddings tailored to any task (e.g., classification, retrieval, clustering, text evaluation, etc.) and domain (e.g., science, finance, etc.) ***by simply providing the task instruction, without any finetuning***. Instructor👨‍🏫 achieves state-of-the-art (SOTA) performance on 70 diverse embedding tasks ([MTEB leaderboard](https://huggingface.co/spaces/mteb/leaderboard))!
2529
+ The model is easy to use with **our customized** `sentence-transformers` library. For more details, check out [our paper](https://arxiv.org/abs/2212.09741) and [project page](https://instructor-embedding.github.io/)!
2530
+
2531
+ **************************** **Updates** ****************************
2532
+
2533
+ * 12/28: We released a new [checkpoint](https://huggingface.co/hkunlp/instructor-large) trained with hard negatives, which gives better performance.
2534
+ * 12/21: We released our [paper](https://arxiv.org/abs/2212.09741), [code](https://github.com/HKUNLP/instructor-embedding), [checkpoint](https://huggingface.co/hkunlp/instructor-large) and [project page](https://instructor-embedding.github.io/)! Check them out!
2535
+
2536
+ ## Quick start
2537
+ <hr />
2538
+
2539
+ ## Installation
2540
+ ```bash
2541
+ pip install InstructorEmbedding
2542
+ ```
2543
+
2544
+ ## Compute your customized embeddings
2545
+ Then you can use the model like this to calculate domain-specific and task-aware embeddings:
2546
+ ```python
2547
+ from InstructorEmbedding import INSTRUCTOR
2548
+ model = INSTRUCTOR('hkunlp/instructor-large')
2549
+ sentence = "3D ActionSLAM: wearable person tracking in multi-floor environments"
2550
+ instruction = "Represent the Science title:"
2551
+ embeddings = model.encode([[instruction,sentence]])
2552
+ print(embeddings)
2553
+ ```
2554
+
2555
+ ## Use cases
2556
+ <hr />
2557
+
2558
+ ## Calculate embeddings for your customized texts
2559
+ If you want to calculate customized embeddings for specific sentences, you may follow the unified template to write instructions:
2560
+
2561
+ &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Represent the `domain` `text_type` for `task_objective`:
2562
+ * `domain` is optional, and it specifies the domain of the text, e.g., science, finance, medicine, etc.
2563
+ * `text_type` is required, and it specifies the encoding unit, e.g., sentence, document, paragraph, etc.
2564
+ * `task_objective` is optional, and it specifies the objective of embedding, e.g., retrieve a document, classify the sentence, etc.
2565
+
2566
+ ## Calculate Sentence similarities
2567
+ You can further use the model to compute similarities between two groups of sentences, with **customized embeddings**.
2568
+ ```python
2569
+ from sklearn.metrics.pairwise import cosine_similarity
2570
+ sentences_a = [['Represent the Science sentence: ','Parton energy loss in QCD matter'],
2571
+ ['Represent the Financial statement: ','The Federal Reserve on Wednesday raised its benchmark interest rate.']]
2572
+ sentences_b = [['Represent the Science sentence: ','The Chiral Phase Transition in Dissipative Dynamics'],
2573
+ ['Represent the Financial statement: ','The funds rose less than 0.5 per cent on Friday']]
2574
+ embeddings_a = model.encode(sentences_a)
2575
+ embeddings_b = model.encode(sentences_b)
2576
+ similarities = cosine_similarity(embeddings_a,embeddings_b)
2577
+ print(similarities)
2578
+ ```
2579
+
2580
+ ## Information Retrieval
2581
+ You can also use **customized embeddings** for information retrieval.
2582
+ ```python
2583
+ import numpy as np
2584
+ from sklearn.metrics.pairwise import cosine_similarity
2585
+ query = [['Represent the Wikipedia question for retrieving supporting documents: ','where is the food stored in a yam plant']]
2586
+ corpus = [['Represent the Wikipedia document for retrieval: ','Capitalism has been dominant in the Western world since the end of feudalism, but most feel[who?] that the term "mixed economies" more precisely describes most contemporary economies, due to their containing both private-owned and state-owned enterprises. In capitalism, prices determine the demand-supply scale. For example, higher demand for certain goods and services lead to higher prices and lower demand for certain goods lead to lower prices.'],
2587
+ ['Represent the Wikipedia document for retrieval: ',"The disparate impact theory is especially controversial under the Fair Housing Act because the Act regulates many activities relating to housing, insurance, and mortgage loans—and some scholars have argued that the theory's use under the Fair Housing Act, combined with extensions of the Community Reinvestment Act, contributed to rise of sub-prime lending and the crash of the U.S. housing market and ensuing global economic recession"],
2588
+ ['Represent the Wikipedia document for retrieval: ','Disparate impact in United States labor law refers to practices in employment, housing, and other areas that adversely affect one group of people of a protected characteristic more than another, even though rules applied by employers or landlords are formally neutral. Although the protected classes vary by statute, most federal civil rights laws protect based on race, color, religion, national origin, and sex as protected traits, and some laws include disability status and other traits as well.']]
2589
+ query_embeddings = model.encode(query)
2590
+ corpus_embeddings = model.encode(corpus)
2591
+ similarities = cosine_similarity(query_embeddings,corpus_embeddings)
2592
+ retrieved_doc_id = np.argmax(similarities)
2593
+ print(retrieved_doc_id)
2594
+ ```
2595
+
2596
+ ## Clustering
2597
+ Use **customized embeddings** to cluster texts into groups.
2598
+ ```python
2599
+ import sklearn.cluster
2600
+ sentences = [['Represent the Medicine sentence for clustering: ','Dynamical Scalar Degree of Freedom in Horava-Lifshitz Gravity'],
2601
+ ['Represent the Medicine sentence for clustering: ','Comparison of Atmospheric Neutrino Flux Calculations at Low Energies'],
2602
+ ['Represent the Medicine sentence for clustering: ','Fermion Bags in the Massive Gross-Neveu Model'],
2603
+ ['Represent the Medicine sentence for clustering: ',"QCD corrections to Associated t-tbar-H production at the Tevatron"],
2604
+ ['Represent the Medicine sentence for clustering: ','A New Analysis of the R Measurements: Resonance Parameters of the Higher, Vector States of Charmonium']]
2605
+ embeddings = model.encode(sentences)
2606
+ clustering_model = sklearn.cluster.MiniBatchKMeans(n_clusters=2)
2607
+ clustering_model.fit(embeddings)
2608
+ cluster_assignment = clustering_model.labels_
2609
+ print(cluster_assignment)
2610
+ ```
instructor-large/config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/scratch/acd13578qu/metatrain_models/enhanced_large/checkpoint-300/",
3
+ "architectures": [
4
+ "T5EncoderModel"
5
+ ],
6
+ "d_ff": 4096,
7
+ "d_kv": 64,
8
+ "d_model": 1024,
9
+ "decoder_start_token_id": 0,
10
+ "dense_act_fn": "relu",
11
+ "dropout_rate": 0.1,
12
+ "eos_token_id": 1,
13
+ "feed_forward_proj": "relu",
14
+ "initializer_factor": 1.0,
15
+ "is_encoder_decoder": true,
16
+ "is_gated_act": false,
17
+ "layer_norm_epsilon": 1e-06,
18
+ "model_type": "t5",
19
+ "n_positions": 512,
20
+ "num_decoder_layers": 24,
21
+ "num_heads": 16,
22
+ "num_layers": 24,
23
+ "output_past": true,
24
+ "pad_token_id": 0,
25
+ "relative_attention_max_distance": 128,
26
+ "relative_attention_num_buckets": 32,
27
+ "task_specific_params": {
28
+ "summarization": {
29
+ "early_stopping": true,
30
+ "length_penalty": 2.0,
31
+ "max_length": 200,
32
+ "min_length": 30,
33
+ "no_repeat_ngram_size": 3,
34
+ "num_beams": 4,
35
+ "prefix": "summarize: "
36
+ },
37
+ "translation_en_to_de": {
38
+ "early_stopping": true,
39
+ "max_length": 300,
40
+ "num_beams": 4,
41
+ "prefix": "translate English to German: "
42
+ },
43
+ "translation_en_to_fr": {
44
+ "early_stopping": true,
45
+ "max_length": 300,
46
+ "num_beams": 4,
47
+ "prefix": "translate English to French: "
48
+ },
49
+ "translation_en_to_ro": {
50
+ "early_stopping": true,
51
+ "max_length": 300,
52
+ "num_beams": 4,
53
+ "prefix": "translate English to Romanian: "
54
+ }
55
+ },
56
+ "torch_dtype": "float32",
57
+ "transformers_version": "4.20.0.dev0",
58
+ "use_cache": true,
59
+ "vocab_size": 32128
60
+ }
instructor-large/config_sentence_transformers.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "2.2.0",
4
+ "transformers": "4.7.0",
5
+ "pytorch": "1.9.0+cu102"
6
+ }
7
+ }
instructor-large/modules.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Dense",
18
+ "type": "sentence_transformers.models.Dense"
19
+ },
20
+ {
21
+ "idx": 3,
22
+ "name": "3",
23
+ "path": "3_Normalize",
24
+ "type": "sentence_transformers.models.Normalize"
25
+ }
26
+ ]
instructor-large/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb7f63752cf10103be938bc50dfad0b6fa1e63bc67b963471fc827838d9bbb41
3
+ size 1339823867
instructor-large/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
instructor-large/special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "pad_token": "<pad>",
106
+ "unk_token": "<unk>"
107
+ }
instructor-large/spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
instructor-large/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
instructor-large/tokenizer_config.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "extra_ids": 100,
106
+ "model_max_length": 512,
107
+ "name_or_path": "/scratch/acd13578qu/metatrain_models/enhanced_large/checkpoint-300",
108
+ "pad_token": "<pad>",
109
+ "special_tokens_map_file": null,
110
+ "tokenizer_class": "T5Tokenizer",
111
+ "unk_token": "<unk>"
112
+ }