update model to query

Browse files

Files changed (16) hide show

document_0_SentenceTransformer/1_Pooling/config.json +0 -10
document_0_SentenceTransformer/README.md +0 -254
document_0_SentenceTransformer/config.json +0 -45
document_0_SentenceTransformer/config_sentence_transformers.json +0 -14
document_0_SentenceTransformer/model.safetensors +0 -3
document_0_SentenceTransformer/modules.json +0 -14
document_0_SentenceTransformer/sentence_bert_config.json +0 -4
document_0_SentenceTransformer/special_tokens_map.json +0 -37
document_0_SentenceTransformer/tokenizer.json +0 -0
document_0_SentenceTransformer/tokenizer_config.json +0 -945
query_0_SentenceTransformer/README.md +0 -147
query_0_SentenceTransformer/config_sentence_transformers.json +0 -14
query_0_SentenceTransformer/model.safetensors +0 -3
query_0_SentenceTransformer/modules.json +0 -14
query_0_SentenceTransformer/tokenizer.json +0 -0
router_config.json +0 -18

document_0_SentenceTransformer/1_Pooling/config.json DELETED Viewed

@@ -1,10 +0,0 @@
-{
-    "word_embedding_dimension": 768,
-    "pooling_mode_cls_token": true,
-    "pooling_mode_mean_tokens": false,
-    "pooling_mode_max_tokens": false,
-    "pooling_mode_mean_sqrt_len_tokens": false,
-    "pooling_mode_weightedmean_tokens": false,
-    "pooling_mode_lasttoken": false,
-    "include_prompt": true
-}

document_0_SentenceTransformer/README.md DELETED Viewed

@@ -1,254 +0,0 @@
----
-license: apache-2.0
-language:
-- en
-base_model:
-- answerdotai/ModernBERT-base
-base_model_relation: finetune
-pipeline_tag: sentence-similarity
-library_name: transformers
-tags:
-- sentence-transformers
-- mteb
-- embedding
-- transformers.js
-- text-embeddings-inference
----
-# gte-modernbert-base
-We are excited to introduce the `gte-modernbert` series of models, which are built upon the latest ModernBERT pre-trained encoder-only foundation models. The `gte-modernbert` series models include both text embedding models and rerank models.
-The `gte-modernbert` models demonstrate competitive performance in several text embedding and text retrieval evaluation tasks when compared to similar-scale models from the current open-source community. This includes assessments such as MTEB, LoCo, and CoIR evaluation.
-## Model Overview
-- Developed by: Tongyi Lab, Alibaba Group
-- Model Type: Text Embedding
-- Primary Language: English
-- Model Size: 149M
-- Max Input Length: 8192 tokens
-- Output Dimension: 768
-### Model list
-|                                         Models                                         | Language |       Model Type       | Model Size | Max Seq. Length | Dimension | MTEB-en | BEIR | LoCo | CoIR |
-|:--------------------------------------------------------------------------------------:|:--------:|:----------------------:|:----------:|:---------------:|:---------:|:-------:|:----:|:----:|:----:|
-|  [`gte-modernbert-base`](https://huggingface.co/Alibaba-NLP/gte-modernbert-base)   | English  |     text embedding     |    149M    |      8192       |    768    |  64.38  | 55.33 | 87.57 | 79.31 |
-| [`gte-reranker-modernbert-base`](https://huggingface.co/Alibaba-NLP/gte-reranker-modernbert-base)  | English  | text reranker     |    149M    |    8192    |     -     |  - | 56.19 | 90.68 | 79.99 |
-## Usage
-> [!TIP]
-> For `transformers` and `sentence-transformers`, if your GPU supports it, the efficient Flash Attention 2 will be used automatically if you have `flash_attn` installed. It is not mandatory.
->
-> ```bash
-> pip install flash_attn
-> ```
-Use with `transformers`
-```python
-# Requires transformers>=4.48.0
-import torch.nn.functional as F
-from transformers import AutoModel, AutoTokenizer
-input_texts = [
-    "what is the capital of China?",
-    "how to implement quick sort in python?",
-    "Beijing",
-    "sorting algorithms"
-]
-model_path = "Alibaba-NLP/gte-modernbert-base"
-tokenizer = AutoTokenizer.from_pretrained(model_path)
-model = AutoModel.from_pretrained(model_path)
-# Tokenize the input texts
-batch_dict = tokenizer(input_texts, max_length=8192, padding=True, truncation=True, return_tensors='pt')
-outputs = model(**batch_dict)
-embeddings = outputs.last_hidden_state[:, 0]
-# (Optionally) normalize embeddings
-embeddings = F.normalize(embeddings, p=2, dim=1)
-scores = (embeddings[:1] @ embeddings[1:].T) * 100
-print(scores.tolist())
-# [[42.89073944091797, 71.30911254882812, 33.664554595947266]]
-```
-Use with `sentence-transformers`:
-```python
-# Requires transformers>=4.48.0
-from sentence_transformers import SentenceTransformer
-from sentence_transformers.util import cos_sim
-input_texts = [
-    "what is the capital of China?",
-    "how to implement quick sort in python?",
-    "Beijing",
-    "sorting algorithms"
-]
-model = SentenceTransformer("Alibaba-NLP/gte-modernbert-base")
-embeddings = model.encode(input_texts)
-print(embeddings.shape)
-# (4, 768)
-similarities = cos_sim(embeddings[0], embeddings[1:])
-print(similarities)
-# tensor([[0.4289, 0.7131, 0.3366]])
-```
-Use with `transformers.js`:
-```js
-// npm i @huggingface/transformers
-import { pipeline, matmul } from "@huggingface/transformers";
-// Create a feature extraction pipeline
-const extractor = await pipeline(
-  "feature-extraction",
-  "Alibaba-NLP/gte-modernbert-base",
-  { dtype: "fp32" }, // Supported options: "fp32", "fp16", "q8", "q4", "q4f16"
-);
-// Embed queries and documents
-const embeddings = await extractor(
-  [
-    "what is the capital of China?",
-    "how to implement quick sort in python?",
-    "Beijing",
-    "sorting algorithms",
-  ],
-  { pooling: "cls", normalize: true },
-);
-// Compute similarity scores
-const similarities = (await matmul(embeddings.slice([0, 1]), embeddings.slice([1, null]).transpose(1, 0))).mul(100);
-console.log(similarities.tolist()); // [[42.89077377319336, 71.30916595458984, 33.66455841064453]]
-```
-Additionally, you can also deploy `Alibaba-NLP/gte-modernbert-base` with [Text Embeddings Inference (TEI)](https://github.com/huggingface/text-embeddings-inference) as follows:
-- CPU
-```bash
-docker run --platform linux/amd64 \
-  -p 8080:80 \
-  -v $PWD/data:/data \
-  --pull always \
-  ghcr.io/huggingface/text-embeddings-inference:cpu-1.7 \
-  --model-id Alibaba-NLP/gte-modernbert-base
-```
-- GPU
-```bash
-docker run --gpus all \
-  -p 8080:80 \
-  -v $PWD/data:/data \
-  --pull always \
-  ghcr.io/huggingface/text-embeddings-inference:1.7 \
-  --model-id Alibaba-NLP/gte-modernbert-base
-```
-Then you can send requests to the deployed API via the OpenAI-compatible `v1/embeddings` route (more information about the [OpenAI Embeddings API](https://platform.openai.com/docs/api-reference/embeddings)):
-```bash
-curl http://0.0.0.0:8080/v1/embeddings \
-  -H "Content-Type: application/json" \
-  -d '{
-    "input": [
-      "what is the capital of China?",
-      "how to implement quick sort in python?",
-      "Beijing",
-      "sorting algorithms"
-    ],
-    "model": "Alibaba-NLP/gte-modernbert-base",
-    "encoding_format": "float"
-  }'
-```
-## Training Details
-The `gte-modernbert` series of models follows the training scheme of the previous [GTE models](https://huggingface.co/collections/Alibaba-NLP/gte-models-6680f0b13f885cb431e6d469), with the only difference being that the pre-trained language model base has been changed from [GTE-MLM](https://huggingface.co/Alibaba-NLP/gte-en-mlm-base) to [ModernBERT](https://huggingface.co/answerdotai/ModernBERT-base). For more training details, please refer to our paper: [mGTE: Generalized Long-Context Text Representation and Reranking Models for Multilingual Text Retrieval](https://aclanthology.org/2024.emnlp-industry.103/).
-## Evaluation
-### MTEB
-The results of other models are retrieved from [MTEB leaderboard](https://huggingface.co/spaces/mteb/leaderboard). Given that all models in the `gte-modernbert` series have a size of less than 1B parameters, we focused exclusively on the results of models under 1B from the MTEB leaderboard.
-|                                            Model Name                                            | Param Size (M) | Dimension | Sequence Length | Average (56) | Class. (12) | Clust. (11) | Pair Class. (3) | Reran. (4) | Retr. (15) |  STS (10)   | Summ. (1) |
-|:------------------------------------------------------------------------------------------------:|:--------------:|:---------:|:---------------:|:------------:|:-----------:|:---:|:---:|:---:|:---:|:-----------:|:--------:|
-|        [mxbai-embed-large-v1](https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1)         |      335       |   1024    |       512       |    64.68     |    75.64    | 46.71 | 87.2 | 60.11 | 54.39 |     85      |   32.71  |
-| [multilingual-e5-large-instruct](https://huggingface.co/intfloat/multilingual-e5-large-instruct) |      560       |   1024    |       514       |    64.41     |    77.56    | 47.1 | 86.19 | 58.58 | 52.47 |    84.78    |   30.39  |
-|                [bge-large-en-v1.5](https://huggingface.co/BAAI/bge-large-en-v1.5)                |      335       |   1024    |       512       |    64.23     |    75.97    | 46.08 | 87.12 | 60.03 | 54.29 |    83.11    |   31.61  |
-|             [gte-base-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-base-en-v1.5)              |      137       |    768    |      8192       |  64.11   |    77.17    | 46.82 | 85.33 | 57.66 | 54.09 |    81.97    |   31.17  |
-|                 [bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5)                 |      109       |    768    |       512       |    63.55     |    75.53    | 45.77 | 86.55 | 58.86 | 53.25 |    82.4     |   31.07  |
-|            [gte-large-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-large-en-v1.5)             |      409       |   1024    |      8192       |    65.39     |    77.75    | 47.95 | 84.63 | 58.50 | 57.91 |    81.43    |   30.91  |
-| [modernbert-embed-base](https://huggingface.co/nomic-ai/modernbert-embed-base) |      149       |    768    |      8192       |    62.62     |    74.31    | 44.98 | 83.96 | 56.42 | 52.89 |    81.78    |   31.39  |
-| [nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) |                |    768    |      8192       |    62.28     |   	73.55    |	43.93 |	84.61 |	55.78 | 53.01|    81.94    |   30.4   |
-| [gte-multilingual-base](https://huggingface.co/Alibaba-NLP/gte-multilingual-base) |      305       |    768    |       8192      |     61.4     | 70.89 | 44.31 | 84.24 | 57.47 |51.08 |    82.11    |   30.58  |
-| [jina-embeddings-v3](https://huggingface.co/jinaai/jina-embeddings-v3) | 572 |   1024    |      8192  |       65.51 | 82.58 |45.21 |84.01 |58.13 |53.88 | 85.81 |   29.71  |
-| [**gte-modernbert-base**](https://huggingface.co/Alibaba-NLP/gte-modernbert-base) | 149 |   768    |      8192  |   **64.38** | **76.99** | **46.47** | **85.93** | **59.24** | **55.33** | **81.57** | **30.68** |
-### LoCo (Long Document Retrieval)(NDCG@10)
-| Model Name |  Dimension | Sequence Length | Average (5) | QsmsumRetrieval | SummScreenRetrieval | QasperAbstractRetrieval | QasperTitleRetrieval |  GovReportRetrieval |
-|:----:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
-| [gte-qwen1.5-7b](https://huggingface.co/Alibaba-NLP/gte-qwen1.5-7b) | 4096 | 32768 |  87.57 | 49.37 | 93.10 | 99.67 | 97.54 | 98.21 |
-| [gte-large-v1.5](https://huggingface.co/Alibaba-NLP/gte-large-v1.5) |1024 | 8192 | 86.71 | 44.55 | 92.61 | 99.82 | 97.81 | 98.74 |
-| [gte-base-v1.5](https://huggingface.co/Alibaba-NLP/gte-base-v1.5) | 768 | 8192 | 87.44 | 49.91  | 91.78 | 99.82 | 97.13 | 98.58 |
-| [gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base) | 768 | 8192 | 88.88 | 54.45 | 93.00 | 99.82 | 98.03 | 98.70 |
-| [gte-reranker-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-reranker-modernbert-base) | - | 8192 | 90.68 | 70.86 | 94.06 | 99.73 | 99.11 | 89.67 |
-### COIR (Code Retrieval Task)(NDCG@10)
-| Model Name | Dimension | Sequence Length | Average(20) | CodeSearchNet-ccr-go | CodeSearchNet-ccr-java | CodeSearchNet-ccr-javascript | CodeSearchNet-ccr-php | CodeSearchNet-ccr-python | CodeSearchNet-ccr-ruby | CodeSearchNet-go | CodeSearchNet-java | CodeSearchNet-javascript | CodeSearchNet-php | CodeSearchNet-python | CodeSearchNet-ruby | apps | codefeedback-mt | codefeedback-st | codetrans-contest | codetrans-dl | cosqa | stackoverflow-qa | synthetic-text2sql |
-|:----:|:---:|:---:|:---:|:---:| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
-| [gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base) | 768 | 8192 | 79.31	| 94.15	| 93.57 |	94.27 |	91.51	| 93.93	| 90.63	| 88.32 |	83.27	| 76.05	| 85.12	| 88.16	| 77.59	| 57.54	| 82.34	| 85.95	| 71.89	 | 35.46	| 43.47	| 91.2	| 61.87 |
-| [gte-reranker-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-reranker-modernbert-base) | - | 8192 | 79.99	| 96.43	| 96.88	| 98.32 | 91.81	| 97.7	| 91.96 |	88.81	| 79.71	| 76.27	| 89.39	| 98.37	| 84.11	| 47.57	| 83.37	| 88.91	| 49.66	| 36.36	| 44.37	| 89.58	| 64.21 |
-### BEIR(NDCG@10)
-| Model Name | Dimension | Sequence Length | Average(15) | ArguAna | ClimateFEVER | CQADupstackAndroidRetrieval | DBPedia | FEVER | FiQA2018 | HotpotQA | MSMARCO | NFCorpus | NQ | QuoraRetrieval | SCIDOCS | SciFact | Touche2020 | TRECCOVID |
-| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
-| [gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base) | 768 | 8192 | 55.33 | 72.68 | 37.74 | 42.63 | 41.79 | 91.03 | 48.81 | 69.47 | 40.9 | 36.44 | 57.62 | 88.55 | 21.29 | 77.4 | 21.68 | 81.95 |
-| [gte-reranker-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-reranker-modernbert-base) | - | 8192 | 56.73 | 69.03 | 37.79 | 44.68 | 47.23 | 94.54 | 49.81 | 78.16 | 45.38 | 30.69 | 64.57 | 87.77 | 20.60 | 73.57 | 27.36 | 79.89 |
-## Hiring
-We have open positions for **Research Interns** and **Full-Time Researchers** to join our team at Tongyi Lab.
-We are seeking passionate individuals with expertise in representation learning, LLM-driven information retrieval, Retrieval-Augmented Generation (RAG), and agent-based systems.
-Our team is located in the vibrant cities of **Beijing** and **Hangzhou**.
-If you are driven by curiosity and eager to make a meaningful impact through your work, we would love to hear from you. Please submit your resume along with a brief introduction to <a href="mailto:dingkun.ldk@alibaba-inc.com">dingkun.ldk@alibaba-inc.com</a>.
-## Citation
-If you find our paper or models helpful, please consider citing our work.
-```
-@inproceedings{zhang2024mgte,
-  title={mGTE: Generalized Long-Context Text Representation and Reranking Models for Multilingual Text Retrieval},
-  author={Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Wen and Dai, Ziqi and Tang, Jialong and Lin, Huan and Yang, Baosong and Xie, Pengjun and Huang, Fei and others},
-  booktitle={Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track},
-  pages={1393--1412},
-  year={2024}
-}
-@article{li2023towards,
-  title={Towards general text embeddings with multi-stage contrastive learning},
-  author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
-  journal={arXiv preprint arXiv:2308.03281},
-  year={2023}
-}
-```

document_0_SentenceTransformer/config.json DELETED Viewed

@@ -1,45 +0,0 @@
-{
-  "architectures": [
-    "ModernBertModel"
-  ],
-  "attention_bias": false,
-  "attention_dropout": 0.0,
-  "bos_token_id": 50281,
-  "classifier_activation": "gelu",
-  "classifier_bias": false,
-  "classifier_dropout": 0.0,
-  "classifier_pooling": "mean",
-  "cls_token_id": 50281,
-  "decoder_bias": true,
-  "deterministic_flash_attn": false,
-  "dtype": "float32",
-  "embedding_dropout": 0.0,
-  "eos_token_id": 50282,
-  "global_attn_every_n_layers": 3,
-  "global_rope_theta": 160000.0,
-  "gradient_checkpointing": false,
-  "hidden_activation": "gelu",
-  "hidden_size": 768,
-  "initializer_cutoff_factor": 2.0,
-  "initializer_range": 0.02,
-  "intermediate_size": 1152,
-  "layer_norm_eps": 1e-05,
-  "local_attention": 128,
-  "local_rope_theta": 10000.0,
-  "max_position_embeddings": 8192,
-  "mlp_bias": false,
-  "mlp_dropout": 0.0,
-  "model_type": "modernbert",
-  "norm_bias": false,
-  "norm_eps": 1e-05,
-  "num_attention_heads": 12,
-  "num_hidden_layers": 22,
-  "pad_token_id": 50283,
-  "position_embedding_type": "absolute",
-  "repad_logits_with_grad": false,
-  "sep_token_id": 50282,
-  "sparse_pred_ignore_index": -100,
-  "sparse_prediction": false,
-  "transformers_version": "4.56.2",
-  "vocab_size": 50368
-}

document_0_SentenceTransformer/config_sentence_transformers.json DELETED Viewed

@@ -1,14 +0,0 @@
-{
-  "__version__": {
-    "sentence_transformers": "5.1.1",
-    "transformers": "4.56.2",
-    "pytorch": "2.8.0"
-  },
-  "prompts": {
-    "query": "",
-    "document": ""
-  },
-  "default_prompt_name": null,
-  "similarity_fn_name": "cosine",
-  "model_type": "SentenceTransformer"
-}

document_0_SentenceTransformer/model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0f9247027e7d57e8b36440b5b3d10a785ded92c7c9f4a313ff7f54a549967290
-size 596070136

document_0_SentenceTransformer/modules.json DELETED Viewed

@@ -1,14 +0,0 @@
-[
-  {
-    "idx": 0,
-    "name": "0",
-    "path": "",
-    "type": "sentence_transformers.models.Transformer"
-  },
-  {
-    "idx": 1,
-    "name": "1",
-    "path": "1_Pooling",
-    "type": "sentence_transformers.models.Pooling"
-  }
-]

document_0_SentenceTransformer/sentence_bert_config.json DELETED Viewed

@@ -1,4 +0,0 @@
-{
-    "max_seq_length": 8192,
-    "do_lower_case": false
-}

document_0_SentenceTransformer/special_tokens_map.json DELETED Viewed

@@ -1,37 +0,0 @@
-{
-  "cls_token": {
-    "content": "[CLS]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "mask_token": {
-    "content": "[MASK]",
-    "lstrip": true,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "[PAD]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "sep_token": {
-    "content": "[SEP]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "[UNK]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
-}

document_0_SentenceTransformer/tokenizer.json DELETED Viewed

The diff for this file is too large to render. See raw diff

document_0_SentenceTransformer/tokenizer_config.json DELETED Viewed

@@ -1,945 +0,0 @@
-{
-  "added_tokens_decoder": {
-    "0": {
-      "content": "|||IP_ADDRESS|||",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "1": {
-      "content": "<|padding|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "50254": {
-      "content": "                        ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50255": {
-      "content": "                       ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50256": {
-      "content": "                      ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50257": {
-      "content": "                     ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50258": {
-      "content": "                    ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50259": {
-      "content": "                   ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50260": {
-      "content": "                  ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50261": {
-      "content": "                 ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50262": {
-      "content": "                ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50263": {
-      "content": "               ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50264": {
-      "content": "              ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50265": {
-      "content": "             ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50266": {
-      "content": "            ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50267": {
-      "content": "           ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50268": {
-      "content": "          ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50269": {
-      "content": "         ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50270": {
-      "content": "        ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50271": {
-      "content": "       ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50272": {
-      "content": "      ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50273": {
-      "content": "     ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50274": {
-      "content": "    ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50275": {
-      "content": "   ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50276": {
-      "content": "  ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50277": {
-      "content": "|||EMAIL_ADDRESS|||",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50278": {
-      "content": "|||PHONE_NUMBER|||",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50279": {
-      "content": "<|endoftext|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "50280": {
-      "content": "[UNK]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "50281": {
-      "content": "[CLS]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "50282": {
-      "content": "[SEP]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "50283": {
-      "content": "[PAD]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "50284": {
-      "content": "[MASK]",
-      "lstrip": true,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "50285": {
-      "content": "[unused0]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50286": {
-      "content": "[unused1]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50287": {
-      "content": "[unused2]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50288": {
-      "content": "[unused3]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50289": {
-      "content": "[unused4]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50290": {
-      "content": "[unused5]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50291": {
-      "content": "[unused6]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50292": {
-      "content": "[unused7]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50293": {
-      "content": "[unused8]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50294": {
-      "content": "[unused9]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50295": {
-      "content": "[unused10]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50296": {
-      "content": "[unused11]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50297": {
-      "content": "[unused12]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50298": {
-      "content": "[unused13]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50299": {
-      "content": "[unused14]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50300": {
-      "content": "[unused15]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50301": {
-      "content": "[unused16]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50302": {
-      "content": "[unused17]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50303": {
-      "content": "[unused18]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50304": {
-      "content": "[unused19]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50305": {
-      "content": "[unused20]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50306": {
-      "content": "[unused21]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50307": {
-      "content": "[unused22]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50308": {
-      "content": "[unused23]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50309": {
-      "content": "[unused24]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50310": {
-      "content": "[unused25]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50311": {
-      "content": "[unused26]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50312": {
-      "content": "[unused27]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50313": {
-      "content": "[unused28]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50314": {
-      "content": "[unused29]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50315": {
-      "content": "[unused30]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50316": {
-      "content": "[unused31]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50317": {
-      "content": "[unused32]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50318": {
-      "content": "[unused33]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50319": {
-      "content": "[unused34]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50320": {
-      "content": "[unused35]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50321": {
-      "content": "[unused36]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50322": {
-      "content": "[unused37]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50323": {
-      "content": "[unused38]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50324": {
-      "content": "[unused39]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50325": {
-      "content": "[unused40]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50326": {
-      "content": "[unused41]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50327": {
-      "content": "[unused42]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50328": {
-      "content": "[unused43]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50329": {
-      "content": "[unused44]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50330": {
-      "content": "[unused45]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50331": {
-      "content": "[unused46]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50332": {
-      "content": "[unused47]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50333": {
-      "content": "[unused48]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50334": {
-      "content": "[unused49]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50335": {
-      "content": "[unused50]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50336": {
-      "content": "[unused51]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50337": {
-      "content": "[unused52]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50338": {
-      "content": "[unused53]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50339": {
-      "content": "[unused54]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50340": {
-      "content": "[unused55]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50341": {
-      "content": "[unused56]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50342": {
-      "content": "[unused57]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50343": {
-      "content": "[unused58]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50344": {
-      "content": "[unused59]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50345": {
-      "content": "[unused60]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50346": {
-      "content": "[unused61]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50347": {
-      "content": "[unused62]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50348": {
-      "content": "[unused63]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50349": {
-      "content": "[unused64]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50350": {
-      "content": "[unused65]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50351": {
-      "content": "[unused66]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50352": {
-      "content": "[unused67]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50353": {
-      "content": "[unused68]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50354": {
-      "content": "[unused69]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50355": {
-      "content": "[unused70]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50356": {
-      "content": "[unused71]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50357": {
-      "content": "[unused72]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50358": {
-      "content": "[unused73]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50359": {
-      "content": "[unused74]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50360": {
-      "content": "[unused75]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50361": {
-      "content": "[unused76]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50362": {
-      "content": "[unused77]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50363": {
-      "content": "[unused78]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50364": {
-      "content": "[unused79]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50365": {
-      "content": "[unused80]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50366": {
-      "content": "[unused81]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50367": {
-      "content": "[unused82]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    }
-  },
-  "clean_up_tokenization_spaces": true,
-  "cls_token": "[CLS]",
-  "extra_special_tokens": {},
-  "mask_token": "[MASK]",
-  "model_input_names": [
-    "input_ids",
-    "attention_mask"
-  ],
-  "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "tokenizer_class": "PreTrainedTokenizerFast",
-  "unk_token": "[UNK]"
-}

query_0_SentenceTransformer/README.md DELETED Viewed

@@ -1,147 +0,0 @@
----
-tags:
-- sentence-transformers
-- sentence-similarity
-- feature-extraction
-- dense
-- generated_from_trainer
-base_model: Alibaba-NLP/gte-modernbert-base
-pipeline_tag: sentence-similarity
-library_name: sentence-transformers
----
-# SentenceTransformer based on Alibaba-NLP/gte-modernbert-base
-This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
-## Model Details
-### Model Description
-- **Model Type:** Sentence Transformer
-- **Base model:** [Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base)
-- **Maximum Sequence Length:** inf tokens
-- **Output Dimensionality:** 768 dimensions
-- **Similarity Function:** Cosine Similarity
-<!-- - **Training Dataset:** Unknown -->
-<!-- - **Language:** Unknown -->
-<!-- - **License:** Unknown -->
-### Model Sources
-- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
-- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
-- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
-### Full Model Architecture
-```
-SentenceTransformer(
-  (0): StaticEmbedding(
-    (embedding): EmbeddingBag(100003, 768, mode='mean')
-  )
-  (1): Normalize()
-)
-```
-## Usage
-### Direct Usage (Sentence Transformers)
-First install the Sentence Transformers library:
-```bash
-pip install -U sentence-transformers
-```
-Then you can load this model and run inference.
-```python
-from sentence_transformers import SentenceTransformer
-# Download from the 🤗 Hub
-model = SentenceTransformer("stephantulkens/NIFE-gte-modernbert-base")
-# Run inference
-sentences = [
-    'The weather is lovely today.',
-    "It's so sunny outside!",
-    'He drove to the stadium.',
-]
-embeddings = model.encode(sentences)
-print(embeddings.shape)
-# [3, 768]
-# Get the similarity scores for the embeddings
-similarities = model.similarity(embeddings, embeddings)
-print(similarities)
-# tensor([[1.0000, 0.4225, 0.2490],
-#         [0.4225, 1.0000, 0.3060],
-#         [0.2490, 0.3060, 1.0000]])
-```
-<!--
-### Direct Usage (Transformers)
-<details><summary>Click to see the direct usage in Transformers</summary>
-</details>
--->
-<!--
-### Downstream Usage (Sentence Transformers)
-You can finetune this model on your own dataset.
-<details><summary>Click to expand</summary>
-</details>
--->
-<!--
-### Out-of-Scope Use
-*List how the model may foreseeably be misused and address what users ought not to do with the model.*
--->
-<!--
-## Bias, Risks and Limitations
-*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
--->
-<!--
-### Recommendations
-*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
--->
-## Training Details
-### Framework Versions
-- Python: 3.12.8
-- Sentence Transformers: 5.1.1
-- Transformers: 4.56.2
-- PyTorch: 2.8.0
-- Accelerate: 1.10.1
-- Datasets: 4.1.1
-- Tokenizers: 0.22.1
-## Citation
-### BibTeX
-<!--
-## Glossary
-*Clearly define terms in order to be accessible across audiences.*
--->
-<!--
-## Model Card Authors
-*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
--->
-<!--
-## Model Card Contact
-*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
--->

query_0_SentenceTransformer/config_sentence_transformers.json DELETED Viewed

@@ -1,14 +0,0 @@
-{
-  "model_type": "SentenceTransformer",
-  "__version__": {
-    "sentence_transformers": "5.1.1",
-    "transformers": "4.56.2",
-    "pytorch": "2.8.0"
-  },
-  "prompts": {
-    "query": "",
-    "document": ""
-  },
-  "default_prompt_name": null,
-  "similarity_fn_name": "cosine"
-}

query_0_SentenceTransformer/model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:bd3aabaef0433c9b51e538dfdbbc8da720dbe79c96650beae4ce0f68001ac81f
-size 307209312

query_0_SentenceTransformer/modules.json DELETED Viewed

@@ -1,14 +0,0 @@
-[
-  {
-    "idx": 0,
-    "name": "0",
-    "path": "",
-    "type": "sentence_transformers.models.StaticEmbedding"
-  },
-  {
-    "idx": 1,
-    "name": "1",
-    "path": "1_Normalize",
-    "type": "sentence_transformers.models.Normalize"
-  }
-]

query_0_SentenceTransformer/tokenizer.json DELETED Viewed

The diff for this file is too large to render. See raw diff

router_config.json DELETED Viewed

@@ -1,18 +0,0 @@
-{
-    "types": {
-        "query_0_SentenceTransformer": "sentence_transformers.SentenceTransformer.SentenceTransformer",
-        "document_0_SentenceTransformer": "sentence_transformers.SentenceTransformer.SentenceTransformer"
-    },
-    "structure": {
-        "query": [
-            "query_0_SentenceTransformer"
-        ],
-        "document": [
-            "document_0_SentenceTransformer"
-        ]
-    },
-    "parameters": {
-        "default_route": "document",
-        "allow_empty_key": true
-    }
-}