FINGU-AI committed on Jul 22

Commit

fa038e6

•

1 Parent(s): 90b08e0

Upload folder using huggingface_hub

Browse files

Files changed (23) hide show

1_Pooling/config.json +10 -0
2_Dense/config.json +1 -0
2_Dense/model.safetensors +3 -0
README.md +437 -3
added_tokens.json +5 -0
config.json +33 -0
config_sentence_transformers.json +13 -0
merges.txt +0 -0
model.safetensors +3 -0
modules.json +20 -0
optimizer.pt +3 -0
rng_state_0.pth +3 -0
rng_state_1.pth +3 -0
rng_state_2.pth +3 -0
rng_state_3.pth +3 -0
scheduler.pt +3 -0
sentence_bert_config.json +4 -0
special_tokens_map.json +20 -0
tokenizer.json +0 -0
tokenizer_config.json +50 -0
trainer_state.json +63 -0
training_args.bin +3 -0
vocab.json +0 -0

1_Pooling/config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "word_embedding_dimension": 1536,
+  "pooling_mode_cls_token": false,
+  "pooling_mode_mean_tokens": true,
+  "pooling_mode_max_tokens": false,
+  "pooling_mode_mean_sqrt_len_tokens": false,
+  "pooling_mode_weightedmean_tokens": false,
+  "pooling_mode_lasttoken": false,
+  "include_prompt": true
+}

2_Dense/config.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"in_features": 1536, "out_features": 1024, "bias": true, "activation_function": "torch.nn.modules.linear.Identity"}

2_Dense/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:32700832aa24ced2f7bd6208eb3f39989029ad7c61a4dc93ebf4acf2af6a34ba
+size 6295712

README.md CHANGED Viewed

@@ -1,3 +1,437 @@
----
-license: apache-2.0
----

+---
+base_model: dunzhang/stella_en_1.5B_v5
+datasets: []
+language: []
+library_name: sentence-transformers
+pipeline_tag: sentence-similarity
+tags:
+- sentence-transformers
+- sentence-similarity
+- feature-extraction
+- generated_from_trainer
+- dataset_size:99000
+- loss:MultipleNegativesSymmetricRankingLoss
+widget:
+- source_sentence: 'Instruct: Given a web search query, retrieve relevant passages
+    that answer the query.
+    Query: Glay'
+  sentences:
+  - The Theory of Good and Evil is a 1907 book about ethics by the English philosopher
+    Hastings Rashdall, in which the author expounds a theory he calls "ideal utilitarianism".
+    It has been seen as Rashdall's most important philosophical work.
+  - GLAY is a Japanese rock band , formed in Hakodate in 1988 . Glay primarily composes
+    songs in the rock and pop genres , but they have also arranged songs using elements
+    from a wide variety of genres , including punk , electronic , R&B , progressive
+    rock , folk , reggae , gospel , and ska . Originally a visual kei band , the group
+    slowly shifted to less dramatic attire through the years . As of 2008 , Glay had
+    sold an estimated 51 million records ; 28 million singles and 23 million albums
+    , making them one of the top ten best-selling artists of all time in Japan .
+  - Aashirwad is a 1968 Bollywood film , directed by Hrishikesh Mukherjee . The film
+    stars Ashok Kumar and Sanjeev Kumar .   The film is notable for its inclusion
+    of a rap-like song performed by Ashok Kumar , `` Rail Gaadi '' .
+- source_sentence: 'Instruct: Given a web search query, retrieve relevant passages
+    that answer the query.
+    Query: Indexing does not work with index package'
+  sentences:
+  - 'I am trying to do indexing with the following code:               \documentclass[a4paper]{article}     \usepackage{index}     \makeindex     \newindex{aut}{adx}{and}{Name
+    Index}     \begin{document}     Hellow \index[aut]{FiRST}     \printindex[aut]     \end{document}      Acccording
+    to documention of the `index` package it should work. But makeindex creates empty
+    `.idx` and `.ind`. If I run code like this:               \documentclass[a4paper]{article}     \usepackage{index}     \makeindex     \begin{document}      Hellow
+    \index{FiRST}     \printindex     \end{document}      It runs. But I need to have
+    user-defined index. Please help me with it. I''ve searched for several hours on
+    internet, but without success.'
+  - 'Body materials may include, but are not limited to, any of these materials:'
+  - Berberis aemulans is a shrub endemic to the region of Sichuan in southern China.
+    It grows there in thickets and on slopes at elevations of 2900-3200 m.Berberis
+    aemulans is a deciduous shrub up to 2 m tall, with spines along the branches.
+    Leaves are simple, elliptical to ovate, up to 4 cm long, lighter in color on the
+    underside because of a waxy layer. Flowers are in simple racemes of only a few
+    flowers. Berries egg-shaped, orange, up to 16 mm long.
+- source_sentence: 'Instruct: Given a web search query, retrieve relevant passages
+    that answer the query.
+    Query: Parodi''s hemispingus'
+  sentences:
+  - Another event dubbed a "Battle of the Sexes" took place during the 1998 Australian
+    Open[51] between Karsten Braasch and the Williams sisters. Venus and Serena Williams
+    had claimed that they could beat any male player ranked outside the world's top
+    200, so Braasch, then ranked 203rd, challenged them both. Braasch was described
+    by one journalist as "a man whose training regime centered around a pack of cigarettes
+    and more than a couple bottles of ice cold lager".[52][51] The matches took place
+    on court number 12 in Melbourne Park,[53] after Braasch had finished a round of
+    golf and two shandies. He first took on Serena and after leading 5–0, beat her
+    6–1. Venus then walked on court and again Braasch was victorious, this time winning
+    6–2.[54] Braasch said afterwards, "500 and above, no chance". He added that he
+    had played like someone ranked 600th in order to keep the game "fun".[55] Braasch
+    said the big difference was that men can chase down shots much easier, and that
+    men put spin on the ball that the women can't handle. The Williams sisters adjusted
+    their claim to beating men outside the top 350.[51]
+  - The Parodi 's hemispingus ( Hemispingus parodii ) is a species of bird in the
+    family Thraupidae that is endemic to Peru .   Its natural habitat is subtropical
+    or tropical moist montane forests .
+  - 'I need help because my Minecraft launcher doesn''t work... It''s been a long
+    time I haven''t played Minecraft and until now it worked nicely. But now that
+    I want to play on it again and I run the launcher, this appears (click images
+    to enlarge): ![enter image description here](http://i.stack.imgur.com/hvD9R.png)
+    At the bottom left of the screen the profile names keep loading (normally my username
+    appears in the box) and as you can see I am unable to click on the "Play" button.
+    I tried creating another profile but it doesn''t work because soon after they
+    ask to enter my Minecraft username and password. The password I entered disappears
+    and it keeps loading (I''ve tried waiting like, 30 minutes and it still doesn''t
+    work) so this is definitely not normal. ![enter image description here](http://i.stack.imgur.com/yDYjX.png)
+    ![enter image description here](http://i.stack.imgur.com/4Nf1L.png) ![enter image
+    description here](http://i.stack.imgur.com/T6cJu.png) So basically I can''t play
+    on Minecraft anymore (version 1.7.9)... P.S. I use Windows 7.'
+- source_sentence: 'Instruct: Given a web search query, retrieve relevant passages
+    that answer the query.
+    Query: Mahabharata'
+  sentences:
+  - The epic employs the story within a story structure, otherwise known as frametales,
+    popular in many Indian religious and non-religious works. It is first recited
+    at Takshashila by the sage Vaiśampāyana,[12][13] a disciple of Vyāsa, to the King
+    Janamejaya who is the great-grandson of the Pāṇḍava prince Arjuna. The story is
+    then recited again by a professional storyteller named Ugraśrava Sauti, many years
+    later, to an assemblage of sages performing the 12-year sacrifice for the king
+    Saunaka Kulapati in the Naimiśa Forest.
+  - 'Guncati (Serbian Cyrillic: Гунцати) is a suburban settlement of Belgrade, the
+    capital of Serbia. It is located in the municipality of Barajevo.Guncati is located
+    west of the municipal seat of Barajevo, halfway between the Belgrade-Bar railway
+    and Ibarska magistrala (Highway of Ibar).It is a rural settlement with a steady
+    population growth: from 1,718 (Census 1991) to 2,102 (Census 2002).'
+  - Beck 's Brewery , also known as Brauerei Beck & Co. , is a brewery in the northern
+    German city of Bremen . In 2001 , Interbrew agreed to buy Brauerei Beck for 1.8
+    billion euro ; at that time it was the fourth largest brewer in Germany . US manufacture
+    of Beck 's Brew has been based in St. Louis , Missouri , since early 2012 but
+    some customers have rebelled against the US market version .   Since 2008 , it
+    has been owned by the Interbrew subsidiary of Anheuser-Busch InBev SA/NV .   The
+    Beck 's Art Label Campaign has offered artists the opportunity to provide designs
+    to replace the brand 's label . It started in London in 1987 with Gilbert and
+    George . The artists created an art label , because Beck 's sponsored their retrospective
+    at the Hayward Gallery . The labels of the 2000 limited edition Beck 's bottles
+    were matching their exhibition poster . Other participants of the Art Label Campaign
+    are members of the loose group `` Young British Artists '' and nominees or winners
+    of the Turner Prize . Damien Hirst for example , designed a label for Beck 's
+    in 1995 , showing his famous spots . In 2000 , Tracey Emin created a label , which
+    shows herself , posing in a bathtub . Furthermore , Rachel Whiteread designed
+    a label in 1993 , presenting her artwork `` house '' , which was also financed
+    by Beck 's . The Art Label Campaign has also been parodied by Matthew Higgs ,
+    who is a member of the British art collective `` Bank '' . In the Bank exhibition
+    `` The Charge of the Light Brigade '' in 1995 , he brewed a beer , called `` Kunstlerbrau
+    '' . In 2012 , Beck 's started giving young and independent musicians the opportunity
+    to design a label for the Beck 's bottle . Beck 's summer 2009 limited-edition
+    labels were designed by the musical groups Hard-Fi and Ladyhawke .
+- source_sentence: 'Instruct: Given a web search query, retrieve relevant passages
+    that answer the query.
+    Query: Ahu A Umi Heiau'
+  sentences:
+  - The 1967 All-Ireland Intermediate Hurling Championship was the seventh staging
+    of the All-Ireland hurling championship. The championship ended on 17 September
+    1967.Tipperary were the defending champions, however, they were defeated in the
+    provincial championship. London won the title after defeating Cork by 1-9 to 1-5
+    in the final.
+  - 'The digit ratio is the ratio of the lengths of different digits or fingers typically
+    measured from the midpoint of bottom crease ( where the finger joins the hand
+    ) to the tip of the finger . It has been suggested by some scientists that the
+    ratio of two digits in particular , the 2nd ( index finger ) and 4th ( ring finger
+    ) , is affected by exposure to androgens , e.g. , testosterone while in the uterus
+    and that this 2D :4 D ratio can be considered a crude measure for prenatal androgen
+    exposure , with lower 2D :4 D ratios pointing to higher prenatal androgen exposure
+    . The 2D :4 D ratio is calculated by dividing the length of the index finger of
+    a given hand by the length of the ring finger of the same hand . A longer index
+    finger will result in a ratio higher than 1 , while a longer ring finger will
+    result in a ratio lower than 1 .   The 2D :4 D digit ratio is sexually dimorphic
+    : although the second digit is typically shorter in both females and males , the
+    difference between the lengths of the two digits is greater in males than in females
+    .   A number of studies have shown a correlation between the 2D :4 D digit ratio
+    and various physical and behavioral traits .'
+  - Ahu A ʻ Umi Heiau means "shrine at the temple of ʻ Umi" in the Hawaiian Language.
+---
+# SentenceTransformer based on dunzhang/stella_en_1.5B_v5
+This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [dunzhang/stella_en_1.5B_v5](https://huggingface.co/dunzhang/stella_en_1.5B_v5). It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+## Model Details
+### Model Description
+- **Model Type:** Sentence Transformer
+- **Base model:** [dunzhang/stella_en_1.5B_v5](https://huggingface.co/dunzhang/stella_en_1.5B_v5) <!-- at revision 129dc50d3ca5f0f5ee0ce8944f65a8553c0f26e0 -->
+- **Maximum Sequence Length:** 8096 tokens
+- **Output Dimensionality:** 1024 dimensions
+- **Similarity Function:** Cosine Similarity
+<!-- - **Training Dataset:** Unknown -->
+<!-- - **Language:** Unknown -->
+<!-- - **License:** Unknown -->
+### Model Sources
+- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+### Full Model Architecture
+```
+SentenceTransformer(
+  (0): Transformer({'max_seq_length': 8096, 'do_lower_case': False}) with Transformer model: Qwen2Model
+  (1): Pooling({'word_embedding_dimension': 1536, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+  (2): Dense({'in_features': 1536, 'out_features': 1024, 'bias': True, 'activation_function': 'torch.nn.modules.linear.Identity'})
+)
+```
+## Usage
+### Direct Usage (Sentence Transformers)
+First install the Sentence Transformers library:
+```bash
+pip install -U sentence-transformers
+```
+Then you can load this model and run inference.
+```python
+from sentence_transformers import SentenceTransformer
+# Download from the 🤗 Hub
+model = SentenceTransformer("sentence_transformers_model_id")
+# Run inference
+sentences = [
+    'Instruct: Given a web search query, retrieve relevant passages that answer the query.\nQuery: Ahu A Umi Heiau',
+    'Ahu A ʻ Umi Heiau means "shrine at the temple of ʻ Umi" in the Hawaiian Language.',
+    'The digit ratio is the ratio of the lengths of different digits or fingers typically measured from the midpoint of bottom crease ( where the finger joins the hand ) to the tip of the finger . It has been suggested by some scientists that the ratio of two digits in particular , the 2nd ( index finger ) and 4th ( ring finger ) , is affected by exposure to androgens , e.g. , testosterone while in the uterus and that this 2D :4 D ratio can be considered a crude measure for prenatal androgen exposure , with lower 2D :4 D ratios pointing to higher prenatal androgen exposure . The 2D :4 D ratio is calculated by dividing the length of the index finger of a given hand by the length of the ring finger of the same hand . A longer index finger will result in a ratio higher than 1 , while a longer ring finger will result in a ratio lower than 1 .   The 2D :4 D digit ratio is sexually dimorphic : although the second digit is typically shorter in both females and males , the difference between the lengths of the two digits is greater in males than in females .   A number of studies have shown a correlation between the 2D :4 D digit ratio and various physical and behavioral traits .',
+]
+embeddings = model.encode(sentences)
+print(embeddings.shape)
+# [3, 1024]
+# Get the similarity scores for the embeddings
+similarities = model.similarity(embeddings, embeddings)
+print(similarities.shape)
+# [3, 3]
+```
+<!--
+### Direct Usage (Transformers)
+<details><summary>Click to see the direct usage in Transformers</summary>
+</details>
+-->
+<!--
+### Downstream Usage (Sentence Transformers)
+You can finetune this model on your own dataset.
+<details><summary>Click to expand</summary>
+</details>
+-->
+<!--
+### Out-of-Scope Use
+*List how the model may foreseeably be misused and address what users ought not to do with the model.*
+-->
+<!--
+## Bias, Risks and Limitations
+*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+-->
+<!--
+### Recommendations
+*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+-->
+## Training Details
+### Training Hyperparameters
+#### Non-Default Hyperparameters
+- `eval_strategy`: steps
+- `per_device_eval_batch_size`: 4
+- `gradient_accumulation_steps`: 4
+- `learning_rate`: 2e-05
+- `max_steps`: 1500
+- `lr_scheduler_type`: cosine
+- `warmup_ratio`: 0.1
+- `warmup_steps`: 5
+- `bf16`: True
+- `tf32`: True
+- `optim`: adamw_torch_fused
+- `gradient_checkpointing`: True
+- `gradient_checkpointing_kwargs`: {'use_reentrant': False}
+- `batch_sampler`: no_duplicates
+#### All Hyperparameters
+<details><summary>Click to expand</summary>
+- `overwrite_output_dir`: False
+- `do_predict`: False
+- `eval_strategy`: steps
+- `prediction_loss_only`: True
+- `per_device_train_batch_size`: 8
+- `per_device_eval_batch_size`: 4
+- `per_gpu_train_batch_size`: None
+- `per_gpu_eval_batch_size`: None
+- `gradient_accumulation_steps`: 4
+- `eval_accumulation_steps`: None
+- `learning_rate`: 2e-05
+- `weight_decay`: 0.0
+- `adam_beta1`: 0.9
+- `adam_beta2`: 0.999
+- `adam_epsilon`: 1e-08
+- `max_grad_norm`: 1.0
+- `num_train_epochs`: 3.0
+- `max_steps`: 1500
+- `lr_scheduler_type`: cosine
+- `lr_scheduler_kwargs`: {}
+- `warmup_ratio`: 0.1
+- `warmup_steps`: 5
+- `log_level`: passive
+- `log_level_replica`: warning
+- `log_on_each_node`: True
+- `logging_nan_inf_filter`: True
+- `save_safetensors`: True
+- `save_on_each_node`: False
+- `save_only_model`: False
+- `restore_callback_states_from_checkpoint`: False
+- `no_cuda`: False
+- `use_cpu`: False
+- `use_mps_device`: False
+- `seed`: 42
+- `data_seed`: None
+- `jit_mode_eval`: False
+- `use_ipex`: False
+- `bf16`: True
+- `fp16`: False
+- `fp16_opt_level`: O1
+- `half_precision_backend`: auto
+- `bf16_full_eval`: False
+- `fp16_full_eval`: False
+- `tf32`: True
+- `local_rank`: 0
+- `ddp_backend`: None
+- `tpu_num_cores`: None
+- `tpu_metrics_debug`: False
+- `debug`: []
+- `dataloader_drop_last`: True
+- `dataloader_num_workers`: 0
+- `dataloader_prefetch_factor`: None
+- `past_index`: -1
+- `disable_tqdm`: False
+- `remove_unused_columns`: True
+- `label_names`: None
+- `load_best_model_at_end`: False
+- `ignore_data_skip`: False
+- `fsdp`: []
+- `fsdp_min_num_params`: 0
+- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+- `fsdp_transformer_layer_cls_to_wrap`: None
+- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+- `deepspeed`: None
+- `label_smoothing_factor`: 0.0
+- `optim`: adamw_torch_fused
+- `optim_args`: None
+- `adafactor`: False
+- `group_by_length`: False
+- `length_column_name`: length
+- `ddp_find_unused_parameters`: None
+- `ddp_bucket_cap_mb`: None
+- `ddp_broadcast_buffers`: False
+- `dataloader_pin_memory`: True
+- `dataloader_persistent_workers`: False
+- `skip_memory_metrics`: True
+- `use_legacy_prediction_loop`: False
+- `push_to_hub`: False
+- `resume_from_checkpoint`: None
+- `hub_model_id`: None
+- `hub_strategy`: every_save
+- `hub_private_repo`: False
+- `hub_always_push`: False
+- `gradient_checkpointing`: True
+- `gradient_checkpointing_kwargs`: {'use_reentrant': False}
+- `include_inputs_for_metrics`: False
+- `eval_do_concat_batches`: True
+- `fp16_backend`: auto
+- `push_to_hub_model_id`: None
+- `push_to_hub_organization`: None
+- `mp_parameters`:
+- `auto_find_batch_size`: False
+- `full_determinism`: False
+- `torchdynamo`: None
+- `ray_scope`: last
+- `ddp_timeout`: 1800
+- `torch_compile`: False
+- `torch_compile_backend`: None
+- `torch_compile_mode`: None
+- `dispatch_batches`: None
+- `split_batches`: None
+- `include_tokens_per_second`: False
+- `include_num_input_tokens_seen`: False
+- `neftune_noise_alpha`: None
+- `optim_target_modules`: None
+- `batch_eval_metrics`: False
+- `batch_sampler`: no_duplicates
+- `multi_dataset_batch_sampler`: proportional
+</details>
+### Training Logs
+| Epoch  | Step | Training Loss | retrival loss |
+|:------:|:----:|:-------------:|:-------------:|
+| 0.6466 | 500  | 0.0424        | 0.0060        |
+| 1.2932 | 1000 | 0.0073        | 0.0040        |
+### Framework Versions
+- Python: 3.10.12
+- Sentence Transformers: 3.0.1
+- Transformers: 4.41.2
+- PyTorch: 2.2.0+cu121
+- Accelerate: 0.32.1
+- Datasets: 2.20.0
+- Tokenizers: 0.19.1
+## Citation
+### BibTeX
+#### Sentence Transformers
+```bibtex
+@inproceedings{reimers-2019-sentence-bert,
+    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+    author = "Reimers, Nils and Gurevych, Iryna",
+    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+    month = "11",
+    year = "2019",
+    publisher = "Association for Computational Linguistics",
+    url = "https://arxiv.org/abs/1908.10084",
+}
+```
+<!--
+## Glossary
+*Clearly define terms in order to be accessible across audiences.*
+-->
+<!--
+## Model Card Authors
+*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+-->
+<!--
+## Model Card Contact
+*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+-->

added_tokens.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "<|endoftext|>": 151643,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644
+}

config.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "_name_or_path": "dunzhang/stella_en_1.5B_v5",
+  "architectures": [
+    "Qwen2Model"
+  ],
+  "attention_dropout": 0.0,
+  "auto_map": {
+    "AutoModel": "dunzhang/stella_en_1.5B_v5--modeling_qwen.Qwen2Model",
+    "AutoModelForCausalLM": "dunzhang/stella_en_1.5B_v5--modeling_qwen.Qwen2ForCausalLM",
+    "AutoModelForSequenceClassification": "dunzhang/stella_en_1.5B_v5--modeling_qwen.Qwen2ForSequenceClassification"
+  },
+  "bos_token_id": 151643,
+  "eos_token_id": 151643,
+  "hidden_act": "silu",
+  "hidden_size": 1536,
+  "initializer_range": 0.02,
+  "intermediate_size": 8960,
+  "max_position_embeddings": 131072,
+  "max_window_layers": 21,
+  "model_type": "qwen2",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 2,
+  "rms_norm_eps": 1e-06,
+  "rope_theta": 1000000.0,
+  "sliding_window": 131072,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.41.2",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151646
+}

config_sentence_transformers.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "__version__": {
+    "sentence_transformers": "3.0.1",
+    "transformers": "4.41.2",
+    "pytorch": "2.2.0+cu121"
+  },
+  "prompts": {
+    "s2p_query": "Instruct: Given a web search query, retrieve relevant passages that answer the query.\nQuery: ",
+    "s2s_query": "Instruct: Retrieve semantically similar text.\nQuery: "
+  },
+  "default_prompt_name": null,
+  "similarity_fn_name": "cosine"
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2ec8a392f95a1f155b4d57972b154604cc70ff25352b4254cad88946040fab0c
+size 3086574240

modules.json ADDED Viewed

	@@ -0,0 +1,20 @@

+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  },
+  {
+    "idx": 2,
+    "name": "2",
+    "path": "2_Dense",
+    "type": "sentence_transformers.models.Dense"
+  }
+]

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:11c670a746630413f1f536caf0ba77cb46b08aef11b3845af2114daffd7c83d8
+size 6185963010

rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:159cd6c963eb6b01d75717fcb81492f36eb18f1afa0c1a66f97b36c513bd9f0e
+size 14960

rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5d0ce6947923edf04c42bdbbc2f882b694dc995513bb185049afc416a54f51e2
+size 14960

rng_state_2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b07d14c4598c4559ff05eb9786c7151630ae77cfb373a1716b25fdab1d689f90
+size 14960

rng_state_3.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d5227cda574fd4d158de436f6a372175b834abfe7c0d44be4bace22574878d70
+size 14960

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c1c966d855305dbdbe3e7516b03da5b72d250a16a08641463cf1ff44c6809016
+size 1064

sentence_bert_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "max_seq_length": 8096,
+  "do_lower_case": false
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,20 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>"
+  ],
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,50 @@

+{
+  "add_eos_token": true,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>"
+  ],
+  "auto_map": {
+    "AutoTokenizer": [
+      "dunzhang/stella_en_1.5B_v5--tokenization_qwen.Qwen2Tokenizer",
+      "dunzhang/stella_en_1.5B_v5--tokenization_qwen.Qwen2TokenizerFast"
+    ]
+  },
+  "bos_token": null,
+  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "model_max_length": 512,
+  "pad_token": "<|endoftext|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.2932428063368897,
+  "eval_steps": 500,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.6466214031684449,
+      "grad_norm": 1.7960103750228882,
+      "learning_rate": 1.5060539027168317e-05,
+      "loss": 0.0424,
+      "step": 500
+    },
+    {
+      "epoch": 0.6466214031684449,
+      "eval_retrival_loss": 0.0060070110484957695,
+      "eval_retrival_runtime": 2.5865,
+      "eval_retrival_samples_per_second": 386.62,
+      "eval_retrival_steps_per_second": 24.357,
+      "step": 500
+    },
+    {
+      "epoch": 1.2932428063368897,
+      "grad_norm": 0.3418584167957306,
+      "learning_rate": 5.030361696847706e-06,
+      "loss": 0.0073,
+      "step": 1000
+    },
+    {
+      "epoch": 1.2932428063368897,
+      "eval_retrival_loss": 0.004016744904220104,
+      "eval_retrival_runtime": 2.5619,
+      "eval_retrival_samples_per_second": 390.338,
+      "eval_retrival_steps_per_second": 24.591,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1500,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:49382efc5de7184fd84cdf4d63f6b4a17f5f1b7ed4f3c22b66dfa4d94673ed04
+size 5368

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff