Files changed (1) hide show
  1. README.md +195 -0
README.md CHANGED
@@ -15,8 +15,200 @@ datasets:
15
  - bookcorpus
16
  - bookcorpusopen
17
  - nRuaif/OpenOrca-GPT3.5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  language:
19
  - en
 
 
 
 
20
  metrics:
21
  - accuracy
22
  - bertscore
@@ -25,8 +217,11 @@ metrics:
25
  - bleu
26
  - perplexity
27
  - mean_iou
 
28
  tags:
29
  - code
 
 
30
  ---
31
  # Model Card for Model ID
32
 
 
15
  - bookcorpus
16
  - bookcorpusopen
17
  - nRuaif/OpenOrca-GPT3.5
18
+ - irds/codesearchnet
19
+ - giganticode/java-cmpx-v1
20
+ - nickrosh/Evol-Instruct-Code-80k-v1
21
+ - bigcode/starcoderdata
22
+ - bigcode/the-stack
23
+ - bigcode/the-stack-smol
24
+ - Cdaprod/AI-Developer-Prompts
25
+ - code_x_glue_ct_code_to_text
26
+ - codeparrot/github-code
27
+ - codeparrot/github-code-clean
28
+ - code_x_glue_cc_code_completion_line
29
+ - >-
30
+ autoevaluate/autoeval-eval-jeffdshen__inverse_superglue_mixedp1-jeffdshen__inverse-63643c-1665558893
31
+ - bentrevett/multi30k
32
+ - edbeeching/decision_transformer_gym_replay
33
+ - psyche/common_crawl
34
+ - Birchlabs/openai-prm800k-solutions-only
35
+ - openchat/openchat_sharegpt4_dataset
36
+ - Open-Orca/OpenOrca
37
+ - cjvt/slownet
38
+ - para_crawl
39
+ - zeroshot/twitter-financial-news-sentiment
40
+ - laugustyniak/political-advertising-pl
41
+ - code_search_net
42
+ - sukaka/novelai-webui
43
+ - P1ayer-1/chatgpt-conversations-chatlogs.net
44
+ - daniel2588/sarcasm
45
+ - psmathur/orca_minis_uncensored_dataset
46
+ - player1537/Bloom-560m-trained-on-Wizard-Vicuna-Uncensored-trained-on-Based
47
+ - shahules786/prosocial-nsfw-reddit
48
+ - Thewillonline/reddit-sarcasm
49
+ - datasciencemmw/current-data
50
+ - Oniichat/bluemoon_roleplay_chat_data_300k_messages
51
+ - dell-research-harvard/AmericanStories
52
+ - b-mc2/sql-create-context
53
+ - rahulmallah/autotrain-data-emotion-detection
54
+ - theblackcat102/multiround-programming-convo
55
+ - Lsavints/software_knowledgebase
56
+ - RazinAleks/SO-Python_QA-Web_Development_class
57
+ - codeparrot/apps
58
+ - vlsp-2023-vllm/en-to-vi-formal-informal-tranlations
59
+ - fraug-library/english_contractions_extensions
60
+ - spencer/software_slacks
61
+ - Abirate/english_quotes
62
+ - Nexdata/American_English_Natural_Dialogue_Speech_Data
63
+ - Nexdata/Latin_American_Speaking_English_Speech_Data_by_Mobile_Phone
64
+ - Nexdata/American_English_Speech_Data_by_Mobile_Phone_Reading
65
+ - Nexdata/American_English_Speech_Synthesis_Corpus-Female
66
+ - rombodawg/LimitlessCodeTraining
67
+ - RikoteMaster/Emotion_Recognition_4_llama2
68
+ - Villian7/Emotions_Data
69
+ - alanland/llama2-self-cognition
70
+ - CognitiveScience/coscidata
71
+ - bibidentuhanoi/gideon_self_cognition
72
+ - gollark/consciousness
73
+ - juletxara/visual-spatial-reasoning
74
+ - lintang/numerical_reasoning_arithmetic
75
+ - reasoning-machines/gsm-hard
76
+ - open-source-metrics/reinforcement-learning-checkpoint-downloads
77
+ - igbo_english_machine_translation
78
+ - US-Artificial-Intelligence/algemap
79
+ - rombodawg/2XUNCENSORED_alpaca_840k_Evol_USER_ASSIS
80
+ - griffin/chain_of_density
81
+ - >-
82
+ shirsh10mall/LLM_Instruct_Learning_Project_Preprocessed_Tokenized_Open_Orca_Dataset_Flan_T5
83
+ - Thaweewat/chain-of-thought-74k-th
84
+ - AlekseyKorshuk/chain-of-thoughts-chatml-deduplicated
85
+ - dair-ai/emotion
86
+ - hita/social-behavior-emotions
87
+ - Bingsu/Human_Action_Recognition
88
+ - anjandash/java-8m-methods-v1
89
+ - nadiamaqbool81/java_code_instructions_1.178k_alpaca
90
+ - DavidMOBrien/8000-java
91
+ - rombodawg/LimitlessCodeTraining_1k-Python-Javascript_GuanacoFormat
92
+ - angie-chen55/javascript-github-code
93
+ - kye/all-lucidrain-python-3
94
+ - Fraser/python-state-changes
95
+ - ammarnasr/the-stack-ruby-clean
96
+ - ammarnasr/the-stack-rust-clean
97
+ - seyyedaliayati/solidity-dataset
98
+ - jkhedri/psychology-dataset
99
+ - KonradSzafer/stackoverflow_linux
100
+ - vikp/textbook_quality_programming
101
+ - rombodawg/LosslessMegaCodeTrainingV3_MINI
102
+ - BelleGroup/multiturn_chat_0.8M
103
+ - smangrul/code-chat-assistant-v1
104
+ - goendalf666/sales-textbook_for_convincing_and_selling
105
+ - readerbench/ConversationalAgent-Ro
106
+ - beurkinger/autotrain-data-human-action-recognition
107
+ - jpwahle/autoencoder-paraphrase-dataset
108
+ - jpwahle/autoregressive-paraphrase-dataset
109
+ - teknium/GPT4-LLM-Cleaned
110
+ - Anthropic/model-written-evals
111
+ - openai_humaneval
112
+ - kye/all-google-ai-python-code
113
+ - kye/all-openai-github-code
114
+ - EleutherAI/lambada_openai
115
+ - CShorten/ML-ArXiv-Papers
116
+ - WaltonFuture/InstructionGPT-4
117
+ - open-llm-leaderboard/details_AIDC-ai-business__Marcoroni-70B
118
+ - seansullivan/INT-Business-Syllabus
119
+ - theoldmandthesea/17k_business_book
120
+ - SunRise228/business-doc
121
+ - gauravshrm211/VC-startup-evaluation-for-investment
122
+ - TuningAI/Startups_V1
123
+ - TuningAI/Startups_V2
124
+ - AdiOO7/llama-2-finance
125
+ - scillm/scientific_papers
126
+ - gokuls/wiki_book_corpus_complete_processed_bert_dataset
127
+ - the_pile_books3
128
+ - go_emotions
129
+ - yizhongw/self_instruct
130
+ - codeparrot/self-instruct-starcoder
131
+ - Amani27/massive_translation_dataset
132
+ - huggingface/transformers-metadata
133
+ - hf-internal-testing/transformers-metadata
134
+ - commonsense_qa
135
+ - nlplabtdtu/test-edu-crawl
136
+ - kernelmachine/open-license-corpus
137
+ - BDas/EnglishNLPDataset
138
+ - CyberNative/github_cybersecurity_READMEs
139
+ - thomwolf/github-python
140
+ - CM/codexglue_code2text_java
141
+ - autoevaluate/autoeval-staging-eval-project-glue-f16e6c43-14015917
142
+ - lemonteaa/algorithmic-reasoning-seed
143
+ - EmpathyFirstMedia/algolia
144
+ - vicgalle/alpaca-gpt4
145
+ - pariajm/sharif_emotional_speech_dataset
146
+ - lighteval/synthetic_reasoning_natural
147
+ - jxu124/llava_complex_reasoning_77k
148
+ - bibidentuhanoi/gideon_self_cognition_text
149
+ - ohilikeit/empathetic_dialogues_mutli_turn_ko
150
+ - KevinZ/psycholinguistic_eval
151
+ - fiveflow/psychology-dataset
152
+ - shahidul034/text_generation_model_data
153
+ - qwedsacf/story-generation
154
+ - EnigmaOfTheWorld/b-mc2-sql-create-context
155
+ - HuggingFaceH4/testing_self_instruct_small
156
+ - RUCAIBox/Data-to-text-Generation
157
+ - Fhrozen/AudioSet2K22
158
+ - Chr0my/Epidemic_sounds
159
+ - ChristophSchuhmann/lyrics-index
160
+ - Cropinky/rap_lyrics_english
161
+ - tsterbak/eurovision-lyrics-1956-2023
162
+ - brunokreiner/genius-lyrics
163
+ - google/MusicCaps
164
+ - ccmusic-database/music_genre
165
+ - Hyeon2/riffusion-musiccaps-dataset
166
+ - SamAct/autotrain-data-musicprompt
167
+ - Chr0my/Epidemic_music
168
+ - juliensimon/autonlp-data-song-lyrics
169
+ - Datatang/North_American_English_Speech_Data_by_Mobile_Phone_and_PC
170
+ - Chr0my/freesound.org
171
+ - teticio/audio-diffusion-256
172
+ - KELONMYOSA/dusha_emotion_audio
173
+ - Ar4ikov/iemocap_audio_text_splitted
174
+ - flexthink/ljspeech
175
+ - mozilla-foundation/common_voice_13_0
176
+ - facebook/voxpopuli
177
+ - SocialGrep/one-million-reddit-jokes
178
+ - breadlicker45/human-midi-rlhf
179
+ - breadlicker45/midi-gpt-music-small
180
+ - projectlosangeles/Los-Angeles-MIDI-Dataset
181
+ - huggingartists/epic-rap-battles-of-history
182
+ - SocialGrep/one-million-reddit-confessions
183
+ - shahules786/prosocial-nsfw-reddit
184
+ - Thewillonline/reddit-sarcasm
185
+ - autoevaluate/autoeval-eval-futin__guess-vi-4200fb-2012366606
186
+ - lmsys/chatbot_arena_conversations
187
+ - mozilla-foundation/common_voice_11_0
188
+ - mozilla-foundation/common_voice_4_0
189
+ - dell-research-harvard/AmericanStories
190
+ - zZWipeoutZz/insane_style
191
+ - mu-llama/MusicQA
192
+ - RaphaelOlivier/whisper_adversarial_examples
193
+ - huggingartists/metallica
194
+ - vldsavelyev/guitar_tab
195
+ - NLPCoreTeam/humaneval_ru
196
+ - seungheondoh/audioset-music
197
+ - gary109/onset-singing3_corpora_parliament_processed_MIR-ST500
198
+ - LDD5522/Rock_Vocals
199
+ - huggingartists/rage-against-the-machine
200
+ - huggingartists/chester-bennington
201
+ - huggingartists/logic
202
+ - cmsolson75/artist_song_lyric_dataset
203
+ - BhavyaMuni/artist-lyrics
204
+ - vjain/emotional_intelligence
205
+ - mhenrichsen/context-aware-splits
206
  language:
207
  - en
208
+ - es
209
+ - it
210
+ - ru
211
+ - la
212
  metrics:
213
  - accuracy
214
  - bertscore
 
217
  - bleu
218
  - perplexity
219
  - mean_iou
220
+ - hyperml/balanced_accuracy
221
  tags:
222
  - code
223
+ - music
224
+ library_name: transformers
225
  ---
226
  # Model Card for Model ID
227