jsunn-y committed on
Commit
69817f5
1 Parent(s): 82ecaf2

adding non mds files and most large files

Browse files
.gitattributes CHANGED
@@ -1,4 +1,5 @@
1
  *.7z filter=lfs diff=lfs merge=lfs -text
 
2
  *.csv filter=lfs diff=lfs merge=lfs -text
3
  *.txt filter=lfs diff=lfs merge=lfs -text
4
  *.tsv filter=lfs diff=lfs merge=lfs -text
 
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.mds filter=lfs diff=lfs merge=lfs -text
3
  *.csv filter=lfs diff=lfs merge=lfs -text
4
  *.txt filter=lfs diff=lfs merge=lfs -text
5
  *.tsv filter=lfs diff=lfs merge=lfs -text
data/raw_data/swissprot.fasta ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78cdcf474e4ca95fa1d7fd1045c3dc969e6dbf9a97008a2f715bec6fcb0754df
3
+ size 285210057
data/raw_data/swissprot_proteinDT.fasta ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:813ede0b20c67ca41557aab9c5795822f1d3e7dccb8cb6e3df4790d83f5f4f9e
3
+ size 119224170
data/ref_databases/swissprot.dmnd ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76ded45f498eb22861e7cc0cb640aff3e038f72d52920183ee2a6249fde8a6bc
3
+ size 291193859
data/ref_databases/swissprot_ProteinDT.dmnd ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e32f5258baa987ffd5504c54bb61e021554dfc15bca563d2bd755b74fe3a011
3
+ size 126280490
data/ref_databases/swissprot_enzyme.tsv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4945b4ac736cabc61468137d7c8dcfa133e6fb2ced89555d21063ad6c996e181
3
+ size 283542514
data/ref_databases/swissprot_proteinDT_text.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ca7a8f9eaf1c6eb8cfe70f566897b6b68b00107a65d2f3534a4e0669034ebb2
3
+ size 327837815
data/sharded_datasets/swissprot-text/train/index.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"shards": [{"column_encodings": ["str", "str"], "column_names": ["sequence", "text"], "column_sizes": [null, null], "compression": null, "format": "mds", "hashes": [], "raw_data": {"basename": "shard.00000.mds", "bytes": 67107999, "hashes": {}}, "samples": 89072, "size_limit": 67108864, "version": 2, "zip_data": null}, {"column_encodings": ["str", "str"], "column_names": ["sequence", "text"], "column_sizes": [null, null], "compression": null, "format": "mds", "hashes": [], "raw_data": {"basename": "shard.00001.mds", "bytes": 67108747, "hashes": {}}, "samples": 89176, "size_limit": 67108864, "version": 2, "zip_data": null}, {"column_encodings": ["str", "str"], "column_names": ["sequence", "text"], "column_sizes": [null, null], "compression": null, "format": "mds", "hashes": [], "raw_data": {"basename": "shard.00002.mds", "bytes": 67108240, "hashes": {}}, "samples": 88865, "size_limit": 67108864, "version": 2, "zip_data": null}, {"column_encodings": ["str", "str"], "column_names": ["sequence", "text"], "column_sizes": [null, null], "compression": null, "format": "mds", "hashes": [], "raw_data": {"basename": "shard.00003.mds", "bytes": 67108288, "hashes": {}}, "samples": 89535, "size_limit": 67108864, "version": 2, "zip_data": null}, {"column_encodings": ["str", "str"], "column_names": ["sequence", "text"], "column_sizes": [null, null], "compression": null, "format": "mds", "hashes": [], "raw_data": {"basename": "shard.00004.mds", "bytes": 63335893, "hashes": {}}, "samples": 84365, "size_limit": 67108864, "version": 2, "zip_data": null}], "version": 2}
data/text2encoding.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ff3049ddd49bb6309e351e6b8d4a477e35ff61c19d6aa8bb5a94225e866480d
3
+ size 310405882
data/useful_from_ProteinDT/pairwise_representation.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40a211a4c22cd0f7ef6792d8564a4ec0a2df6f1952595915d834639c36f99e00
3
+ size 451597878
data/useful_from_ProteinDT/protein_sequence.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46478541c5ba67cd226ac8d264af7a73d49b0f2f6d67a90df3b0af5b5468d121
3
+ size 227204222
data/useful_from_ProteinDT/text_sequence.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34951890adfa52bf692dc5dd33047291a2d56ce8a44bfbf90113115073d7bd08
3
+ size 213314903