jsunn-y
committed on
Commit
•
69817f5
1
Parent(s):
82ecaf2
adding non mds files and most large files
Browse files
- .gitattributes +1 -0
- data/raw_data/swissprot.fasta +3 -0
- data/raw_data/swissprot_proteinDT.fasta +3 -0
- data/ref_databases/swissprot.dmnd +3 -0
- data/ref_databases/swissprot_ProteinDT.dmnd +3 -0
- data/ref_databases/swissprot_enzyme.tsv +3 -0
- data/ref_databases/swissprot_proteinDT_text.csv +3 -0
- data/sharded_datasets/swissprot-text/train/index.json +1 -0
- data/text2encoding.pt +3 -0
- data/useful_from_ProteinDT/pairwise_representation.npz +3 -0
- data/useful_from_ProteinDT/protein_sequence.txt +3 -0
- data/useful_from_ProteinDT/text_sequence.txt +3 -0
.gitattributes
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
*.7z filter=lfs diff=lfs merge=lfs -text
|
|
|
2 |
*.csv filter=lfs diff=lfs merge=lfs -text
|
3 |
*.txt filter=lfs diff=lfs merge=lfs -text
|
4 |
*.tsv filter=lfs diff=lfs merge=lfs -text
|
|
|
1 |
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.mds filter=lfs diff=lfs merge=lfs -text
|
3 |
*.csv filter=lfs diff=lfs merge=lfs -text
|
4 |
*.txt filter=lfs diff=lfs merge=lfs -text
|
5 |
*.tsv filter=lfs diff=lfs merge=lfs -text
|
data/raw_data/swissprot.fasta
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:78cdcf474e4ca95fa1d7fd1045c3dc969e6dbf9a97008a2f715bec6fcb0754df
|
3 |
+
size 285210057
|
data/raw_data/swissprot_proteinDT.fasta
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:813ede0b20c67ca41557aab9c5795822f1d3e7dccb8cb6e3df4790d83f5f4f9e
|
3 |
+
size 119224170
|
data/ref_databases/swissprot.dmnd
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76ded45f498eb22861e7cc0cb640aff3e038f72d52920183ee2a6249fde8a6bc
|
3 |
+
size 291193859
|
data/ref_databases/swissprot_ProteinDT.dmnd
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e32f5258baa987ffd5504c54bb61e021554dfc15bca563d2bd755b74fe3a011
|
3 |
+
size 126280490
|
data/ref_databases/swissprot_enzyme.tsv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4945b4ac736cabc61468137d7c8dcfa133e6fb2ced89555d21063ad6c996e181
|
3 |
+
size 283542514
|
data/ref_databases/swissprot_proteinDT_text.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ca7a8f9eaf1c6eb8cfe70f566897b6b68b00107a65d2f3534a4e0669034ebb2
|
3 |
+
size 327837815
|
data/sharded_datasets/swissprot-text/train/index.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"shards": [{"column_encodings": ["str", "str"], "column_names": ["sequence", "text"], "column_sizes": [null, null], "compression": null, "format": "mds", "hashes": [], "raw_data": {"basename": "shard.00000.mds", "bytes": 67107999, "hashes": {}}, "samples": 89072, "size_limit": 67108864, "version": 2, "zip_data": null}, {"column_encodings": ["str", "str"], "column_names": ["sequence", "text"], "column_sizes": [null, null], "compression": null, "format": "mds", "hashes": [], "raw_data": {"basename": "shard.00001.mds", "bytes": 67108747, "hashes": {}}, "samples": 89176, "size_limit": 67108864, "version": 2, "zip_data": null}, {"column_encodings": ["str", "str"], "column_names": ["sequence", "text"], "column_sizes": [null, null], "compression": null, "format": "mds", "hashes": [], "raw_data": {"basename": "shard.00002.mds", "bytes": 67108240, "hashes": {}}, "samples": 88865, "size_limit": 67108864, "version": 2, "zip_data": null}, {"column_encodings": ["str", "str"], "column_names": ["sequence", "text"], "column_sizes": [null, null], "compression": null, "format": "mds", "hashes": [], "raw_data": {"basename": "shard.00003.mds", "bytes": 67108288, "hashes": {}}, "samples": 89535, "size_limit": 67108864, "version": 2, "zip_data": null}, {"column_encodings": ["str", "str"], "column_names": ["sequence", "text"], "column_sizes": [null, null], "compression": null, "format": "mds", "hashes": [], "raw_data": {"basename": "shard.00004.mds", "bytes": 63335893, "hashes": {}}, "samples": 84365, "size_limit": 67108864, "version": 2, "zip_data": null}], "version": 2}
|
data/text2encoding.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ff3049ddd49bb6309e351e6b8d4a477e35ff61c19d6aa8bb5a94225e866480d
|
3 |
+
size 310405882
|
data/useful_from_ProteinDT/pairwise_representation.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:40a211a4c22cd0f7ef6792d8564a4ec0a2df6f1952595915d834639c36f99e00
|
3 |
+
size 451597878
|
data/useful_from_ProteinDT/protein_sequence.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46478541c5ba67cd226ac8d264af7a73d49b0f2f6d67a90df3b0af5b5468d121
|
3 |
+
size 227204222
|
data/useful_from_ProteinDT/text_sequence.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:34951890adfa52bf692dc5dd33047291a2d56ce8a44bfbf90113115073d7bd08
|
3 |
+
size 213314903
|