Commit
•
b294421
1
Parent(s):
941390d
Get the gene keys and gene list keys from the token dictionary instead of medians (#304)
Browse files
- Get the gene keys and gene list keys from the token dictionary instead of medians (faab0d7678ab0347fae033a620c388ca970b59e0)
Co-authored-by: Han Chen <hchen725@users.noreply.huggingface.co>
- geneformer/tokenizer.py +1 -1
geneformer/tokenizer.py
CHANGED
@@ -133,7 +133,7 @@ class TranscriptomeTokenizer:
|
|
133 |
self.gene_token_dict = pickle.load(f)
|
134 |
|
135 |
# gene keys for full vocabulary
|
136 |
-
self.gene_keys = list(self.gene_median_dict.keys())
|
137 |
|
138 |
# protein-coding and miRNA gene list dictionary for selecting .loom rows for tokenization
|
139 |
self.genelist_dict = dict(zip(self.gene_keys, [True] * len(self.gene_keys)))
|
|
|
133 |
self.gene_token_dict = pickle.load(f)
|
134 |
|
135 |
# gene keys for full vocabulary
|
136 |
+
self.gene_keys = list(self.gene_token_dict.keys())
|
137 |
|
138 |
# protein-coding and miRNA gene list dictionary for selecting .loom rows for tokenization
|
139 |
self.genelist_dict = dict(zip(self.gene_keys, [True] * len(self.gene_keys)))
|