Fixes to stats and adding gene dict (attempt number 2)
#13
by
davidjwen
- opened
- MANIFEST.in +1 -0
- geneformer/in_silico_perturber_stats.py +3 -3
MANIFEST.in
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
include geneformer/gene_median_dictionary.pkl
|
2 |
include geneformer/token_dictionary.pkl
|
|
|
|
1 |
include geneformer/gene_median_dictionary.pkl
|
2 |
include geneformer/token_dictionary.pkl
|
3 |
+
include geneformer/gene_name_id_dict.pkl
|
geneformer/in_silico_perturber_stats.py
CHANGED
@@ -150,10 +150,10 @@ def isp_stats_vs_null(cos_sims_df, dict_list, null_dict_list):
|
|
150 |
null_shifts = []
|
151 |
|
152 |
for dict_i in dict_list:
|
153 |
-
|
154 |
|
155 |
for dict_i in null_dict_list:
|
156 |
-
|
157 |
|
158 |
cos_sims_full_df.loc[i, "Test_avg_shift"] = np.mean(test_shifts)
|
159 |
cos_sims_full_df.loc[i, "Null_avg_shift"] = np.mean(null_shifts)
|
@@ -302,6 +302,7 @@ class InSilicoPerturberStats:
|
|
302 |
self.gene_id_name_dict = invert_dict(self.gene_name_id_dict)
|
303 |
|
304 |
# obtain total gene list
|
|
|
305 |
gene_list = get_gene_list(dict_list)
|
306 |
|
307 |
# initiate results dataframe
|
@@ -314,7 +315,6 @@ class InSilicoPerturberStats:
|
|
314 |
for genes in gene_list]}, \
|
315 |
index=[i for i in range(len(gene_list))])
|
316 |
|
317 |
-
dict_list = read_dictionaries(input_data_directory, "cell")
|
318 |
if self.mode == "goal_state_shift":
|
319 |
cos_sims_df = isp_stats_to_goal_state(cos_sims_df_initial, dict_list)
|
320 |
|
|
|
150 |
null_shifts = []
|
151 |
|
152 |
for dict_i in dict_list:
|
153 |
+
test_shifts += dict_i.get((token, "cell_emb"),[])
|
154 |
|
155 |
for dict_i in null_dict_list:
|
156 |
+
null_shifts += dict_i.get((token, "cell_emb"),[])
|
157 |
|
158 |
cos_sims_full_df.loc[i, "Test_avg_shift"] = np.mean(test_shifts)
|
159 |
cos_sims_full_df.loc[i, "Null_avg_shift"] = np.mean(null_shifts)
|
|
|
302 |
self.gene_id_name_dict = invert_dict(self.gene_name_id_dict)
|
303 |
|
304 |
# obtain total gene list
|
305 |
+
dict_list = read_dictionaries(input_data_directory, "cell")
|
306 |
gene_list = get_gene_list(dict_list)
|
307 |
|
308 |
# initiate results dataframe
|
|
|
315 |
for genes in gene_list]}, \
|
316 |
index=[i for i in range(len(gene_list))])
|
317 |
|
|
|
318 |
if self.mode == "goal_state_shift":
|
319 |
cos_sims_df = isp_stats_to_goal_state(cos_sims_df_initial, dict_list)
|
320 |
|