Move example input files to dataset repository to include example datasets for fine-tuning

Browse files

Files changed (9) hide show

benchmarking/castle_cell_type_annotation.r +0 -80
benchmarking/prepare_datasplits_for_cell_type_annotation.ipynb +0 -288
benchmarking/randomForest_token_classifier_dosageTF_10k.ipynb +0 -0
benchmarking/scDeepsort_train_predict.ipynb +0 -166
examples/example_input_files/bivalent_promoters/bivalent_gene_labels.txt +0 -107
examples/example_input_files/bivalent_promoters/lys4_only_gene_labels.txt +0 -80
examples/example_input_files/bivalent_promoters/no_methylation_gene_labels.txt +0 -42
examples/example_input_files/dosage_sensitive_tfs/dosage_sens_tf_labels.csv +0 -369
examples/example_input_files/gene_info_table.csv +0 -0

benchmarking/castle_cell_type_annotation.r DELETED Viewed

@@ -1,80 +0,0 @@
-# Usage: Rscript castle_cell_type_annotation.r organ [seed]
-#
-# Trains and evaluates an XGBoost cell type classifier for one organ:
-#   1. reads the train/test count tables and cell type labels for `organ`
-#   2. ranks features by mean expression and by normalized mutual information
-#      with the training labels, and keeps the union of the top of each ranking
-#   3. bins expression values and expands each feature into bin indicators
-#   4. fits XGBoost on a random subsample (~80%) of the training rows
-#   5. scores the test rows and writes accuracy and macro F1 to a CSV file
-#
-# Arguments:
-#   organ  file name prefix of the inputs/outputs under `rootdir` (required)
-#   seed   optional integer; when supplied, seeds the RNG so that the random
-#          training subsample is reproducible
-#
-# parse ordered arguments
-args <- commandArgs(trailingOnly = TRUE)
-if (length(args) < 1 || is.na(args[1]) || !nzchar(args[1])) {
-  stop("Usage: Rscript castle_cell_type_annotation.r organ [seed]", call. = FALSE)
-}
-organ <- args[1]
-if (length(args) >= 2) {
-  set.seed(as.integer(args[2]))
-}
-suppressPackageStartupMessages(library(scater))
-suppressPackageStartupMessages(library(xgboost))
-suppressPackageStartupMessages(library(igraph))
-# bin edges passed to cut(): (-1,0], (0,1], (1,6], (6,Inf]
-BREAKS <- c(-1, 0, 1, 6, Inf)
-# number of top-ranked features taken from each of the two rankings
-nFeatures <- 100
-print(paste0("Training ", organ))
-# import training and test data
-# (count tables are transposed, so the CSV columns become matrix rows)
-rootdir <- "/path/to/data/"
-train_counts <- t(as.matrix(read.csv(file = paste0(rootdir, organ, "_filtered_data_train.csv"), row.names = 1)))
-test_counts <- t(as.matrix(read.csv(file = paste0(rootdir, organ, "_filtered_data_test.csv"), row.names = 1)))
-train_celltype <- as.matrix(read.csv(file = paste0(rootdir, organ, "_filtered_celltype_train.csv")))
-test_celltype <- as.matrix(read.csv(file = paste0(rootdir, organ, "_filtered_celltype_test.csv")))
-# select features
-sourceCellTypes <- as.factor(train_celltype[, "Cell_type"])
-ds <- rbind(train_counts, test_counts)
-ds[is.na(ds)] <- 0
-# TRUE for training rows, FALSE for test rows of the stacked matrix
-isSource <- c(rep(TRUE, nrow(train_counts)), rep(FALSE, nrow(test_counts)))
-sourceCounts <- ds[isSource, , drop = FALSE]
-topFeaturesAvg <- colnames(sourceCounts)[order(apply(sourceCounts, 2, mean), decreasing = TRUE)]
-topFeaturesMi <- names(sort(apply(sourceCounts, 2, function(x) {
-  compare(cut(x, breaks = BREAKS), sourceCellTypes, method = "nmi")
-}), decreasing = TRUE))
-selectedFeatures <- union(head(topFeaturesAvg, nFeatures), head(topFeaturesMi, nFeatures))
-remove(sourceCounts)
-# NOTE(review): the diagonal and upper triangle are zeroed before the test, so
-# every column contains a value below 0.9 and this filter keeps all features
-# as written. Left unchanged so feature selection matches earlier runs;
-# confirm whether all(x < 0.9) was intended.
-tmp <- cor(ds[isSource, selectedFeatures], method = "pearson")
-tmp[!lower.tri(tmp)] <- 0
-selectedFeatures <- selectedFeatures[apply(tmp, 2, function(x) any(x < 0.9))]
-remove(tmp)
-# bin expression values and expand features by bins
-dsBins <- apply(ds[, selectedFeatures], 2, cut, breaks = BREAKS)
-nUniq <- apply(dsBins, 2, function(x) { length(unique(x)) })
-# drop = FALSE keeps a one-column selection as a matrix with its column name
-ds <- model.matrix(~ . , as.data.frame(dsBins[, nUniq > 1, drop = FALSE]))
-remove(dsBins, nUniq)
-# train model
-train <- runif(sum(isSource)) < 0.8
-trainData <- ds[isSource, , drop = FALSE][train, , drop = FALSE]
-# xgboost expects zero-based class codes
-trainLabels <- as.numeric(sourceCellTypes[train]) - 1
-nClasses <- nlevels(sourceCellTypes)
-# slightly different setup for multiclass and binary classification
-if (nClasses > 2) {
-  xg <- xgboost(data = trainData,
-       label = trainLabels,
-       objective = "multi:softmax", num_class = nClasses,
-       eta = 0.7, nthread = 5, nrounds = 20, verbose = 0,
-       gamma = 0.001, max_depth = 5, min_child_weight = 10)
-} else {
-  # an explicit classification objective is required here: without it the
-  # model is fit as a regression and predict() returns continuous scores
-  xg <- xgboost(data = trainData,
-       label = trainLabels,
-       objective = "binary:logistic",
-       eta = 0.7, nthread = 5, nrounds = 20, verbose = 0,
-       gamma = 0.001, max_depth = 5, min_child_weight = 10)
-}
-# validate model
-rawPredictions <- predict(xg, ds[!isSource, , drop = FALSE])
-if (nClasses > 2) {
-  # multi:softmax already returns zero-based class codes
-  predictedCodes <- rawPredictions
-} else {
-  # binary:logistic returns probabilities; threshold them into class codes
-  predictedCodes <- as.numeric(rawPredictions > 0.5)
-}
-testLabels <- test_celltype[, "Cell_type"]
-unseenLabels <- setdiff(unique(testLabels), levels(sourceCellTypes))
-if (length(unseenLabels) > 0) {
-  stop("Test labels not present in training data: ",
-       paste(unseenLabels, collapse = ", "), call. = FALSE)
-}
-# encode test labels with the training levels so codes agree with the model
-testCellTypes <- factor(testLabels, levels = levels(sourceCellTypes))
-classCodes <- seq_len(nClasses) - 1
-trueClasses <- factor(as.numeric(testCellTypes) - 1, levels = classCodes)
-predictedClasses <- factor(predictedCodes, levels = classCodes)
-# both factors share the same levels, so the table is always square and
-# row i / column i refer to the same class even if a class is never predicted
-cm <- as.matrix(table(Actual = trueClasses, Predicted = predictedClasses))
-n <- sum(cm)
-correct <- diag(cm) # number of correctly classified instances per class
-rowsums <- rowSums(cm) # number of instances per class
-colsums <- colSums(cm) # number of predictions per class
-accuracy <- sum(correct) / n
-# undefined ratios (0/0) count as 0 instead of propagating NaN
-precision <- ifelse(colsums > 0, correct / colsums, 0)
-recall <- ifelse(rowsums > 0, correct / rowsums, 0)
-f1 <- ifelse(precision + recall > 0, 2 * precision * recall / (precision + recall), 0)
-# average over classes that occur in the test labels or in the predictions
-classPresent <- (rowsums + colsums) > 0
-macroF1 <- mean(f1[classPresent])
-print(paste0(organ, " accuracy: ", accuracy))
-print(paste0(organ, " macroF1: ", macroF1))
-results_df <- data.frame(Accuracy = c(accuracy), macroF1 = c(macroF1))
-write.csv(results_df, paste0(rootdir, organ, "_castle_results_test.csv"), row.names = FALSE)

benchmarking/prepare_datasplits_for_cell_type_annotation.ipynb DELETED Viewed

@@ -1,288 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "25107132",
-   "metadata": {},
-   "source": [
-    "### Preparing train and test data splits for cell type annotation application"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "83d8d249-affe-45dd-915e-992b4b35b31a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "import pandas as pd\n",
-    "from sklearn.model_selection import train_test_split\n",
-    "from tqdm.notebook import tqdm\n",
-    "from collections import Counter\n",
-    "import pickle"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "e3e6a2bf-44c8-4164-9ecd-1686230ea8be",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['pancreas',\n",
-       " 'liver',\n",
-       " 'blood',\n",
-       " 'lung',\n",
-       " 'spleen',\n",
-       " 'placenta',\n",
-       " 'colorectum',\n",
-       " 'kidney',\n",
-       " 'brain']"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "rootdir = \"/path/to/data/\"\n",
-    "\n",
-    "# collect panel of tissues to test\n",
-    "dir_list = []\n",
-    "for dir_i in os.listdir(rootdir):\n",
-    "    if (\"results\" not in dir_i) & (os.path.isdir(os.path.join(rootdir, dir_i))):\n",
-    "        dir_list += [dir_i]\n",
-    "dir_list"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "0b205eec-a518-472a-ab90-dd63ef9803cd",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>filter_pass</th>\n",
-       "      <th>original_cell_id</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0</td>\n",
-       "      <td>C_1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>1</td>\n",
-       "      <td>C_2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>0</td>\n",
-       "      <td>C_3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>1</td>\n",
-       "      <td>C_4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>0</td>\n",
-       "      <td>C_5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9590</th>\n",
-       "      <td>1</td>\n",
-       "      <td>C_9591</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9591</th>\n",
-       "      <td>1</td>\n",
-       "      <td>C_9592</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9592</th>\n",
-       "      <td>1</td>\n",
-       "      <td>C_9593</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9593</th>\n",
-       "      <td>1</td>\n",
-       "      <td>C_9594</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9594</th>\n",
-       "      <td>1</td>\n",
-       "      <td>C_9595</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>9595 rows × 2 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "      filter_pass original_cell_id\n",
-       "0               0              C_1\n",
-       "1               1              C_2\n",
-       "2               0              C_3\n",
-       "3               1              C_4\n",
-       "4               0              C_5\n",
-       "...           ...              ...\n",
-       "9590            1           C_9591\n",
-       "9591            1           C_9592\n",
-       "9592            1           C_9593\n",
-       "9593            1           C_9594\n",
-       "9594            1           C_9595\n",
-       "\n",
-       "[9595 rows x 2 columns]"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# dictionary of cell barcodes that passed QC filtering applied by Geneformer \n",
-    "# to ensure same cells were used for comparison\n",
-    "with open(f\"{rootdir}deepsort_filter_dict.pickle\", \"rb\") as fp:\n",
-    "    filter_dict = pickle.load(fp)\n",
-    "\n",
-    "# for example:\n",
-    "filter_dict[\"human_Placenta9595_data\"]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "207e3571-0236-4493-83b3-a89b67b16cb2",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "for dir_name in tqdm(dir_list):\n",
-    "\n",
-    "    df = pd.DataFrame()\n",
-    "    ct_df = pd.DataFrame(columns=[\"Cell\",\"Cell_type\"])\n",
-    "    \n",
-    "    subrootdir = f\"{rootdir}{dir_name}/\"\n",
-    "    for subdir, dirs, files in os.walk(subrootdir):\n",
-    "        for i in range(len(files)):\n",
-    "            file = files[i]\n",
-    "            if file.endswith(\"_data.csv\"):\n",
-    "                file_prefix = file.replace(\"_data.csv\",\"\")\n",
-    "                sample_prefix = file.replace(\".csv\",\"\")\n",
-    "                filter_df = filter_dict[sample_prefix]\n",
-    "                sample_to_analyze = list(filter_df[filter_df[\"filter_pass\"]==1][\"original_cell_id\"])\n",
-    "                \n",
-    "                # collect data for each tissue\n",
-    "                df_i = pd.read_csv(f\"{subrootdir}{file}\", index_col=0)\n",
-    "                df_i = df_i[sample_to_analyze]\n",
-    "                df_i.columns = [f\"{i}_{cell_id}\" for cell_id in df_i.columns]\n",
-    "                df = pd.concat([df,df_i],axis=1)\n",
-    "                \n",
-    "                # collect cell type metadata\n",
-    "                ct_df_i = pd.read_csv(f\"{subrootdir}{file_prefix}_celltype.csv\", index_col=0)\n",
-    "                ct_df_i.columns = [\"Cell\",\"Cell_type\"]\n",
-    "                ct_df_i[\"Cell\"] = [f\"{i}_{cell_id}\" for cell_id in ct_df_i[\"Cell\"]]\n",
-    "                ct_df = pd.concat([ct_df,ct_df_i],axis=0)\n",
-    "        \n",
-    "    # per published scDeepsort method, filter data for cell types >0.5% of data\n",
-    "    ct_counts = Counter(ct_df[\"Cell_type\"])\n",
-    "    total_count = sum(ct_counts.values())\n",
-    "    nonrare_cell_types = [cell_type for cell_type,count in ct_counts.items() if count>(total_count*0.005)]\n",
-    "    nonrare_cells = list(ct_df[ct_df[\"Cell_type\"].isin(nonrare_cell_types)][\"Cell\"])\n",
-    "    df = df[df.columns.intersection(nonrare_cells)]\n",
-    "\n",
-    "    # split into 80/20 train/test data\n",
-    "    train, test = train_test_split(df.T, test_size=0.2)\n",
-    "    train = train.T\n",
-    "    test = test.T  \n",
-    "    \n",
-    "    # save filtered train/test data\n",
-    "    train.to_csv(f\"{subrootdir}{dir_name}_filtered_data_train.csv\")\n",
-    "    test.to_csv(f\"{subrootdir}{dir_name}_filtered_data_test.csv\")\n",
-    "\n",
-    "    # split metadata into train/test data\n",
-    "    ct_df_train = ct_df[ct_df[\"Cell\"].isin(list(train.columns))]\n",
-    "    ct_df_test = ct_df[ct_df[\"Cell\"].isin(list(test.columns))]\n",
-    "    train_order_dict = dict(zip(train.columns,[i for i in range(len(train.columns))]))\n",
-    "    test_order_dict = dict(zip(test.columns,[i for i in range(len(test.columns))]))\n",
-    "    ct_df_train[\"order\"] = [train_order_dict[cell_id] for cell_id in ct_df_train[\"Cell\"]]\n",
-    "    ct_df_test[\"order\"] = [test_order_dict[cell_id] for cell_id in ct_df_test[\"Cell\"]]\n",
-    "    ct_df_train = ct_df_train.sort_values(\"order\")\n",
-    "    ct_df_test = ct_df_test.sort_values(\"order\")\n",
-    "    ct_df_train = ct_df_train.drop(\"order\",axis=1)\n",
-    "    ct_df_test = ct_df_test.drop(\"order\",axis=1)\n",
-    "    assert list(ct_df_train[\"Cell\"]) == list(train.columns)\n",
-    "    assert list(ct_df_test[\"Cell\"]) == list(test.columns)\n",
-    "    train_labels = list(Counter(ct_df_train[\"Cell_type\"]).keys())\n",
-    "    test_labels = list(Counter(ct_df_test[\"Cell_type\"]).keys())\n",
-    "    assert set(train_labels) == set(test_labels)\n",
-    "    \n",
-    "    # save train/test cell type annotations\n",
-    "    ct_df_train.to_csv(f\"{subrootdir}{dir_name}_filtered_celltype_train.csv\")\n",
-    "    ct_df_test.to_csv(f\"{subrootdir}{dir_name}_filtered_celltype_test.csv\")\n",
-    "                "
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3.8.6 64-bit ('3.8.6')",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.6"
-  },
-  "vscode": {
-   "interpreter": {
-    "hash": "eba1599a1f7e611c14c87ccff6793920aa63510b01fc0e229d6dd014149b8829"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

benchmarking/randomForest_token_classifier_dosageTF_10k.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

benchmarking/scDeepsort_train_predict.ipynb DELETED Viewed

@@ -1,166 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "83d8d249-affe-45dd-915e-992b4b35b31a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "import deepsort\n",
-    "from sklearn.metrics import accuracy_score, f1_score\n",
-    "from tqdm.notebook import tqdm\n",
-    "import pickle"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "25de46ec-8a41-484d-8e14-d2b19768fc2c",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def compute_metrics(labels, preds):\n",
-    "\n",
-    "    # calculate accuracy and macro f1 using sklearn's function\n",
-    "    acc = accuracy_score(labels, preds)\n",
-    "    macro_f1 = f1_score(labels, preds, average='macro')\n",
-    "    return {\n",
-    "      'accuracy': acc,\n",
-    "      'macro_f1': macro_f1\n",
-    "    }"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "a4029b2b-afca-4300-82a2-082fec59f191",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['pancreas',\n",
-       " 'liver',\n",
-       " 'blood',\n",
-       " 'lung',\n",
-       " 'spleen',\n",
-       " 'placenta',\n",
-       " 'colorectum',\n",
-       " 'kidney',\n",
-       " 'brain']"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "rootdir = \"/path/to/data/\"\n",
-    "\n",
-    "dir_list = []\n",
-    "for dir_i in os.listdir(rootdir):\n",
-    "    if (\"results\" not in dir_i) & (os.path.isdir(os.path.join(rootdir, dir_i))):\n",
-    "        dir_list += [dir_i]\n",
-    "dir_list"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ddcdc5cd-871e-4fd2-8457-18d3049fa76c",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "output_dir = \"results_EDefault_filtered\"\n",
-    "n_epochs = \"Default\"  # scDeepsort default epochs = 300\n",
-    "\n",
-    "results_dict = dict()\n",
-    "for dir_name in tqdm(dir_list):\n",
-    "    print(f\"TRAINING: {dir_name}\")\n",
-    "    subrootdir = f\"{rootdir}{dir_name}/\"\n",
-    "    train_files = [(f\"{subrootdir}{dir_name}_filtered_data_train.csv\",f\"{subrootdir}{dir_name}_filtered_celltype_train.csv\")]\n",
-    "    test_file = f\"{subrootdir}{dir_name}_filtered_data_test.csv\"\n",
-    "    label_file = f\"{subrootdir}{dir_name}_filtered_celltype_test.csv\"\n",
-    "    \n",
-    "    # define the model\n",
-    "    model = deepsort.DeepSortClassifier(species='human',\n",
-    "                               tissue=dir_name,\n",
-    "                               gpu_id=0,\n",
-    "                               random_seed=1,\n",
-    "                               validation_fraction=0)  # use all training data (already held out 20% in test data file)\n",
-    "\n",
-    "    # fit the model\n",
-    "    model.fit(train_files, save_path=f\"{subrootdir}{output_dir}\")\n",
-    "    \n",
-    "    # use the saved model to predict cell types in test data\n",
-    "    model.predict(input_file=test_file,\n",
-    "                   model_path=f\"{subrootdir}{output_dir}\",\n",
-    "                   save_path=f\"{subrootdir}{output_dir}\",\n",
-    "                   unsure_rate=0,\n",
-    "                   file_type='csv')\n",
-    "    labels_df = pd.read_csv(label_file)\n",
-    "    preds_df = pd.read_csv(f\"{subrootdir}{output_dir}/human_{dir_name}_{dir_name}_filtered_data_test.csv\")\n",
-    "    label_cell_ids = labels_df[\"Cell\"]\n",
-    "    pred_cell_ids = preds_df[\"index\"]\n",
-    "    assert list(label_cell_ids) == list(pred_cell_ids)\n",
-    "    labels = list(labels_df[\"Cell_type\"])\n",
-    "    if isinstance(preds_df[\"cell_subtype\"][0],float):\n",
-    "        if np.isnan(preds_df[\"cell_subtype\"][0]):\n",
-    "            preds = list(preds_df[\"cell_type\"])\n",
-    "            results = compute_metrics(labels, preds)\n",
-    "    else:\n",
-    "        preds1 = list(preds_df[\"cell_type\"])\n",
-    "        preds2 = list(preds_df[\"cell_subtype\"])\n",
-    "        results1 = compute_metrics(labels, preds1)\n",
-    "        results2 = compute_metrics(labels, preds2)\n",
-    "        if results2[\"accuracy\"] > results1[\"accuracy\"]:\n",
-    "            results = results2\n",
-    "        else:\n",
-    "            results = results1\n",
-    "        \n",
-    "    print(f\"{dir_name}: {results}\")\n",
-    "    results_dict[dir_name] = results\n",
-    "    with open(f\"{subrootdir}deepsort_E{n_epochs}_filtered_pred_{dir_name}.pickle\", \"wb\") as output_file:\n",
-    "        pickle.dump(results, output_file)\n",
-    "\n",
-    "# save results\n",
-    "with open(f\"{rootdir}deepsort_E{n_epochs}_filtered_pred_dict.pickle\", \"wb\") as output_file:\n",
-    "    pickle.dump(results_dict, output_file)\n",
-    "    "
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3.8.6 64-bit ('3.8.6')",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.6"
-  },
-  "vscode": {
-   "interpreter": {
-    "hash": "eba1599a1f7e611c14c87ccff6793920aa63510b01fc0e229d6dd014149b8829"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

examples/example_input_files/bivalent_promoters/bivalent_gene_labels.txt DELETED Viewed

@@ -1,107 +0,0 @@
-ENSG00000005073
-ENSG00000007372
-ENSG00000007372
-ENSG00000043355
-ENSG00000068120
-ENSG00000075891
-ENSG00000078399
-ENSG00000105991
-ENSG00000105996
-ENSG00000105997
-ENSG00000106004
-ENSG00000106006
-ENSG00000106031
-ENSG00000106038
-ENSG00000107807
-ENSG00000107821
-ENSG00000107831
-ENSG00000107859
-ENSG00000107862
-ENSG00000108511
-ENSG00000108786
-ENSG00000108797
-ENSG00000110693
-ENSG00000110693
-ENSG00000113430
-ENSG00000115844
-ENSG00000117707
-ENSG00000117707
-ENSG00000119915
-ENSG00000120068
-ENSG00000120075
-ENSG00000120093
-ENSG00000120093
-ENSG00000120094
-ENSG00000122592
-ENSG00000125285
-ENSG00000125798
-ENSG00000125813
-ENSG00000125813
-ENSG00000125816
-ENSG00000125820
-ENSG00000128573
-ENSG00000128645
-ENSG00000128652
-ENSG00000128709
-ENSG00000128710
-ENSG00000128713
-ENSG00000128714
-ENSG00000129514
-ENSG00000131196
-ENSG00000131196
-ENSG00000136327
-ENSG00000136944
-ENSG00000138083
-ENSG00000139800
-ENSG00000143013
-ENSG00000143632
-ENSG00000144355
-ENSG00000148680
-ENSG00000148826
-ENSG00000151615
-ENSG00000152192
-ENSG00000152977
-ENSG00000159184
-ENSG00000159387
-ENSG00000163412
-ENSG00000163421
-ENSG00000163623
-ENSG00000164330
-ENSG00000164438
-ENSG00000164690
-ENSG00000164778
-ENSG00000165588
-ENSG00000165588
-ENSG00000165588
-ENSG00000166407
-ENSG00000166407
-ENSG00000168505
-ENSG00000168875
-ENSG00000169946
-ENSG00000170166
-ENSG00000170178
-ENSG00000170549
-ENSG00000170561
-ENSG00000170577
-ENSG00000170689
-ENSG00000173917
-ENSG00000174279
-ENSG00000174963
-ENSG00000174963
-ENSG00000175879
-ENSG00000176842
-ENSG00000177508
-ENSG00000178573
-ENSG00000182568
-ENSG00000182742
-ENSG00000185551
-ENSG00000185551
-ENSG00000187140
-ENSG00000196092
-ENSG00000197576
-ENSG00000198807
-ENSG00000253293
-ENSG00000256463
-ENSG00000260027
-ENSG00000276644
-ENSG00000285708

examples/example_input_files/bivalent_promoters/lys4_only_gene_labels.txt DELETED Viewed

@@ -1,80 +0,0 @@
-ENSG00000012048
-ENSG00000033627
-ENSG00000037042
-ENSG00000055950
-ENSG00000067596
-ENSG00000069248
-ENSG00000072682
-ENSG00000085274
-ENSG00000088035
-ENSG00000088930
-ENSG00000095539
-ENSG00000102471
-ENSG00000102967
-ENSG00000104313
-ENSG00000105146
-ENSG00000105379
-ENSG00000105982
-ENSG00000105983
-ENSG00000107816
-ENSG00000107819
-ENSG00000107829
-ENSG00000107833
-ENSG00000108784
-ENSG00000108799
-ENSG00000108828
-ENSG00000108830
-ENSG00000109911
-ENSG00000113522
-ENSG00000119487
-ENSG00000120049
-ENSG00000125347
-ENSG00000126581
-ENSG00000131374
-ENSG00000131437
-ENSG00000131462
-ENSG00000131467
-ENSG00000131469
-ENSG00000131470
-ENSG00000131475
-ENSG00000131477
-ENSG00000135272
-ENSG00000135776
-ENSG00000135801
-ENSG00000136158
-ENSG00000140262
-ENSG00000140450
-ENSG00000140563
-ENSG00000140829
-ENSG00000140830
-ENSG00000145494
-ENSG00000146909
-ENSG00000147905
-ENSG00000148688
-ENSG00000148840
-ENSG00000148950
-ENSG00000151332
-ENSG00000151338
-ENSG00000165637
-ENSG00000165644
-ENSG00000166135
-ENSG00000166136
-ENSG00000166167
-ENSG00000166169
-ENSG00000166189
-ENSG00000166197
-ENSG00000166377
-ENSG00000167081
-ENSG00000168118
-ENSG00000171421
-ENSG00000175832
-ENSG00000186480
-ENSG00000187098
-ENSG00000188554
-ENSG00000196628
-ENSG00000196628
-ENSG00000198728
-ENSG00000198728
-ENSG00000198863
-ENSG00000285283
-ENSG00000285708

examples/example_input_files/bivalent_promoters/no_methylation_gene_labels.txt DELETED Viewed

@@ -1,42 +0,0 @@
-ENSG00000068079
-ENSG00000068383
-ENSG00000075290
-ENSG00000104313
-ENSG00000105370
-ENSG00000105374
-ENSG00000105383
-ENSG00000106536
-ENSG00000113520
-ENSG00000113525
-ENSG00000118557
-ENSG00000125257
-ENSG00000128573
-ENSG00000131471
-ENSG00000131480
-ENSG00000131482
-ENSG00000134532
-ENSG00000136319
-ENSG00000138792
-ENSG00000140262
-ENSG00000140718
-ENSG00000147488
-ENSG00000147488
-ENSG00000148677
-ENSG00000151322
-ENSG00000151322
-ENSG00000156113
-ENSG00000164399
-ENSG00000164400
-ENSG00000167749
-ENSG00000167754
-ENSG00000167755
-ENSG00000169035
-ENSG00000170927
-ENSG00000182177
-ENSG00000186153
-ENSG00000187098
-ENSG00000204764
-ENSG00000213022
-ENSG00000213822
-ENSG00000261701
-ENSG00000285708

examples/example_input_files/dosage_sensitive_tfs/dosage_sens_tf_labels.csv DELETED Viewed

@@ -1,369 +0,0 @@
-dosage_sensitive,dosage_insensitive
-ENSG00000008197,ENSG00000010539
-ENSG00000008441,ENSG00000011590
-ENSG00000010818,ENSG00000063438
-ENSG00000011332,ENSG00000063587
-ENSG00000030419,ENSG00000064218
-ENSG00000062194,ENSG00000064489
-ENSG00000065970,ENSG00000067646
-ENSG00000067082,ENSG00000075407
-ENSG00000069667,ENSG00000079263
-ENSG00000072736,ENSG00000081386
-ENSG00000073282,ENSG00000083812
-ENSG00000073861,ENSG00000083814
-ENSG00000077092,ENSG00000083828
-ENSG00000083307,ENSG00000083838
-ENSG00000084676,ENSG00000083844
-ENSG00000085276,ENSG00000085644
-ENSG00000087510,ENSG00000089335
-ENSG00000087903,ENSG00000089775
-ENSG00000089225,ENSG00000102901
-ENSG00000091656,ENSG00000103199
-ENSG00000091831,ENSG00000105136
-ENSG00000095951,ENSG00000105610
-ENSG00000100644,ENSG00000105672
-ENSG00000100968,ENSG00000106410
-ENSG00000101076,ENSG00000106948
-ENSG00000101412,ENSG00000109705
-ENSG00000102870,ENSG00000115568
-ENSG00000102935,ENSG00000117010
-ENSG00000103449,ENSG00000118620
-ENSG00000105698,ENSG00000119574
-ENSG00000105866,ENSG00000120669
-ENSG00000106689,ENSG00000121406
-ENSG00000106852,ENSG00000121864
-ENSG00000111249,ENSG00000122085
-ENSG00000111783,ENSG00000124203
-ENSG00000112033,ENSG00000124232
-ENSG00000112246,ENSG00000124444
-ENSG00000112561,ENSG00000124613
-ENSG00000112837,ENSG00000125520
-ENSG00000115112,ENSG00000127081
-ENSG00000116809,ENSG00000127903
-ENSG00000116833,ENSG00000127989
-ENSG00000117000,ENSG00000129028
-ENSG00000118263,ENSG00000129071
-ENSG00000118922,ENSG00000129194
-ENSG00000119547,ENSG00000130544
-ENSG00000120798,ENSG00000130818
-ENSG00000121068,ENSG00000131848
-ENSG00000123358,ENSG00000132010
-ENSG00000123411,ENSG00000132846
-ENSG00000124496,ENSG00000133250
-ENSG00000124813,ENSG00000134874
-ENSG00000125398,ENSG00000135899
-ENSG00000125618,ENSG00000136866
-ENSG00000126368,ENSG00000137185
-ENSG00000127152,ENSG00000137504
-ENSG00000128573,ENSG00000138380
-ENSG00000129173,ENSG00000140993
-ENSG00000131759,ENSG00000141946
-ENSG00000132005,ENSG00000142556
-ENSG00000133794,ENSG00000143067
-ENSG00000134046,ENSG00000144026
-ENSG00000134317,ENSG00000144161
-ENSG00000134323,ENSG00000145908
-ENSG00000134852,ENSG00000146587
-ENSG00000135111,ENSG00000147183
-ENSG00000137203,ENSG00000147789
-ENSG00000137270,ENSG00000148300
-ENSG00000138795,ENSG00000149054
-ENSG00000139083,ENSG00000149922
-ENSG00000139793,ENSG00000151500
-ENSG00000140548,ENSG00000151650
-ENSG00000140968,ENSG00000151657
-ENSG00000142611,ENSG00000152439
-ENSG00000143033,ENSG00000152467
-ENSG00000143171,ENSG00000152475
-ENSG00000143190,ENSG00000153975
-ENSG00000143355,ENSG00000155592
-ENSG00000143365,ENSG00000156469
-ENSG00000143373,ENSG00000157429
-ENSG00000143437,ENSG00000159882
-ENSG00000144355,ENSG00000159885
-ENSG00000147862,ENSG00000159915
-ENSG00000148516,ENSG00000160224
-ENSG00000150907,ENSG00000160229
-ENSG00000151090,ENSG00000160352
-ENSG00000153234,ENSG00000160908
-ENSG00000158055,ENSG00000160961
-ENSG00000160007,ENSG00000161277
-ENSG00000160094,ENSG00000162086
-ENSG00000161405,ENSG00000163516
-ENSG00000162761,ENSG00000164011
-ENSG00000162924,ENSG00000164048
-ENSG00000164683,ENSG00000164296
-ENSG00000164684,ENSG00000164299
-ENSG00000167182,ENSG00000165066
-ENSG00000168610,ENSG00000165512
-ENSG00000168916,ENSG00000165643
-ENSG00000169554,ENSG00000165684
-ENSG00000169946,ENSG00000166529
-ENSG00000170370,ENSG00000166823
-ENSG00000172733,ENSG00000166860
-ENSG00000172819,ENSG00000167034
-ENSG00000177463,ENSG00000167384
-ENSG00000178177,ENSG00000167554
-ENSG00000179348,ENSG00000167625
-ENSG00000179361,ENSG00000167785
-ENSG00000179456,ENSG00000167800
-ENSG00000180357,ENSG00000167840
-ENSG00000185551,ENSG00000167962
-ENSG00000185591,ENSG00000167981
-ENSG00000187098,ENSG00000168152
-ENSG00000187605,ENSG00000168286
-ENSG00000189308,ENSG00000168769
-ENSG00000196092,ENSG00000169131
-ENSG00000196482,ENSG00000169136
-ENSG00000196628,ENSG00000169548
-ENSG00000197757,ENSG00000169951
-ENSG00000198815,ENSG00000169955
-ENSG00000198945,ENSG00000169989
-ENSG00000198963,ENSG00000170260
-ENSG00000204231,ENSG00000170608
-,ENSG00000170954
-,ENSG00000171291
-,ENSG00000171295
-,ENSG00000171425
-,ENSG00000171443
-,ENSG00000171466
-,ENSG00000171469
-,ENSG00000171574
-,ENSG00000171606
-,ENSG00000171827
-,ENSG00000171872
-,ENSG00000171970
-,ENSG00000172000
-,ENSG00000172888
-,ENSG00000173041
-,ENSG00000173258
-,ENSG00000173480
-,ENSG00000173673
-,ENSG00000173825
-,ENSG00000174255
-,ENSG00000174652
-,ENSG00000174796
-,ENSG00000175279
-,ENSG00000175325
-,ENSG00000175395
-,ENSG00000175691
-,ENSG00000176009
-,ENSG00000176024
-,ENSG00000176083
-,ENSG00000176222
-,ENSG00000176302
-,ENSG00000176472
-,ENSG00000176678
-,ENSG00000176679
-,ENSG00000177030
-,ENSG00000177494
-,ENSG00000177599
-,ENSG00000177683
-,ENSG00000177842
-,ENSG00000177873
-,ENSG00000177932
-,ENSG00000177946
-,ENSG00000178150
-,ENSG00000178229
-,ENSG00000178338
-,ENSG00000178386
-,ENSG00000178665
-,ENSG00000178917
-,ENSG00000178928
-,ENSG00000178935
-,ENSG00000179195
-,ENSG00000179772
-,ENSG00000179774
-,ENSG00000179886
-,ENSG00000179909
-,ENSG00000179922
-,ENSG00000179930
-,ENSG00000179943
-,ENSG00000179965
-,ENSG00000180257
-,ENSG00000180346
-,ENSG00000180532
-,ENSG00000180535
-,ENSG00000180938
-,ENSG00000181135
-,ENSG00000181444
-,ENSG00000181450
-,ENSG00000181638
-,ENSG00000181894
-,ENSG00000181896
-,ENSG00000182318
-,ENSG00000182983
-,ENSG00000182986
-,ENSG00000183340
-,ENSG00000183647
-,ENSG00000183734
-,ENSG00000183850
-,ENSG00000184221
-,ENSG00000184517
-,ENSG00000184635
-,ENSG00000184677
-,ENSG00000184895
-,ENSG00000185155
-,ENSG00000185252
-,ENSG00000185404
-,ENSG00000185730
-,ENSG00000186020
-,ENSG00000186026
-,ENSG00000186051
-,ENSG00000186103
-,ENSG00000186230
-,ENSG00000186300
-,ENSG00000186376
-,ENSG00000186446
-,ENSG00000186496
-,ENSG00000186777
-,ENSG00000186812
-,ENSG00000186814
-,ENSG00000187626
-,ENSG00000187801
-,ENSG00000187821
-,ENSG00000187855
-,ENSG00000187987
-,ENSG00000188033
-,ENSG00000188095
-,ENSG00000188171
-,ENSG00000188295
-,ENSG00000188321
-,ENSG00000188629
-,ENSG00000188785
-,ENSG00000188868
-,ENSG00000189164
-,ENSG00000189190
-,ENSG00000189298
-,ENSG00000189299
-,ENSG00000196152
-,ENSG00000196172
-,ENSG00000196214
-,ENSG00000196345
-,ENSG00000196357
-,ENSG00000196378
-,ENSG00000196381
-,ENSG00000196387
-,ENSG00000196391
-,ENSG00000196417
-,ENSG00000196418
-,ENSG00000196456
-,ENSG00000196460
-,ENSG00000196466
-,ENSG00000196605
-,ENSG00000196646
-,ENSG00000196652
-,ENSG00000196670
-,ENSG00000196693
-,ENSG00000196705
-,ENSG00000196812
-,ENSG00000196946
-,ENSG00000197008
-,ENSG00000197020
-,ENSG00000197037
-,ENSG00000197044
-,ENSG00000197054
-,ENSG00000197124
-,ENSG00000197134
-,ENSG00000197162
-,ENSG00000197213
-,ENSG00000197279
-,ENSG00000197343
-,ENSG00000197360
-,ENSG00000197363
-,ENSG00000197472
-,ENSG00000197779
-,ENSG00000197841
-,ENSG00000197857
-,ENSG00000197863
-,ENSG00000197928
-,ENSG00000197933
-,ENSG00000197951
-,ENSG00000198028
-,ENSG00000198039
-,ENSG00000198046
-,ENSG00000198185
-,ENSG00000198205
-,ENSG00000198300
-,ENSG00000198315
-,ENSG00000198342
-,ENSG00000198346
-,ENSG00000198429
-,ENSG00000198440
-,ENSG00000198464
-,ENSG00000198466
-,ENSG00000198482
-,ENSG00000198538
-,ENSG00000198546
-,ENSG00000198551
-,ENSG00000198556
-,ENSG00000198633
-,ENSG00000198939
-,ENSG00000203326
-,ENSG00000204514
-,ENSG00000204519
-,ENSG00000204532
-,ENSG00000204595
-,ENSG00000204604
-,ENSG00000204644
-,ENSG00000204946
-,ENSG00000213020
-,ENSG00000213799
-,ENSG00000213973
-,ENSG00000213988
-,ENSG00000214189
-,ENSG00000215271
-,ENSG00000215372
-,ENSG00000215612
-,ENSG00000220201
-,ENSG00000221923
-,ENSG00000223547
-,ENSG00000227124
-,ENSG00000229676
-,ENSG00000229809
-,ENSG00000230797
-,ENSG00000232040
-,ENSG00000234284
-,ENSG00000234444
-,ENSG00000235109
-,ENSG00000235608
-,ENSG00000236104
-,ENSG00000236609
-,ENSG00000237440
-,ENSG00000242852
-,ENSG00000243660
-,ENSG00000245680
-,ENSG00000248483
-,ENSG00000249459
-,ENSG00000249471
-,ENSG00000249709
-,ENSG00000250571
-,ENSG00000250709
-,ENSG00000251192
-,ENSG00000251247
-,ENSG00000251369
-,ENSG00000253831
-,ENSG00000254004
-,ENSG00000256087
-,ENSG00000256223
-,ENSG00000256229
-,ENSG00000256294
-,ENSG00000256463
-,ENSG00000256683
-,ENSG00000256771
-,ENSG00000257446
-,ENSG00000257591
-,ENSG00000258405
-,ENSG00000258873
-,ENSG00000263002
-,ENSG00000264668
-,ENSG00000265763
-,ENSG00000267041
-,ENSG00000267179
-,ENSG00000267281
-,ENSG00000267508
-,ENSG00000267680
-,ENSG00000269067
-,ENSG00000269343
-,ENSG00000269699
-,ENSG00000272602

examples/example_input_files/gene_info_table.csv DELETED Viewed

The diff for this file is too large to render. See raw diff