juliocesar-io committed
Commit: b6f1234
1 Parent(s): 5b85ed1

Added initial app

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .gitignore +4 -0
  2. Dockerfile +44 -0
  3. README.md +121 -11
  4. app.py +154 -0
  5. data/__init__.py +4 -0
  6. data/dataset.py +418 -0
  7. data/dataset_saliency.py +378 -0
  8. data/datasets/AD/Inference.csv +1 -0
  9. data/datasets/AD/Smiles_AD_1.csv +0 -0
  10. data/datasets/AD/Smiles_AD_2.csv +0 -0
  11. data/datasets/AD/Smiles_AD_3.csv +0 -0
  12. data/datasets/AD/Smiles_AD_4.csv +0 -0
  13. data/datasets/AD/Smiles_AD_Test.csv +0 -0
  14. data/datasets/AD/Targets_Fasta.csv +102 -0
  15. data/datasets/AD/saliency.csv +1 -0
  16. data/datasets/DUDE/Smiles_1.csv +0 -0
  17. data/datasets/DUDE/Smiles_2.csv +0 -0
  18. data/datasets/DUDE/Smiles_3.csv +0 -0
  19. data/datasets/DUDE/Smiles_4.csv +0 -0
  20. data/datasets/DUDE/Smiles_Test.csv +0 -0
  21. data/features.py +136 -0
  22. example/input_smiles.csv +4 -0
  23. gcn_lib/__init__.py +0 -0
  24. gcn_lib/dense/__init__.py +4 -0
  25. gcn_lib/dense/torch_edge.py +101 -0
  26. gcn_lib/dense/torch_nn.py +93 -0
  27. gcn_lib/dense/torch_vertex.py +115 -0
  28. gcn_lib/sparse/__init__.py +4 -0
  29. gcn_lib/sparse/torch_edge.py +113 -0
  30. gcn_lib/sparse/torch_message.py +98 -0
  31. gcn_lib/sparse/torch_nn.py +160 -0
  32. gcn_lib/sparse/torch_vertex.py +355 -0
  33. gradio/title.md +19 -0
  34. model/__init__.py +4 -0
  35. model/model.py +246 -0
  36. model/model_concatenation.py +92 -0
  37. model/model_encoder.py +54 -0
  38. pretrained-models/BINARY_ada/Fold1/Best_Model.pth +3 -0
  39. pretrained-models/BINARY_ada/Fold2/Best_Model.pth +3 -0
  40. pretrained-models/BINARY_ada/Fold3/Best_Model.pth +3 -0
  41. pretrained-models/BINARY_ada/Fold4/Best_Model.pth +3 -0
  42. requirements.txt +9 -0
  43. scripts/__init__.py +0 -0
  44. scripts/model/__init__.py +4 -0
  45. scripts/model/model.py +246 -0
  46. scripts/model/model_concatenation.py +92 -0
  47. scripts/model/model_encoder.py +54 -0
  48. scripts/pla_net_inference.py +82 -0
  49. setup.py +14 -0
  50. utils/__init__.py +4 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ __pycache__
2
+ log
3
+ tmp
4
+ output_predictions.csv
Dockerfile ADDED
@@ -0,0 +1,44 @@
1
+ # Use NVIDIA PyTorch image as the base
2
+ FROM nvcr.io/nvidia/pytorch:22.03-py3
3
+
4
+ RUN apt-get update && apt-get install -y libxrender1
5
+
6
+ # Base pytorch
7
+ RUN conda install pytorch==1.12.0 torchvision==0.13.0 torchaudio==0.12.0 cudatoolkit=11.6 -c pytorch -c conda-forge
8
+
9
+ # Set required versions for each core dependency using cu116
10
+ RUN pip install torch-scatter==2.0.9 torch-sparse==0.6.14 torch-cluster==1.6.0 torch-spline-conv==1.2.1 torch-geometric==2.1.0 -f https://data.pyg.org/whl/torch-1.12.0+cu116.html
11
+
12
+ # Create a new user named "user" with UID 1000
13
+ RUN useradd -m -u 1000 user
14
+
15
+ # Set environment variables
16
+ ENV PYTHONUNBUFFERED=1 \
17
+ GRADIO_ALLOW_FLAGGING=never \
18
+ GRADIO_NUM_PORTS=1 \
19
+ GRADIO_SERVER_NAME=0.0.0.0 \
20
+ GRADIO_THEME=huggingface \
21
+ SYSTEM=spaces \
22
+ HOME=/home/user \
23
+ PATH=/home/user/.local/bin:$PATH
24
+
25
+ # Set the working directory to the user's app directory as root
26
+ WORKDIR $HOME/app
27
+
28
+ # Copy the current directory contents into the container at $HOME/app
29
+ COPY . $HOME/app
30
+
31
+ # Change ownership of the app directory to "user"
32
+ RUN chown -R user:user $HOME/app
33
+
34
+ # Switch to the "user" user
35
+ USER user
36
+
37
+ # Upgrade pip as the user
38
+ RUN pip install --no-cache-dir --upgrade pip
39
+
40
+ # Install the local package as the user
41
+ RUN pip install --user .
42
+
43
+ # Set the default command to bash
44
+ CMD ["/bin/bash"]
README.md CHANGED
@@ -1,11 +1,121 @@
1
- ---
2
- title: PLA Net
3
- emoji: 📈
4
- colorFrom: red
5
- colorTo: gray
6
- sdk: docker
7
- pinned: false
8
- license: mit
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
1
+ # PLA-Net: Predicting Protein-Ligand Interactions with Deep Graph Networks
2
+
3
+ Forked version of [PLA-Net](https://github.com/BCV-Uniandes/PLA-Net)
4
+
5
+ ## Background
6
+
7
+ **PLA-Net** is a deep learning model designed to predict interactions between small organic molecules (ligands) and any of the 102 target proteins in the Alzheimer's Disease (AD) dataset. By transforming molecular and protein sequences into graph representations, PLA-Net leverages Graph Convolutional Networks (GCNs) to analyze and predict target-ligand interaction probabilities. Developed by [BCV-Uniandes](https://github.com/BCV-Uniandes/PLA-Net).
8
+
9
+ ## Key Features
10
+
11
+ - **Graph-Based Input Representation**
12
+ - **Ligand Module (LM):** Converts SMILES sequences of molecules into graph representations.
13
+ - **Protein Module (PM):** Transforms FASTA sequences of proteins into graph structures.
14
+
15
+ - **Deep Graph Convolutional Networks**
16
+ - Each module employs a deep GCN followed by an average pooling layer to extract meaningful features from the input graphs.
17
+
18
+ - **Interaction Prediction**
19
+ - The feature representations from the LM and PM are concatenated.
20
+ - A fully connected layer processes the combined features to predict the interaction probability between the ligand and the target protein.
21
+
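The interaction-prediction step described above can be sketched in a few lines of PyTorch. This is a minimal illustration only, not the code shipped in `model/`: the embedding size of 256 and the names `lm_embedding` / `pm_embedding` are assumptions made for the example.

```python
import torch
import torch.nn as nn

# Minimal sketch of the interaction-prediction head (illustrative only).
# lm_embedding / pm_embedding stand in for the pooled outputs of the
# Ligand Module and Protein Module; 256 is an assumed embedding size.
class InteractionHead(nn.Module):
    def __init__(self, embed_dim: int = 256):
        super().__init__()
        # Fully connected layer over the concatenated ligand/protein features
        self.fc = nn.Linear(2 * embed_dim, 1)

    def forward(self, lm_embedding, pm_embedding):
        # Concatenate the pooled LM and PM graph representations
        combined = torch.cat([lm_embedding, pm_embedding], dim=-1)
        # A sigmoid turns the single logit into an interaction probability
        return torch.sigmoid(self.fc(combined))

# Example with random features standing in for the pooled GCN outputs
head = InteractionHead()
probability = head(torch.randn(1, 256), torch.randn(1, 256))
print(probability.item())
```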
22
+ ## Quick Start
23
+
24
+ If you want to run PLA-Net without installing it, you can run it freely on this [Hugging Face Space](https://huggingface.co/spaces/juliocesar-io/PLA-Net).
25
+
26
+ ## Docker Install
27
+
28
+ To prevent conflicts with the host machine, it is recommended to run PLA-Net in a Docker container.
29
+
30
+ First make sure you have an NVIDIA GPU and [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) installed. Then build the image with the following command:
31
+
32
+ ```bash
33
+ docker build -t pla-net:latest .
34
+ ```
35
+
36
+ ### Inference
37
+
38
+ To run inference, use the command below.
39
+
40
+ This will run inference for the target protein `ada` with the SMILES in the `input_smiles.csv` file and save the predictions to the `output_predictions.csv` file.
41
+
42
+ The prediction file has the following format:
43
+
44
+ ```bash
45
+ target,smiles,interaction_probability,interaction_class
46
+ ada,Cn4c(CCC(=O)Nc3ccc2ccn(CC[C@H](CO)n1cnc(C(N)=O)c1)c2c3)nc5ccccc45,0.9994347542524338,1
47
+ ```
48
+
49
+ Where `interaction_class` is 1 if the interaction probability is greater than 0.5, and 0 otherwise.
50
+
51
+ ```bash
52
+ docker run \
53
+ -it --rm --gpus all \
54
+ -v "$(pwd)":/home/user/output \
55
+ pla-net:latest \
56
+ python /home/user/app/scripts/pla_net_inference.py \
57
+ --use_gpu \
58
+ --target ada \
59
+ --target_list /home/user/app/data/datasets/AD/Targets_Fasta.csv \
60
+ --target_checkpoint_path /home/user/app/pretrained-models/BINARY_ada \
61
+ --input_file_smiles /home/user/app/example/input_smiles.csv \
62
+ --output_file /home/user/output/output_predictions.csv
63
+ ```
64
+
65
+ Args:
66
+
67
+ - `use_gpu`: Use GPU for inference.
68
+ - `target`: Target protein ID from the list of targets. Check the list of available targets in the [data](https://github.com/juliocesar-io/PLA-Net/blob/main/data/datasets/AD/Targets_Fasta.csv) folder.
69
+ - `target_list`: Path to the target list CSV file.
70
+ - `target_checkpoint_path`: Path to the target checkpoint (e.g. `/workspace/pretrained-models/BINARY_ada`); there is one checkpoint per target.
71
+ - `input_file_smiles`: Path to the input SMILES file.
72
+ - `output_file`: Path to the output predictions file.
73
+
74
+
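Once the predictions file is written, it can be post-processed with pandas. A minimal sketch, assuming only the output columns shown above (`target`, `smiles`, `interaction_probability`, `interaction_class`) and the default `output_predictions.csv` filename:

```python
import pandas as pd

# Load the predictions written by pla_net_inference.py
df = pd.read_csv("output_predictions.csv")

# interaction_class is 1 when interaction_probability > 0.5 and 0 otherwise;
# recomputing it here just illustrates the thresholding rule.
df["interaction_class"] = (df["interaction_probability"] > 0.5).astype(int)

# Keep only predicted binders, highest confidence first
binders = df[df["interaction_class"] == 1].sort_values(
    "interaction_probability", ascending=False
)
print(binders[["target", "smiles", "interaction_probability"]])
```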
75
+ ### Gradio Server
76
+ We provide a simple graphical user interface to run PLA-Net with Gradio. To use it, run the following command:
77
+
78
+ ```bash
79
+ docker run \
80
+ -it --rm --gpus all \
81
+ -p 7860:7860 \
82
+ pla-net:latest \
83
+ python app.py
84
+ ```
85
+
86
+ Then open your browser and go to `http://localhost:7860/` to access the web interface.
87
+
88
+
89
+ ## Local Install
90
+
91
+ To run inference with PLA-Net locally, install the dependencies and activate the environment with the following commands:
92
+
93
+ ```bash
94
+ conda env create -f environment.yml
95
+ conda activate pla-net
96
+ ```
97
+
98
+ Now you can run inference with PLA-Net locally. In the project folder, run the following command:
99
+
100
+ ```bash
101
+ python scripts/pla_net_inference.py \
102
+ --use_gpu \
103
+ --target ada \
104
+ --target_list data/datasets/AD/Targets_Fasta.csv \
105
+ --target_checkpoint_path pretrained-models/BINARY_ada \
106
+ --input_file_smiles example/input_smiles.csv \
107
+ --output_file example/output_predictions.csv
108
+ ```
109
+
110
+ ## Models
111
+
112
+ You can download the pre-trained models from [Hugging Face](https://huggingface.co/juliocesar-io/PLA-Net).
113
+ ## Training
114
+
115
+ To train each of the components of our method (LM, LM+Advs, LMPM and PLA-Net), refer to the `planet.sh` file and run the desired models.
116
+
117
+ To evaluate each of the components of our method (LM, LM+Advs, LMPM and PLA-Net), run the corresponding bash file in the inference folder.
118
+
119
+ ## Citation
120
+
121
+ Ruiz Puentes, P., Rueda-Gensini, L., Valderrama, N. et al. Predicting target–ligand interactions with graph convolutional networks for interpretable pharmaceutical discovery. Sci Rep 12, 8434 (2022). https://doi.org/10.1038/s41598-022-12180-x
app.py ADDED
@@ -0,0 +1,154 @@
1
+ import uuid
2
+ import gradio as gr
3
+ import torch
4
+ import os
5
+ import pandas as pd
6
+ from rdkit import Chem
7
+ from scripts.pla_net_inference import main
8
+ from utils.args import ArgsInit
9
+
10
+ os.system("nvidia-smi")
11
+ print("TORCH_CUDA", torch.cuda.is_available())
12
+
13
+ PROJECT_URL = "https://www.nature.com/articles/s41598-022-12180-x"
14
+
15
+ DEFAULT_PATH_DOCKER = "/home/user/app"
16
+
17
+ def load_and_filter_data(protein_id, ligand_smiles):
18
+
19
+ # Generate a short random id for this inference run
20
+ random_id = str(uuid.uuid4())[:8]
21
+
22
+ print("Inference ID: ", random_id)
23
+
24
+ # check that ligand_smiles is not empty
25
+ if not ligand_smiles or ligand_smiles.strip() == "":
26
+ error_msg = f"!SMILES string is required"
27
+ raise gr.Error(error_msg, duration=5)
28
+
29
+ # Split the input SMILES string by ':' to get a list
30
+ smiles_list = ligand_smiles.split(':')
31
+
32
+
33
+
34
+ print("Smiles to predict: ", smiles_list)
35
+ print("Target Protein ID: ", protein_id)
36
+
37
+ # Validate SMILES
38
+ invalid_smiles = []
39
+ for smiles in smiles_list:
40
+ mol = Chem.MolFromSmiles(smiles.strip())
41
+ if mol is None:
42
+ invalid_smiles.append(smiles.strip())
43
+
44
+
45
+
46
+ if invalid_smiles:
47
+ error_msg = f"!Invalid 💥 SMILES string(s) : {', '.join(invalid_smiles)}"
48
+ raise gr.Error(error_msg, duration=5)
49
+
50
+ # Create tmp folder
51
+ os.makedirs(f"{DEFAULT_PATH_DOCKER}/example/tmp", exist_ok=True)
52
+
53
+ # Save SMILES to CSV
54
+ df = pd.DataFrame({"smiles": [s.strip() for s in smiles_list if s.strip()]})
55
+ df.to_csv(f"{DEFAULT_PATH_DOCKER}/example/tmp/{random_id}_input_smiles.csv", index=False)
56
+
57
+ # Run inference
58
+ args = ArgsInit().args
59
+ args.nclasses = 2
60
+ args.batch_size = 10
61
+ args.use_prot = True
62
+ args.freeze_molecule = True
63
+ args.conv_encode_edge = True
64
+ args.learn_t = True
65
+ args.binary = True
66
+
67
+ args.use_gpu = True
68
+ args.target = protein_id
69
+ args.target_list = f"{DEFAULT_PATH_DOCKER}/data/datasets/AD/Targets_Fasta.csv"
70
+ args.target_checkpoint_path = f"{DEFAULT_PATH_DOCKER}/pretrained-models/BINARY_{protein_id}"
71
+ args.input_file_smiles = f"{DEFAULT_PATH_DOCKER}/example/tmp/{random_id}_input_smiles.csv"
72
+ args.output_file = f"{DEFAULT_PATH_DOCKER}/example/tmp/{random_id}_output_predictions.csv"
73
+
74
+
75
+ print("Args: ", args)
76
+ main(args)
77
+
78
+ # Load the CSV file
79
+ df = pd.read_csv(f'{DEFAULT_PATH_DOCKER}/example/tmp/{random_id}_output_predictions.csv')
80
+
81
+ print("Prediction Results output: ", df)
82
+ return df
83
+
84
+ def load_description(fp):
85
+ with open(fp, 'r', encoding='utf-8') as f:
86
+ content = f.read()
87
+ return content
88
+
89
+ def run_inference(protein_id, ligand_smile):
90
+ result_df = load_and_filter_data(protein_id, ligand_smile)
91
+ return result_df
92
+
93
+ def create_interface():
94
+ with gr.Blocks(title="PLA-Net Web Inference") as inference:
95
+ gr.HTML(load_description("gradio/title.md"))
96
+
97
+ gr.Markdown("### Input")
98
+ with gr.Row():
99
+ with gr.Column():
100
+ gr.Markdown("#### Target Protein")
101
+ protein_id = gr.Dropdown(
102
+ choices=["ada"],
103
+ label="Target Protein ID",
104
+ info="Select the target protein from the dropdown menu.",
105
+ value="ada"
106
+ )
107
+ with gr.Column():
108
+ gr.Markdown("#### Ligand")
109
+ ligand_smile = gr.Textbox(
110
+ info="Provide SMILES input (separate multiple SMILES with ':' )",
111
+ placeholder="SMILES input",
112
+ label="SMILES string(s)",
113
+ )
114
+ gr.Examples(
115
+ examples=[
116
+ "Cn4c(CCC(=O)Nc3ccc2ccn(CC[C@H](CO)n1cnc(C(N)=O)c1)c2c3)nc5ccccc45",
117
+ "OCCCCCn1cnc2C(O)CN=CNc12",
118
+ "Nc4nc(c1ccco1)c3ncn(C(=O)NCCc2ccccc2)c3n4"
119
+ ],
120
+ inputs=ligand_smile,
121
+ label="Example SMILES"
122
+ )
123
+ btn = gr.Button("Run")
124
+ gr.Markdown("### Output")
125
+ out = gr.Dataframe(
126
+ headers=["target", "smiles", "interaction_probability", "interaction_class"],
127
+ datatype=["str", "str", "number", "number"],
128
+ label="Prediction Results"
129
+ )
130
+
131
+ btn.click(fn=run_inference, inputs=[protein_id, ligand_smile], outputs=out)
132
+
133
+ gr.Markdown("""
134
+ PLA-Net model for predicting interactions
135
+ between small organic molecules and one of the 102 target proteins in the AD dataset. Graph representations
136
+ of the molecule and a given target protein are generated from SMILES and FASTA sequences and are used as
137
+ input to the Ligand Module (LM) and Protein Module (PM), respectively. Each module comprises a deep GCN
138
+ followed by an average pooling layer, which extracts relevant features of their corresponding input graph. Both
139
+ representations are finally concatenated and combined through a fully connected layer to predict the target–
140
+ ligand interaction probability.
141
+ """)
142
+
143
+ gr.Markdown("""
144
+ Ruiz Puentes, P., Rueda-Gensini, L., Valderrama, N. et al.
145
+ Predicting target–ligand interactions with graph convolutional networks
146
+ for interpretable pharmaceutical discovery. Sci Rep 12, 8434 (2022).
147
+ [https://doi.org/10.1038/s41598-022-12180-x](https://doi.org/10.1038/s41598-022-12180-x)
148
+ """)
149
+
150
+ return inference
151
+
152
+ if __name__ == "__main__":
153
+ interface = create_interface()
154
+ interface.launch()
data/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ import sys
2
+ import os
3
+ ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
4
+ sys.path.append(ROOT_DIR)
data/dataset.py ADDED
@@ -0,0 +1,418 @@
1
+ import pandas as pd
2
+ import shutil, os
3
+ import os.path as osp
4
+ import numpy as np
5
+ from tqdm import tqdm
6
+
7
+ import torch
8
+ from torch_geometric.data import Data
9
+ from torch.autograd import Variable
10
+
11
+ from rdkit import Chem
12
+
13
+ from data.features import (
14
+ allowable_features,
15
+ atom_to_feature_vector,
16
+ bond_to_feature_vector,
17
+ atom_feature_vector_to_dict,
18
+ bond_feature_vector_to_dict,
19
+ )
20
+
21
+ from utils.data_util import one_hot_vector_sm, one_hot_vector_am, get_atom_feature_dims
22
+
23
+
24
+ def load_dataset(
25
+ cross_val, binary_task, target, args, use_prot=False, advs=False, test=False, inference=False, saliency=False
26
+ ):
27
+ """
28
+ Load data and return, for each split, its dataframe and its data loader.
29
+ Args:
30
+ cross_val (int): Data partition being used [1-4].
31
+ binary_task (boolean): Whether to perform binary classification or multiclass classification.
32
+ target (string): Name of the protein target for binary classification.
33
+ args (parser): Complete arguments (configuration) of the model.
34
+ use_prot (boolean): Whether to use the PM module.
35
+ advs (boolean): Whether to train the LM module with adversarial augmentations.
36
+ test (boolean): Whether the model is being tested or trained.
37
+ Return:
38
+ train (loader): Training loader
39
+ valid (loader): Validation loader
40
+ test (loader): Test loader
41
+ data_train (dataframe): Training data dataframe
42
+ data_valid (dataframe): Validation data dataframe
43
+ data_test (dataframe): Test data dataframe
44
+
45
+ """
46
+ # Read all data files
47
+ if binary_task:
48
+ path = "data/datasets/AD/"
49
+ add_val = '_AD'
50
+ else:
51
+ path = "data/datasets/DUDE/"
52
+ add_val = ''
53
+
54
+ data_test = pd.read_csv(
55
+ path + f"Smiles{add_val}_Test.csv", names=["Smiles", "Target", "Label"]
56
+ )
57
+
58
+ data_inference = pd.read_csv(
59
+ path + f"Inference.csv", names=["Smiles", "Target", "Label"]
60
+ )
61
+ if not test and not inference:
62
+ # Verify cross validation partition is defined
63
+ assert cross_val in [1, 2, 3, 4], "{} data partition is not defined".format(
64
+ cross_val
65
+ )
66
+ print("Loading data...")
67
+
68
+ A = pd.read_csv(
69
+ path + f"Smiles{add_val}_1.csv", names=["Smiles", "Target", "Label"]
70
+ )
71
+ B = pd.read_csv(
72
+ path + f"Smiles{add_val}_2.csv", names=["Smiles", "Target", "Label"]
73
+ )
74
+ C = pd.read_csv(
75
+ path + f"Smiles{add_val}_3.csv", names=["Smiles", "Target", "Label"]
76
+ )
77
+ D = pd.read_csv(
78
+ path + f"Smiles{add_val}_4.csv", names=["Smiles", "Target", "Label"]
79
+ )
80
+
81
+ if use_prot and binary_task:
82
+ data_target = pd.read_csv(
83
+ path + "Targets_Fasta.csv", names=["Fasta", "Target", "Label"]
84
+ )
85
+ else:
86
+ data_target = []
87
+
88
+ # Generate train and validation splits according to cross validation number
89
+ if cross_val == 1:
90
+ data_train = pd.concat([A, B, C], ignore_index=True)
91
+ data_val = D
92
+ elif cross_val == 2:
93
+ data_train = pd.concat([A, C, D], ignore_index=True)
94
+ data_val = B
95
+ elif cross_val == 3:
96
+ data_train = pd.concat([A, B, D], ignore_index=True)
97
+ data_val = C
98
+ elif cross_val == 4:
99
+ data_train = pd.concat([B, C, D], ignore_index=True)
100
+ data_val = A
101
+
102
+ # If binary classification, select data only for the specific target being trained
103
+ if binary_task:
104
+ data_train = data_train[data_train.Target == target]
105
+ data_val = data_val[data_val.Target == target]
106
+ data_test = data_test[data_test.Target == target]
107
+ if use_prot:
108
+ data_target = data_target[data_target.Target == target]
109
+
110
+ # Get dataset for each split
111
+ train = get_dataset(data_train, use_prot, data_target, args, advs)
112
+ valid = get_dataset(data_val, use_prot, data_target, args)
113
+ test = get_dataset(data_test, use_prot, data_target, args)
114
+
115
+ elif test and not inference:
116
+ # Read test data file
117
+ data_target = None
118
+ if binary_task:
119
+ data_test = data_test[data_test.Target == target]
120
+ if use_prot:
121
+ data_target = pd.read_csv(
122
+ path + "Targets_Fasta.csv", names=["Fasta", "Target", "Label"]
123
+ )
124
+ data_target = data_target[data_target.Target == target]
125
+
126
+ test = get_dataset(
127
+ data_test,
128
+ target=data_target,
129
+ use_prot=use_prot,
130
+ args=args,
131
+ advs=advs,
132
+ saliency=saliency,
133
+ )
134
+ # No need for these sets in test mode
135
+ train = []
136
+ valid = []
137
+ data_train = []
138
+ data_val = []
139
+
140
+ elif inference:
141
+ # Read inference data file
142
+ data_target = None
143
+ if binary_task:
144
+ data_inference = data_inference[data_inference.Target == target]
145
+ if use_prot:
146
+ data_target = pd.read_csv(
147
+ path + "Targets_Fasta.csv", names=["Fasta", "Target", "Label"]
148
+ )
149
+ data_target = data_target[data_target.Target == target]
150
+
151
+ test = get_dataset(
152
+ data_inference,
153
+ target=data_target,
154
+ use_prot=use_prot,
155
+ args=args,
156
+ advs=advs,
157
+ saliency=args.saliency,
158
+ )
159
+ # No need for these sets in inference mode
160
+ train = []
161
+ valid = []
162
+ data_train = []
163
+ data_val = []
164
+
165
+ print("Done.")
166
+ return train, valid, test, data_train, data_val, data_test
167
+
168
+
169
+ def reload_dataset(cross_val, binary_task, target, args, advs=False):
170
+ print("Reloading data")
171
+ args.edge_dict = {}
172
+ if binary_task:
173
+ path = "data/datasets/AD/"
174
+ A = pd.read_csv(path + "Smiles_AD_1.csv", names=["Smiles", "Target", "Label"])
175
+ B = pd.read_csv(path + "Smiles_AD_2.csv", names=["Smiles", "Target", "Label"])
176
+ C = pd.read_csv(path + "Smiles_AD_3.csv", names=["Smiles", "Target", "Label"])
177
+ D = pd.read_csv(path + "Smiles_AD_4.csv", names=["Smiles", "Target", "Label"])
178
+ data_test = pd.read_csv(
179
+ path + "AD_Test.csv", names=["Smiles", "Target", "Label"]
180
+ )
181
+
182
+ if cross_val == 1:
183
+ data_train = pd.concat([A, B, C], ignore_index=True)
184
+ elif cross_val == 2:
185
+ data_train = pd.concat([A, C, D], ignore_index=True)
186
+ elif cross_val == 3:
187
+ data_train = pd.concat([A, B, D], ignore_index=True)
188
+ else:
189
+ data_train = pd.concat([B, C, D], ignore_index=True)
190
+
191
+ if binary_task:
192
+ data_train = data_train[data_train.Target == target]
193
+
194
+ train = get_dataset(data_train, args=args, advs=advs)
195
+ print("Done.")
196
+
197
+ return train, data_train
198
+
199
+
200
+ def smiles_to_graph(smiles_string, is_prot=False, received_mol=False):
201
+ """
202
+ Converts SMILES string to graph Data object
203
+ :input: SMILES string (str)
204
+ :return: graph object
205
+ """
206
+
207
+ if not is_prot:
208
+ mol = Chem.MolFromSmiles(smiles_string)
209
+ else:
210
+ mol = Chem.MolFromFASTA(smiles_string)
211
+ # atoms
212
+ atom_features_list = []
213
+ for atom in mol.GetAtoms():
214
+ ftrs = atom_to_feature_vector(atom)
215
+ atom_features_list.append(ftrs)
216
+
217
+ x = np.array(atom_features_list, dtype=np.int64)
218
+
219
+ # bonds
220
+ num_bond_features = 3 # bond type, bond stereo, is_conjugated
221
+ if len(mol.GetBonds()) > 0: # mol has bonds
222
+ edges_list = []
223
+ edge_features_list = []
224
+ for bond in mol.GetBonds():
225
+ i = bond.GetBeginAtomIdx()
226
+ j = bond.GetEndAtomIdx()
227
+
228
+ edge_feature = bond_to_feature_vector(bond)
229
+
230
+ # add edges in both directions
231
+ edges_list.append((i, j))
232
+ edge_features_list.append(edge_feature)
233
+ edges_list.append((j, i))
234
+ edge_features_list.append(edge_feature)
235
+
236
+ # data.edge_index: Graph connectivity in COO format with shape [2, num_edges]
237
+ edge_index = np.array(edges_list, dtype=np.int64).T
238
+
239
+ # data.edge_attr: Edge feature matrix with shape [num_edges, num_edge_features]
240
+ edge_attr = np.array(edge_features_list, dtype=np.int64)
241
+
242
+ else: # mol has no bonds
243
+ edge_index = np.empty((2, 0), dtype=np.int64)
244
+ edge_attr = np.empty((0, num_bond_features), dtype=np.int64)
245
+
246
+ return edge_attr, edge_index, x
247
+
248
+
249
+ def smiles_to_graph_advs(
250
+ smiles_string, args, advs=False, received_mol=False, saliency=False
251
+ ):
252
+ """
253
+ Converts SMILES string to graph Data object
254
+ :input: SMILES string (str)
255
+ :return: graph object
256
+ """
257
+ if not received_mol:
258
+ mol = Chem.MolFromSmiles(smiles_string)
259
+ else:
260
+ mol = smiles_string
261
+
262
+ # atoms
263
+ atom_features_list = []
264
+ atom_feat_dims = get_atom_feature_dims()
265
+
266
+ for atom in mol.GetAtoms():
267
+ ftrs = atom_to_feature_vector(atom)
268
+ if saliency:
269
+ ftrs_oh = one_hot_vector_am(ftrs, atom_feat_dims)
270
+ atom_features_list.append(torch.unsqueeze(ftrs_oh, 0))
271
+ else:
272
+ atom_features_list.append(ftrs)
273
+
274
+ if saliency:
275
+ x = torch.cat(atom_features_list)
276
+ else:
277
+ x = np.array(atom_features_list, dtype=np.int64)
278
+
279
+ if advs:
280
+ # bonds
281
+ mol_edge_dict = {}
282
+
283
+ num_bond_features = 3 # bond type, bond stereo, is_conjugated
284
+ features_dim1 = torch.eye(5)
285
+ features_dim2 = torch.eye(6)
286
+ features_dim3 = torch.eye(2)
287
+ if len(mol.GetBonds()) > 0: # mol has bonds
288
+ edges_list = []
289
+ edge_features_list = []
290
+ for bond in mol.GetBonds():
291
+ i = bond.GetBeginAtomIdx()
292
+ j = bond.GetEndAtomIdx()
293
+ edge_feature = bond_to_feature_vector(bond)
294
+
295
+ # add edges in both directions
296
+ edges_list.append((i, j))
297
+ edges_list.append((j, i))
298
+
299
+ edge_feature_oh = one_hot_vector_sm(
300
+ edge_feature, features_dim1, features_dim2, features_dim3
301
+ )
302
+ if advs:
303
+ mol_edge_dict[(i, j)] = Variable(
304
+ torch.tensor([1.0]), requires_grad=True
305
+ )
306
+
307
+ # add edges in both directions
308
+ edge_features_list.append(
309
+ torch.unsqueeze(mol_edge_dict[(i, j)] * edge_feature_oh, 0)
310
+ )
311
+ edge_features_list.append(
312
+ torch.unsqueeze(mol_edge_dict[(i, j)] * edge_feature_oh, 0)
313
+ )
314
+ else:
315
+ # add edges in both directions
316
+ edge_features_list.append(torch.unsqueeze(edge_feature_oh, 0))
317
+ edge_features_list.append(torch.unsqueeze(edge_feature_oh, 0))
318
+ if advs:
319
+ # Update edge dict
320
+ args.edge_dict[smiles_string] = mol_edge_dict
321
+
322
+ # data.edge_index: Graph connectivity in COO format with shape [2, num_edges]
323
+ edge_index = np.array(edges_list, dtype=np.int64).T
324
+ # data.edge_attr: Edge feature matrix with shape [num_edges, num_edge_features]
325
+
326
+ edge_attr = torch.cat(edge_features_list)
327
+
328
+ else: # mol has no bonds
329
+ edge_index = np.empty((2, 0), dtype=np.int64)
330
+ edge_attr = np.empty((0, num_bond_features), dtype=np.int64)
331
+ args.edge_dict[smiles_string] = {}
332
+
333
+ return edge_attr, edge_index, x
334
+
335
+
336
+ def get_dataset(
337
+ dataset, use_prot=False, target=None, args=None, advs=False, saliency=False
338
+ ):
339
+ total_dataset = []
340
+ if use_prot:
341
+ prot_graph = transform_molecule_pg(
342
+ target["Fasta"].item(), label=None, is_prot=use_prot
343
+ )
344
+
345
+ for mol, label in tqdm(
346
+ zip(dataset["Smiles"], dataset["Label"]), total=len(dataset["Smiles"])
347
+ ):
348
+ if use_prot:
349
+ total_dataset.append(
350
+ [
351
+ transform_molecule_pg(mol, label, args, advs, saliency=saliency),
352
+ prot_graph,
353
+ ]
354
+ )
355
+ else:
356
+ total_dataset.append(
357
+ transform_molecule_pg(mol, label, args, advs, saliency=saliency)
358
+ )
359
+ return total_dataset
360
+
361
+
362
+ def get_perturbed_dataset(mols, labels, args, valence=False):
363
+ total_dataset = []
364
+ for mol, label in zip(mols, labels):
365
+ total_dataset.append(transform_molecule_pg(mol, label, args, received_mol=True))
366
+ return total_dataset
367
+
368
+
369
+ def transform_molecule_pg(
370
+ smiles,
371
+ label,
372
+ args=None,
373
+ advs=False,
374
+ received_mol=False,
375
+ saliency=False,
376
+ is_prot=False,
377
+ ):
378
+
379
+ if is_prot:
380
+ edge_attr_p, edge_index_p, x_p = smiles_to_graph(smiles, is_prot)
381
+ x_p = torch.tensor(x_p)
382
+ edge_index_p = torch.tensor(edge_index_p)
383
+ edge_attr_p = torch.tensor(edge_attr_p)
384
+
385
+ return Data(edge_attr=edge_attr_p, edge_index=edge_index_p, x=x_p)
386
+
387
+ else:
388
+ if args.advs or received_mol:
389
+ if advs or received_mol:
390
+ edge_attr, edge_index, x = smiles_to_graph_advs(
391
+ smiles,
392
+ args,
393
+ advs=True,
394
+ received_mol=received_mol,
395
+ saliency=saliency,
396
+ )
397
+ else:
398
+ edge_attr, edge_index, x = smiles_to_graph_advs(
399
+ smiles, args, received_mol=received_mol, saliency=saliency
400
+ )
401
+ else:
402
+ edge_attr, edge_index, x = smiles_to_graph(smiles)
403
+
404
+ if not saliency:
405
+ x = torch.tensor(x)
406
+ y = torch.tensor([label])
407
+ edge_index = torch.tensor(edge_index)
408
+ if not args.advs and not received_mol:
409
+ edge_attr = torch.tensor(edge_attr)
410
+
411
+ if received_mol:
412
+ mol = smiles
413
+ else:
414
+ mol = Chem.MolFromSmiles(smiles)
415
+
416
+ return Data(
417
+ edge_attr=edge_attr, edge_index=edge_index, x=x, y=y, mol=mol, smiles=smiles
418
+ )
data/dataset_saliency.py ADDED
@@ -0,0 +1,378 @@
1
+
2
+ import pandas as pd
3
+ import shutil, os
4
+ import os.path as osp
5
+ import numpy as np
6
+ from tqdm import tqdm
7
+
8
+ import torch
9
+ from torch_geometric.data import Data
10
+ from torch.autograd import Variable
11
+
12
+ from rdkit import Chem
13
+
14
+ from data.features import (
15
+ allowable_features,
16
+ atom_to_feature_vector,
17
+ bond_to_feature_vector,
18
+ atom_feature_vector_to_dict,
19
+ bond_feature_vector_to_dict,
20
+ )
21
+
22
+ from utils.data_util import one_hot_vector_sm, one_hot_vector_am, get_atom_feature_dims
23
+
24
+
25
+ def load_dataset(
26
+ cross_val, binary_task, target, args, use_prot=False, advs=False, test=False
27
+ ):
28
+ """
29
+ Load data and return, for each split, its dataframe and its data loader.
30
+ Args:
31
+ cross_val (int): Data partition being used [1-4].
32
+ binary_task (boolean): Whether to perform binary classification or multiclass classification.
33
+ target (string): Name of the protein target for binary classification.
34
+ args (parser): Complete arguments (configuration) of the model.
35
+ use_prot (boolean): Whether to use the PM module.
36
+ advs (boolean): Whether to train the LM module with adversarial augmentations.
37
+ test (boolean): Whether the model is being tested or trained.
38
+ Return:
39
+ train (loader): Training loader
40
+ valid (loader): Validation loader
41
+ test (loader): Test loader
42
+ data_train (dataframe): Training data dataframe
43
+ data_valid (dataframe): Validation data dataframe
44
+ data_test (dataframe): Test data dataframe
45
+
46
+ """
47
+ # TODO: Don't we want to include the multiclass partition here?
48
+ # Read all data files
49
+ if not test:
50
+ # Verify cross validation partition is defined
51
+ assert cross_val in [1, 2, 3, 4], "{} data partition is not defined".format(
52
+ cross_val
53
+ )
54
+ print("Loading data...")
55
+ if binary_task:
56
+ path = "data/datasets/AD/"
57
+ A = pd.read_csv(
58
+ path + "Smiles_AD_1.csv", names=["Smiles", "Target", "Label"]
59
+ )
60
+ B = pd.read_csv(
61
+ path + "Smiles_AD_2.csv", names=["Smiles", "Target", "Label"]
62
+ )
63
+ C = pd.read_csv(
64
+ path + "Smiles_AD_3.csv", names=["Smiles", "Target", "Label"]
65
+ )
66
+ D = pd.read_csv(
67
+ path + "Smiles_AD_4.csv", names=["Smiles", "Target", "Label"]
68
+ )
69
+ data_test = pd.read_csv(
70
+ path + "AD_Test.csv", names=["Smiles", "Target", "Label"]
71
+ )
72
+ if use_prot:
73
+ data_target = pd.read_csv(
74
+ path + "Targets_Fasta.csv", names=["Fasta", "Target", "Label"]
75
+ )
76
+ else:
77
+ data_target = []
78
+ # Generate train and validation splits according to cross validation number
79
+ if cross_val == 1:
80
+ data_train = pd.concat([A, B, C], ignore_index=True)
81
+ data_val = D
82
+ elif cross_val == 2:
83
+ data_train = pd.concat([A, C, D], ignore_index=True)
84
+ data_val = B
85
+ elif cross_val == 3:
86
+ data_train = pd.concat([A, B, D], ignore_index=True)
87
+ data_val = C
88
+ elif cross_val == 4:
89
+ data_train = pd.concat([B, C, D], ignore_index=True)
90
+ data_val = A
91
+ # If binary classification, select data only for the specific target being trained
92
+ if binary_task:
93
+ data_train = data_train[data_train.Target == target]
94
+ data_val = data_val[data_val.Target == target]
95
+ data_test = data_test[data_test.Target == target]
96
+ if use_prot:
97
+ data_target = data_target[data_target.Target == target]
98
+ # Get dataset for each split
99
+ train = get_dataset(data_train, use_prot, data_target, args, advs)
100
+ valid = get_dataset(data_val, use_prot, data_target, args)
101
+ test = get_dataset(data_test, use_prot, data_target, args)
102
+ else:
103
+ # Read test data file
104
+ if binary_task:
105
+ path = "data/datasets/AD/"
106
+ data_test = pd.read_csv(
107
+ path + "Smiles_AD_Test.csv", names=["Smiles", "Target", "Label"]
108
+ )
109
+ data_test = data_test[data_test.Target == target]
110
+ if use_prot:
111
+ data_target = pd.read_csv(
112
+ path + "Targets_Fasta.csv", names=["Fasta", "Target", "Label"]
113
+ )
114
+ data_target = data_target[data_target.Target == target]
115
+ else:
116
+ data_target = []
117
+ test = get_dataset(data_test,target=data_target, use_prot=use_prot, args=args, advs=advs, saliency=args.saliency)
118
+ train = []
119
+ valid = []
120
+ data_train = []
121
+ data_val = []
122
+ print("Done.")
123
+ return train, valid, test, data_train, data_val, data_test
124
+
125
+
126
+ def reload_dataset(cross_val, binary_task, target, args, advs=False):
127
+ print("Reloading data")
128
+ args.edge_dict = {}
129
+ if binary_task:
130
+ path = "data/datasets/AD/"
131
+ A = pd.read_csv(path + "Smiles_AD_1.csv", names=["Smiles", "Target", "Label"])
132
+ B = pd.read_csv(path + "Smiles_AD_2.csv", names=["Smiles", "Target", "Label"])
133
+ C = pd.read_csv(path + "Smiles_AD_3.csv", names=["Smiles", "Target", "Label"])
134
+ D = pd.read_csv(path + "Smiles_AD_4.csv", names=["Smiles", "Target", "Label"])
135
+ data_test = pd.read_csv(
136
+ path + "AD_Test.csv", names=["Smiles", "Target", "Label"]
137
+ )
138
+
139
+ if cross_val == 1:
140
+ data_train = pd.concat([A, B, C], ignore_index=True)
141
+ elif cross_val == 2:
142
+ data_train = pd.concat([A, C, D], ignore_index=True)
143
+ elif cross_val == 3:
144
+ data_train = pd.concat([A, B, D], ignore_index=True)
145
+ else:
146
+ data_train = pd.concat([B, C, D], ignore_index=True)
147
+
148
+ if binary_task:
149
+ data_train = data_train[data_train.Target == target]
150
+
151
+ train = get_dataset(data_train, args=args, advs=advs)
152
+ print("Done.")
153
+
154
+ return train, data_train
155
+
156
+
157
+ def smiles_to_graph(smiles_string, is_prot=False, received_mol=False, saliency=False):
158
+ """
159
+ Converts SMILES string to graph Data object
160
+ :input: SMILES string (str)
161
+ :return: graph object
162
+ """
163
+
164
+ if not is_prot:
165
+ mol = Chem.MolFromSmiles(smiles_string)
166
+ else:
167
+ mol = Chem.MolFromFASTA(smiles_string)
168
+ # atoms
169
+ atom_features_list = []
170
+ atom_feat_dims = get_atom_feature_dims()
171
+ for atom in mol.GetAtoms():
172
+ ftrs = atom_to_feature_vector(atom)
173
+ if saliency:
174
+ ftrs_oh = one_hot_vector_am(ftrs, atom_feat_dims)
175
+ atom_features_list.append(torch.unsqueeze(ftrs_oh, 0))
176
+ else:
177
+ atom_features_list.append(ftrs)
178
+
179
+ if saliency:
180
+ x = torch.cat(atom_features_list)
181
+ else:
182
+ x = np.array(atom_features_list, dtype=np.int64)
183
+
184
+ # bonds
185
+ num_bond_features = 3 # bond type, bond stereo, is_conjugated
186
+ if len(mol.GetBonds()) > 0: # mol has bonds
187
+ edges_list = []
188
+ edge_features_list = []
189
+ for bond in mol.GetBonds():
190
+ i = bond.GetBeginAtomIdx()
191
+ j = bond.GetEndAtomIdx()
192
+
193
+ edge_feature = bond_to_feature_vector(bond)
194
+
195
+ # add edges in both directions
196
+ edges_list.append((i, j))
197
+ edge_features_list.append(edge_feature)
198
+ edges_list.append((j, i))
199
+ edge_features_list.append(edge_feature)
200
+
201
+ # data.edge_index: Graph connectivity in COO format with shape [2, num_edges]
202
+ edge_index = np.array(edges_list, dtype=np.int64).T
203
+
204
+ # data.edge_attr: Edge feature matrix with shape [num_edges, num_edge_features]
205
+ edge_attr = np.array(edge_features_list, dtype=np.int64)
206
+
207
+ else: # mol has no bonds
208
+ edge_index = np.empty((2, 0), dtype=np.int64)
209
+ edge_attr = np.empty((0, num_bond_features), dtype=np.int64)
210
+
211
+ return edge_attr, edge_index, x
212
+
213
+
214
+ def smiles_to_graph_advs(
215
+ smiles_string, args, advs=False, received_mol=False, saliency=False
216
+ ):
217
+ """
218
+ Converts SMILES string to graph Data object
219
+ :input: SMILES string (str)
220
+ :return: graph object
221
+ """
222
+ if not received_mol:
223
+ mol = Chem.MolFromSmiles(smiles_string)
224
+ else:
225
+ mol = smiles_string
226
+
227
+ # atoms
228
+ atom_features_list = []
229
+ atom_feat_dims = get_atom_feature_dims()
230
+
231
+ for atom in mol.GetAtoms():
232
+ ftrs = atom_to_feature_vector(atom)
233
+ if saliency:
234
+ ftrs_oh = one_hot_vector_am(ftrs, atom_feat_dims)
235
+ atom_features_list.append(torch.unsqueeze(ftrs_oh, 0))
236
+ else:
237
+ atom_features_list.append(ftrs)
238
+
239
+ if saliency:
240
+ x = torch.cat(atom_features_list)
241
+ else:
242
+ x = np.array(atom_features_list, dtype=np.int64)
243
+
244
+ if advs:
245
+ # bonds
246
+ mol_edge_dict = {}
247
+
248
+ num_bond_features = 3 # bond type, bond stereo, is_conjugated
249
+ features_dim1 = torch.eye(5)
250
+ features_dim2 = torch.eye(6)
251
+ features_dim3 = torch.eye(2)
252
+ if len(mol.GetBonds()) > 0: # mol has bonds
253
+ edges_list = []
254
+ edge_features_list = []
255
+ for bond in mol.GetBonds():
256
+ i = bond.GetBeginAtomIdx()
257
+ j = bond.GetEndAtomIdx()
258
+ edge_feature = bond_to_feature_vector(bond)
259
+
260
+ # add edges in both directions
261
+ edges_list.append((i, j))
262
+ edges_list.append((j, i))
263
+
264
+ edge_feature_oh = one_hot_vector_sm(
265
+ edge_feature, features_dim1, features_dim2, features_dim3
266
+ )
267
+ if advs:
268
+ mol_edge_dict[(i, j)] = Variable(
269
+ torch.tensor([1.0]), requires_grad=True
270
+ )
271
+
272
+ # add edges in both directions
273
+ edge_features_list.append(
274
+ torch.unsqueeze(mol_edge_dict[(i, j)] * edge_feature_oh, 0)
275
+ )
276
+ edge_features_list.append(
277
+ torch.unsqueeze(mol_edge_dict[(i, j)] * edge_feature_oh, 0)
278
+ )
279
+ else:
280
+ # add edges in both directions
281
+ edge_features_list.append(torch.unsqueeze(edge_feature_oh, 0))
282
+ edge_features_list.append(torch.unsqueeze(edge_feature_oh, 0))
283
+ if advs:
284
+ # Update edge dict
285
+ args.edge_dict[smiles_string] = mol_edge_dict
286
+
287
+ # data.edge_index: Graph connectivity in COO format with shape [2, num_edges]
288
+ edge_index = np.array(edges_list, dtype=np.int64).T
289
+ # data.edge_attr: Edge feature matrix with shape [num_edges, num_edge_features]
290
+
291
+ edge_attr = torch.cat(edge_features_list)
292
+
293
+ else: # mol has no bonds
294
+ edge_index = np.empty((2, 0), dtype=np.int64)
295
+ edge_attr = np.empty((0, num_bond_features), dtype=np.int64)
296
+ args.edge_dict[smiles_string] = {}
297
+
298
+ return edge_attr, edge_index, x
299
+
300
+
301
+ def get_dataset(
302
+ dataset, use_prot=False, target=None, args=None, advs=False, saliency=False
303
+ ):
304
+ total_dataset = []
305
+ if use_prot:
306
+ prot_graph = transform_molecule_pg(
307
+ target["Fasta"].item(), label=None, is_prot=use_prot
308
+ )
309
+
310
+ for mol, label in tqdm(
311
+ zip(dataset["Smiles"], dataset["Label"]), total=len(dataset["Smiles"])
312
+ ):
313
+ if use_prot:
314
+ total_dataset.append([transform_molecule_pg(mol,label,args, advs, saliency=saliency),prot_graph])
315
+ else:
316
+ total_dataset.append(
317
+ transform_molecule_pg(mol, label, args, advs, saliency=saliency)
318
+ )
319
+ return total_dataset
320
+
321
+
322
+ def get_perturbed_dataset(mols, labels, args):
323
+ total_dataset = []
324
+ for mol, label in zip(mols, labels):
325
+ total_dataset.append(transform_molecule_pg(mol, label, args, received_mol=True))
326
+ return total_dataset
327
+
328
+
329
+ def transform_molecule_pg(
330
+ smiles,
331
+ label,
332
+ args=None,
333
+ advs=False,
334
+ received_mol=False,
335
+ saliency=False,
336
+ is_prot=False,
337
+ ):
338
+
339
+ if is_prot:
340
+ edge_attr_p, edge_index_p, x_p = smiles_to_graph(smiles, is_prot)
341
+ x_p = torch.tensor(x_p)
342
+ edge_index_p = torch.tensor(edge_index_p)
343
+ edge_attr_p = torch.tensor(edge_attr_p)
344
+
345
+ return Data(edge_attr=edge_attr_p, edge_index=edge_index_p, x=x_p)
346
+
347
+ else:
348
+ if args.advs or received_mol:
349
+ if advs or received_mol:
350
+ edge_attr, edge_index, x = smiles_to_graph_advs(
351
+ smiles,
352
+ args,
353
+ advs=True,
354
+ received_mol=received_mol,
355
+ saliency=saliency,
356
+ )
357
+ else:
358
+ edge_attr, edge_index, x = smiles_to_graph_advs(
359
+ smiles, args, received_mol=received_mol, saliency=saliency
360
+ )
361
+ else:
362
+ edge_attr, edge_index, x = smiles_to_graph(smiles, saliency=saliency)
363
+
364
+ if not saliency:
365
+ x = torch.tensor(x)
366
+ y = torch.tensor([label])
367
+ edge_index = torch.tensor(edge_index)
368
+ if not args.advs and not received_mol:
369
+ edge_attr = torch.tensor(edge_attr)
370
+
371
+ if received_mol:
372
+ mol = smiles
373
+ else:
374
+ mol = Chem.MolFromSmiles(smiles)
375
+
376
+ return Data(
377
+ edge_attr=edge_attr, edge_index=edge_index, x=x, y=y, mol=mol, smiles=smiles
378
+ )
data/datasets/AD/Inference.csv ADDED
@@ -0,0 +1 @@
1
+ OCCCCCn1cnc2C(O)CN=CNc12,ada,0
data/datasets/AD/Smiles_AD_1.csv ADDED
The diff for this file is too large to render.
 
data/datasets/AD/Smiles_AD_2.csv ADDED
The diff for this file is too large to render.
 
data/datasets/AD/Smiles_AD_3.csv ADDED
The diff for this file is too large to render.
 
data/datasets/AD/Smiles_AD_4.csv ADDED
The diff for this file is too large to render.
 
data/datasets/AD/Smiles_AD_Test.csv ADDED
The diff for this file is too large to render.
 
data/datasets/AD/Targets_Fasta.csv ADDED
@@ -0,0 +1,102 @@
1
+ MSDVAIVKEGWLHKRGEYIKTWRPRYFLLKNDGTFIGYKERPQDVDQREAPLNNFSVAQCQLMKTERPRPNTFIIRCLQWTTVIERTFHVETPEEREEWTTAIQTVADGLKKQEEEEMDFRSGSPSDNSGAEEMEVSLAKPKHRVTMNEFEYLKLLGKGTFGKVILVKEKATGRYYAMKILKKEVIVAKDEVAHTLTENRVLQNSRHPFLTALKYSFQTHDRLCFVMEYANGGELFFHLSRERVFSEDRARFYGAEIVSALDYLHSEKNVVYRDLKLENLMLDKDGHIKITDFGLCKEGIKDGATMKTFCGTPEYLAPEVLEDNDYGRAVDWWGLGVVMYEMMCGRLPFYNQDHEKLFELILMEEIRFPRTLGPEAKSLLSGLLKKDPKQRLGGGSEDAKEIMQHRFFAGIVWQHVYEKKLSPPFKPQVTSETDTRYFDEEFTAQMITITPPDQDDSMECVDSERRPHFPQFSYSASGTA,akt1,0
2
+ MLSNSQGQSPPVPFPAPAPPPQPPTPALPHPPAQPPPPPPQQFPQFHVKSGLQIKKNAIIDDYKVTSQVLGLGINGKVLQIFNKRTQEKFALKMLQDCPKARREVELHWRASQCPHIVRIVDVYENLYAGRKCLLIVMECLDGGELFSRIQDRGDQAFTEREASEIMKSIGEAIQYLHSINIAHRDVKPENLLYTSKRPNAILKLTDFGFAKETTSHNSLTTPCYTPYYVAPEVLGPEKYDKSCDMWSLGVIMYILLCGYPPFYSNHGLAISPGMKTRIRMGQYEFPNPEWSEVSEEVKMLIRNLLKTEPTQRMTITEFMNHPWIMQSTKVPQTPLHTSRVLKEDKERWEDVKEEMTSALATMRVDYEQIKIKKIEDASNPLLLKRRKKARALEAAALAH,mapk2,1
3
+ MAHVRGLQLPGCLALAALCSLVHSQHVFLAPQQARSLLQRVRRANTFLEEVRKGNLERECVEETCSYEEAFEALESSTATDVFWAKYTACETARTPRDKLAACLEGNCAEGLGTNYRGHVNITRSGIECQLWRSRYPHKPEINSTTHPGADLQENFCRNPDSSTTGPWCYTTDPTVRRQECSIPVCGQDQVTVAMTPRSEGSSVNLSPPLEQCVPDRGQQYQGRLAVTTHGLPCLAWASAQAKALSKHQDFNSAVQLVENFCRNPDGDEEGVWCYVAGKPGDFGYCDLNYCEEAVEEETGDGLDEDSDRAIEGRTATSEYQTFFNPRTFGSGEADCGLRPLFEKKSLEDKTERELLESYIDGRIVEGSDAEIGMSPWQVMLFRKSPQELLCGASLISDRWVLTAAHCLLYPPWDKNFTENDLLVRIGKHSRTRYERNIEKISMLEKIYIHPRYNWRENLDRDIALMKLKKPVAFSDYIHPVCLPDRETAASLLQAGYKGRVTGWGNLKETWTANVGKGQPSVLQVVNLPIVERPVCKDSTRIRITDNMFCAGYKPDEGKRGDACEGDSGGPFVMKSPFNNRWYQMGIVSWGEGCDRDGKYGFYTHVFRLKKWIQKVIDQFGE,thrb,2
4
+ MWSWKCLLFWAVLVTATLCTARPSPTLPEQAQPWGAPVEVESFLVHPGDLLQLRCRLRDDVQSINWLRDGVQLAESNRTRITGEEVEVQDSVPADSGLYACVTSSPSGSDTTYFSVNVSDALPSSEDDDDDDDSSSEEKETDNTKPNRMPVAPYWTSPEKMEKKLHAVPAAKTVKFKCPSSGTPNPTLRWLKNGKEFKPDHRIGGYKVRYATWSIIMDSVVPSDKGNYTCIVENEYGSINHTYQLDVVERSPHRPILQAGLPANKTVALGSNVEFMCKVYSDPQPHIQWLKHIEVNGSKIGPDNLPYVQILKTAGVNTTDKEMEVLHLRNVSFEDAGEYTCLAGNSIGLSHHSAWLTVLEALEERPAVMTSPLYLEIIIYCTGAFLISCMVGSVIVYKMKSGTKKSDFHSQMAVHKLAKSIPLRRQVTVSADSSASMNSGVLLVRPSRLSSSGTPMLAGVSEYELPEDPRWELPRDRLVLGKPLGEGCFGQVVLAEAIGLDKDKPNRVTKVAVKMLKSDATEKDLSDLISEMEMMKMIGKHKNIINLLGACTQDGPLYVIVEYASKGNLREYLQARRPPGLEYCYNPSHNPEEQLSSKDLVSCAYQVARGMEYLASKKCIHRDLAARNVLVTEDNVMKIADFGLARDIHHIDYYKKTTNGRLPVKWMAPEALFDRIYTHQSDVWSFGVLLWEIFTLGGSPYPGVPVEELFKLLKEGHRMDKPSNCTNELYMMMRDCWHAVPSQRPTFKQLVEDLDRIVALTSNQEYLDLSMPLDQYSPSFPDTRSSTCSSGEDSVFSHEPLPEEPCLPRHPAQLANGGLKRR,fgfr1,3
5
+ MRALLARLLLCVLVVSDSKGSNELHQVPSNCDCLNGGTCVSNKYFSNIHWCNCPKKFGGQHCEIDKSKTCYEGNGHFYRGKASTDTMGRPCLPWNSATVLQQTYHAHRSDALQLGLGKHNYCRNPDNRRRPWCYVQVGLKPLVQECMVHDCADGKKPSSPPEELKFQCGQKTLRPRFKIIGGEFTTIENQPWFAAIYRRHRGGSVTYVCGGSLISPCWVISATHCFIDYPKKEDYIVYLGRSRLNSNTQGEMKFEVENLILHKDYSADTLAHHNDIALLKIRSKEGRCAQPSRTIQTICLPSMYNDPQFGTSCEITGFGKENSTDYLYPEQLKMTVVKLISHRECQQPHYYGSEVTTKMLCAADPQWKTDSCQGDSGGPLVCSLQGRMTLTGIVSWGRGCALKDKPGVYTRVSHFLPWIRSHTKEENGLAL,urok,4
6
+ MGMACLTMTEMEGTSTSSIYQNGDISGNANSMKQIDPVLQVYLYHSLGKSEADYLTFPSGEYVAEEICIAASKACGITPVYHNMFALMSETERIWYPPNHVFHIDESTRHNVLYRIRFYFPRWYCSGSNRAYRHGISRGAEAPLLDDFVMSYLFAQWRHDFVHGWIKVPVTHETQEECLGMAVLDMMRIAKENDQTPLAIYNSISYKTFLPKCIRAKIQDYHILTRKRIRYRFRRFIQQFSQCKATARNLKLKYLINLETLQSAFYTEKFEVKEPGSGPSGEEIFATIIITGNGGIQWSRGKHKESETLTEQDLQLYCDFPNIIDVSIKQANQEGSNESRVVTIHKQDGKNLEIELSSLREALSFVSLIDGYYRLTADAHHYLCKEVAPPAVLENIQSNCHGPISMDFAISKLKKAGNQTGLYVLRCSPKDFNKYFLTFAVERENVIEYKHCLITKNENEEYNLSGTKKNFSSLKDLLNCYQMETVRSDNIIFQFTKCCPPKPKDKSNLLVFRTNGVSDVPTSPTLQRPTHMNQMVFHKIRNEDLIFNESLGQGTFTKIFKGVRREVGDYGQLHETEVLLKVLDKAHRNYSESFFEAASMMSKLSHKHLVLNYGVCVCGDENILVQEFVKFGSLDTYLKKNKNCINILWKLEVAKQLAWAMHFLEENTLIHGNVCAKNILLIREEDRKTGNPPFIKLSDPGISITVLPKDILQERIPWVPPECIENPKNLNLATDKWSFGTTLWEICSGGDKPLSALDSQRKLQFYEDRHQLPAPKWAELANLINNCMDYEPDFRPSFRAIIRDLNSLFTPDYELLTENDMLPNMRIGALGFSGAFEDRDPTQFEERHLKFLQQLGKGNFGSVEMCRYDPLQDNTGEVVAVKKLQHSTEEHLRDFEREIEILKSLQHDNIVKYKGVCYSAGRRNLKLIMEYLPYGSLRDYLQKHKERIDHIKLLQYTSQICKGMEYLGTKRYIHRDLATRNILVENENRVKIGDFGLTKVLPQDKEYYKVKEPGESPIFWYAPESLTESKFSVASDVWSFGVVLYELFTYIEKSKSPPAEFMRMIGNDKQGQMIVFHLIELLKNNGRLPRPDGCPDEIYMIMTECWNNNVNQRPSFRDLALRVDQIRDNMAG,jak2,5
7
+ MKTLLLLAVIMIFGLLQAHGNLVNFHRMIKLTTGKEAALSYGFYGCHCGVGGRGSPKDATDRCCVTHDCCYKRLEKRGCGTKFLSYKFSNSGSRITCAKQDSCRSQLCECDKAAATCFARNKTTYNKKYQYYSNKHCRGSTPRC,pa2ga,6
8
+ MRQSLLFLTSVVPFVLAPRPPDDPGFGPHQRLEKLDSLLSDYDILSLSNIQQHSVRKRDLQTSTHVETLLTFSALKRHFKLYLTSSTERFSQNFKVVVVDGKNESEYTVKWQDFFTGHVVGEPDSRVLAHIRDDDVIIRINTDGAEYNIEPLWRFVNDTKDKRMLVYKSEDIKNVSRLQSPKVCGYLKVDNEELLPKGLVDREPPEELVHRVKRRADPDPMKNTCKLLVVADHRFYRYMGRGEESTTTNYLIELIDRVDDIYRNTSWDNAGFKGYGIQIEQIRILKSPQEVKPGEKHYNMAKSYPNEEKDAWDVKMLLEQFSFDIAEEASKVCLAHLFTYQDFDMGTLGLAYVGSPRANSHGGVCPKAYYSPVGKKNIYLNSGLTSTKNYGKTILTKEADLVTTHELGHNFGAEHDPDGLAECAPNEDQGGKYVMYPIAVSGDHENNKMFSNCSKQSIYKTIESKAQECFQERSNKVCGNSRVDEGEECDPGIMYLNNDTCCNSDCTLKEGVQCSDRNSPCCKNCQFETAQKKCQEAINATCKGVSYCTGNSSECPPPGNAEDDTVCLDLGKCKDGKCIPFCEREQQLESCACNETDNSCKVCCRDLSGRCVPYVDAEQKNLFLRKGKPCTVGFCDMNGKCEKRVQDVIERFWDFIDQLSINTFGKFLADNIVGSVLVFSLIFWIPFSILVHCVDKKLDKQYESLSLFHPSNVEMLSSMDSASVRIIKPFPAPQTPGRLQPAPVIPSAPAAPKLDHQRMDTIQEDPSTDSHMDEDGFEKDPFPNSSTAAKSFEDLTDHPVTRSEKAASFKLQRQNRVDSKETEC,ada17,7
9
+ MEVQLGLGRVYPRPPSKTYRGAFQNLFQSVREVIQNPGPRHPEAASAAPPGASLLLLQQQQQQQQQQQQQQQQQQQQQQQETSPRQQQQQQGEDGSPQAHRRGPTGYLVLDEEQQPSQPQSALECHPERGCVPEPGAAVAASKGLPQQLPAPPDEDDSAAPSTLSLLGPTFPGLSSCSADLKDILSEASTMQLLQQQQQEAVSEGSSSGRAREASGAPTSSKDNYLGGTSTISDNAKELCKAVSVSMGLGVEALEHLSPGEQLRGDCMYAPLLGVPPAVRPTPCAPLAECKGSLLDDSAGKSTEDTAEYSPFKGGYTKGLEGESLGCSGSAAAGSSGTLELPSTLSLYKSGALDEAAAYQSRDYYNFPLALAGPPPPPPPPHPHARIKLENPLDYGSAWAAAAAQCRYGDLASLHGAGAAGPGSGSPSAAASSSWHTLFTAEEGQLYGPCGGGGGGGGGGGGGGGGGGGGGGGEAGAVAPYGYTRPPQGLAGQESDFTAPDVWYPGGMVSRVPYPSPTCVKSEMGPWMDSYSGPYGDMRLETARDHVLPIDYYFPPQKTCLICGDEASGCHYGALTCGSCKVFFKRAAEGKQKYLCASRNDCTIDKFRRKNCPSCRLRKCYEAGMTLGARKLKKLGNLKLQEEGEASSTTSPTEETTQKLTVSHIEGYECQPIFLNVLEAIEPGVVCAGHDNNQPDSFAALLSSLNELGERQLVHVVKWAKALPGFRNLHVDDQMAVIQYSWMGLMVFAMGWRSFTNVNSRMLYFAPDLVFNEYRMHKSRMYSQCVRMRHLSQEFGWLQITPQEFLCMKALLLFSIIPVDGLKNQKFFDELRMNYIKELDRIIACKRKNPTSCSRRFYQLTKLLDSVQPIARELHQFTFDLLIKSHMVSVDFPEMMAEIISVQVPKILSGKVKPIYFHTQ,andr,8
10
+ MKDSCITVMAMALLSGFFFFAPASSYNLDVRGARSFSPPRAGRHFGYRVLQVGNGVIVGAPGEGNSTGSLYQCQSGTGHCLPVTLRGSNYTSKYLGMTLATDPTDGSILACDPGLSRTCDQNTYLSGLCYLFRQNLQGPMLQGRPGFQECIKGNVDLVFLFDGSMSLQPDEFQKILDFMKDVMKKLSNTSYQFAAVQFSTSYKTEFDFSDYVKRKDPDALLKHVKHMLLLTNTFGAINYVATEVFREELGARPDATKVLIIITDGEATDSGNIDAAKDIIRYIIGIGKHFQTKESQETLHKFASKPASEFVKILDTFEKLKDLFTELQKKIYVIEGTSKQDLTSFNMELSSSGISADLSRGHAVVGAVGAKDWAGGFLDLKADLQDDTFIGNEPLTPEVRAGYLGYTVTWLPSRQKTSLLASGAPRYQHMGRVLLFQEPQGGGHWSQVQTIHGTQIGSYFGGELCGVDVDQDGETELLLIGAPLFYGEQRGGRVFIYQRRQLGFEEVSELQGDPGYPLGRFGEAITALTDINGDGLVDVAVGAPLEEQGAVYIFNGRHGGLSPQPSQRIEGTQVLSGIQWFGRSIHGVKDLEGDGLADVAVGAESQMIVLSSRPVVDMVTLMSFSPAEIPVHEVECSYSTSNKMKEGVNITICFQIKSLIPQFQGRLVANLTYTLQLDGHRTRRRGLFPGGRHELRRNIAVTTSMSCTDFSFHFPVCVQDLISPINVSLNFSLWEEEGTPRDQRAQGKDIPPILRPSLHSETWEIPFEKNCGEDKKCEANLRVSFSPARSRALRLTAFASLSVELSLSNLEEDAYWVQLDLHFPPGLSFRKVEMLKPHSQIPVSCEELPEESRLLSRALSCNVSSPIFKAGHSVALQMMFNTLVNSSWGDSVELHANVTCNNEDSDLLEDNSATTIIPILYPINILIQDQEDSTLYVSFTPKGPKIHQVKHMYQVRIQPSIHDHNIPTLEAVVGVPQPPSEGPITHQWSVQMEPPVPCHYEDLERLPDAAEPCLPGALFRCPVVFRQEILVQVIGTLELVGEIEASSMFSLCSSLSISFNSSKHFHLYGSNASLAQVVMKVDVVYEKQMLYLYVLSGIGGLLLLLLIFIVLYKVGFFKRNLKEKMEAGRGVPNGIPAEDSEQLASGQEAGDPGCLKPLHEKDSESGGGKD,ital,9
11
+ MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEHIEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTVTSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDSLKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRKTFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPIPQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQRDRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSPGPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDVAVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHHLHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATVKSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNINNRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARSLPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH,braf,10
12
+ MTPNSMTENGLTAWDKPKHCPDREHDWKLVGMSEACLHRKSHSERRSTLKNEQSSPHLIQTTWTSSIFHLDHDDVNDQSVSSAQTFQTEEKKCKGYIPSYLDKDELCVVCGDKATGYHYRCITCEGCKGFFRRTIQKNLHPSYSCKYEGKCVIDKVTRNQCQECRFKKCIYVGMATDLVLDDSKRLAKRKLIEENREKRRREELQKSIGHKPEPTDEEWELIKTVTEAHVATNAQGSHWKQKRKFLPEDIGQAPIVNAPEGGKVDLEAFSHFTKIITPAITRVVDFAKKLPMFCELPCEDQIILLKGCCMEIMSLRAAVRYDPESETLTLNGEMAVTRGQLKNGGLGVVSDAIFDLGMSLSSFNLDDTEVALLQAVLLMSSDRPGLACVERIEKYQDSFLLAFEHYINYRKHHVTHFWPKLLMKVTDLRMIGACHASRFLHMKVECPTELFPPLFLEVFED,thb,11
13
+ MEHGTLLAQPGLWTRDTSWALLYFLCYILPQTAPQVLRIGGIFETVENEPVNVEELAFKFAVTSINRNRTLMPNTTLTYDIQRINLFDSFEASRRACDQLALGVAALFGPSHSSSVSAVQSICNALEVPHIQTRWKHPSVDNKDLFYINLYPDYAAISRAILDLVLYYNWKTVTVVYEDSTGLIRLQELIKAPSRYNIKIKIRQLPSGNKDAKPLLKEMKKGKEFYVIFDCSHETAAEILKQILFMGMMTEYYHYFFTTLDLFALDLELYRYSGVNMTGFRLLNIDNPHVSSIIEKWSMERLQAPPRPETGLLDGMMTTEAALMYDAVYMVAIASHRASQLTVSSLQCHRHKPWRLGPRFMNLIKEARWDGLTGHITFNKTNGLRKDFDLDIISLKEEGTEKAAGEVSKHLYKVWKKIGIWNSNSGLNMTDSNKDKSSNITDSLANRTLIVTTILEEPYVMYRKSDKPLYGNDRFEGYCLDLLKELSNILGFIYDVKLVPDGKYGAQNDKGEWNGMVKELIDHRADLAVAPLTITYVREKVIDFSKPFMTLGISILYRKPNGTNPGVFSFLNPLSPDIWMYVLLACLGVSCVLFVIARFTPYEWYNPHPCNPDSDVVENNFTLLNSFWFGVGALMQQGSELMPKALSTRIVGGIWWFFTLIIISSYTANLAAFLTVERMESPIDSADDLAKQTKIEYGAVRDGSTMTFFKKSKISTYEKMWAFMSSRQQTALVRNSDEGIQRVLTTDYALLMESTSIEYVTQRNCNLTQIGGLIDSKGYGVGTPIGSPYRDKITIAILQLQEEGKLHMMKEKWWRGNGCPEEDNKEASALGVENIGGIFIVLAAGLVLSVFVAIGEFIYKSRKNNDIEQAFCFFYGLQCKQTHPTNSTSGTTLSTDLECGKLIREERGIRKQSSVHTV,grik1,12
14
+ MADPAAGPPPSEGEESTVRFARKGALRQKNVHEVKNHKFTARFFKQPTFCSHCTDFIWGFGKQGFQCQVCCFVVHKRCHEFVTFSCPGADKGPASDDPRSKHKFKIHTYSSPTFCDHCGSLLYGLIHQGMKCDTCMMNVHKRCVMNVPSLCGTDHTERRGRIYIQAHIDRDVLIVLVRDAKNLVPMDPNGLSDPYVKLKLIPDPKSESKQKTKTIKCSLNPEWNETFRFQLKESDKDRRLSVEIWDWDLTSRNDFMGSLSFGISELQKASVDGWFKLLSQEEGEYFNVPVPPEGSEANEELRQKFERAKISQGTKVPEEKTTNTVSKFDNNGNRDRMKLTDFNFLMVLGKGSFGKVMLSERKGTDELYAVKILKKDVVIQDDDVECTMVEKRVLALPGKPPFLTQLHSCFQTMDRLYFVMEYVNGGDLMYHIQQVGRFKEPHAVFYAAEIAIGLFFLQSKGIIYRDLKLDNVMLDSEGHIKIADFGMCKENIWDGVTTKTFCGTPDYIAPEIIAYQPYGKSVDWWAFGVLLYEMLAGQAPFEGEDEDELFQSIMEHNVAYPKSMSKEAVAICKGLMTKHPGKRLGCGPEGERDIKEHAFFRYIDWEKLERKEIQPPYKPKARDKRDTSNFDKEFTRQPVELTPTDKLFIMNLDQNEFAGFSYTNPEFVINV,kpcb,13
15
+ FLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTVHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVIESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPVWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,hivint,14
16
+ MAWRHLKKRAQDAVIILGGGGLLFASYLMATGDERFYAEHLMPTLQGLLDPESAHRLAVRFTSLGLLPRARFQDSDMLEVRVLGHKFRNPVGIAAGFDKHGEAVDGLYKMGFGFVEIGSVTPKPQEGNPRPRVFRLPEDQAVINRYGFNSHGLSVVEHRLRARQQKQAKLTEDGLPLGVNLGKNKTSVDAAEDYAEGVRVLGPLADYLVVNVSSPNTAGLRSLQGKAELRRLLTKVLQERDGLRRVHRPAVLVKIAPDLTSQDKEDIASVVKELGIDGLIVTNTTVSRPAGLQGALRSETGGLSGKPLRDLSTQTIREMYALTQGRVPIIGVGGVSSGQDALEKIRAGASLVQLYTALTFWGPPVVGKVKRELEALLKEQGFGGVTDAIGADHRR,pyrd,15
17
+ MENFQKVEKIGEGTYGVVYKARNKLTGEVVALKKIRLDTETEGVPSTAIREISLLKELNHPNIVKLLDVIHTENKLYLVFEFLHQDLKKFMDASALTGIPLPLIKSYLFQLLQGLAFCHSHRVLHRDLKPQNLLINTEGAIKLADFGLARAFGVPVRTYTHEVVTLWYRAPEILLGCKYYSTAVDIWSLGCIFAEMVTRRALFPGDSEIDQLFRIFRTLGTPDEVVWPGVTSMPDYKPSFPKWARQDFSKVVPPLDEDGRSLLSQMLHYDPNKRISAKAALAHPFFQDVTKPVPHLRL,cdk2,16
18
+ MCDAFVGTWKLVSSENFDDYMKEVGVGFATRKVAGMAKPNMIISVNGDVITIKSESTFKNTEISFILGQEFDEVTADDRKVKSTITLDGGVLVHVQKWDGKSTTIKRKREDDKLVVECVMKGVTSTRVYERA,fabp4,17
19
+ MDTKHFLPLDFSTQVNSSLTSPTGRGSMAAPSLHPSLGPGIGSPGQLHSPISTLSSPINGMGPPFSVISSPMGPHSMSVPTTPTLGFSTGSPQLSSPMNPVSSSEDIKPPLGLNGVLKVPAHPSGNMASFTKHICAICGDRSSGKHYGVYSCEGCKGFFKRTVRKDLTYTCRDNKDCLIDKRQRNRCQYCRYQKCLAMGMKREAVQEERQRGKDRNENEVESTSSANEDMPVERILEAELAVEPKTETYVEANMGLNPSSPNDPVTNICQAADKQLFTLVEWAKRIPHFSELPLDDQVILLRAGWNELLIASFSHRSIAVKDGILLATGLHVHRNSAHSAGVGAIFDRVLTELVSKMRDMQMDKTELGCLRAIVLFNPDSKGLSNPAEVEALREKVYASLEAYCKHKYPEQPGRFAKLLLRLPALRSIGLKCLEHLFFFKLIGDTPIDTFLMEMLEAPHQMT,rxra,18
20
+ MNPLLILTFVAAALAAPFDDDDKIVGGYNCEENSVPYQVSLNSGYHFCGGSLINEQWVVSAGHCYKSRIQVRLGEHNIEVLEGNEQFINAAKIIRHPQYDRKTLNNDIMLIKLSSRAVINARVSTISLPTAPPATGTKCLISGWGNTASSGADYPDELQCLDAPVLSQAKCEASYPGKITSNMFCVGFLEGGKDSCQGDSGGPVVCNGQLQGVVSWGDGCAQKNKPGVYTKVYNYVKWIKNTIAANS,try1,19
21
+ MSCINLPTVLPGSPSKTRGQIQVILGPMFSGKSTELMRRVRRFQIAQYKCLVIKYAKDTRYSSSFCTHDRNTMEALPACLLRDVAQEALGVAVIGIDEGQFFPDIVEFCEAMANAGKTVIVAALDGTFQRKPFGAILNLVPLAESVVKLTAVCMECFREAAYTKRLGTEKEVEVIGGADKYHSVCRLCYFKKASGQPAGPDNKENCPVPGKPGEAVAARKLFAPQQILQCSPAN,kith,20
22
+ MALIPDLAMETWLLLAVSLVLLYLYGTHSHGLFKKLGIPGPTPLPFLGNILSYHKGFCMFDMECHKKYGKVWGFYDGQQPVLAITDPDMIKTVLVKECYSVFTNRRPFGPVGFMKSAISIAEDEEWKRLRSLLSPTFTSGKLKEMVPIIAQYGDVLVRNLRREAETGKPVTLKDVFGAYSMDVITSTSFGVNIDSLNNPQDPFVENTKKLLRFDFLDPFFLSITVFPFLIPILEVLNICVFPREVTNFLRKSVKRMKESRLEDTQKHRVDFLQLMIDSQNSKETESHKALSDLELVAQSIIFIFAGYETTSSVLSFIMYELATHPDVQQKLQEEIDAVLPNKAPPTYDTVLQMEYLDMVVNETLRLFPIAMRLERVCKKDVEINGMFIPKGVVVMIPSYALHRDPKYWTEPEKFLPERFSKKNKDNIDPYIYTPFGSGPRNCIGMRFALMNMKLALIRVLQNFSFKPCKETQIPLKLSLGGLLQPEKPVVLKVESRDGTVSGA,cp3a4,21
23
+ MAYSQGGGKKKVCYYYDGDIGNYYYGQGHPMKPHRIRMTHNLLLNYGLYRKMEIYRPHKATAEEMTKYHSDEYIKFLRSIRPDNMSEYSKQMQRFNVGEDCPVFDGLFEFCQLSTGGSVAGAVKLNRQQTDMAVNWAGGLHHAKKSEASGFCYVNDIVLAILELLKYHQRVLYIDIDIHHGDGVEEAFYTTDRVMTVSFHKYGEYFPGTGDLRDIGAGKGKYYAVNFPMRDGIDDESYGQIFKPIISKVMEMYQPSAVVLQCGADSLSGDRLGCFNLTVKGHAKCVEVVKTFNLPLLMLGGGGYTIRNVARCWTYETAVALDCEIPNELPYNDYFEYFGPDFKLHISPSNMTNQNTPEYMEKIKQRLFENLRMLPHAPGVQMQAIPEDAVHEDSGDEDGEDPDKRISIRASDKRIACDEEFSDSEDEGEGGRRNVADHKKGAKKARIEEDKKETEDKKTDVKEEDKSKDNSGEKTDTKGTKSEQLSNP,hdac2,22
24
+ MTELKAKGPRAPHVAGGPPSPEVGSPLLCRPAAGPFPGSQTSDTLPEVSAIPISLDGLLFPRPCQGQDPSDEKTQDQQSLSDVEGAYSRAEATRGAGGSSSSPPEKDSGLLDSVLDTLLAPSGPGQSQPSPPACEVTSSWCLFGPELPEDPPAAPATQRVLSPLMSRSGCKVGDSSGTAAAHKVLPRGLSPARQLLLPASESPHWSGAPVKPSPQAAAVEVEEEDGSESEESAGPLLKGKPRALGGAAAGGGAAAVPPGAAAGGVALVPKEDSRFSAPRVALVEQDAPMAPGRSPLATTVMDFIHVPILPLNHALLAARTRQLLEDESYDGGAGAASAFAPPRSSPCASSTPVAVGDFPDCAYPPDAEPKDDAYPLYSDFQPPALKIKEEEEGAEASARSPRSYLVAGANPAAFPDFPLGPPPPLPPRATPSRPGEAAVTAAPASASVSSASSSGSTLECILYKAEGAPPQQGPFAPPPCKAPGASGCLLPRDGLPSTSASAAAAGAAPALYPALGLNGLPQLGYQAAVLKEGLPQVYPPYLNYLRPDSEASQSPQYSFESLPQKICLICGDEASGCHYGVLTCGSCKVFFKRAMEGQHNYLCAGRNDCIVDKIRRKNCPACRLRKCCQAGMVLGGRKFKKFNKVRVVRALDAVALPQPVGVPNESQALSQRFTFSPGQDIQLIPPLINLLMSIEPDVIYAGHDNTKPDTSSSLLTSLNQLGERQLLSVVKWSKSLPGFRNLHIDDQITLIQYSWMSLMVFGLGWRSYKHVSGQMLYFAPDLILNEQRMKESSFYSLCLTMWQIPQEFVKLQVSQEEFLCMKVLLLLNTIPLEGLRSQTQFEEMRSSYIRELIKAIGLRQKGVVSSSQRFYQLTKLLDNLHDLVKQLHLYCLNTFIQSRALSVEFPEMMSEVIAAQLPKILAGMVKPLLFHKK,prgr,23
25
+ MGSNKSKPKDASQRRRSLEPAENVHGAGGGAFPASQTPSKPASADGHRGPSAAFAPAAAEPKLFGGFNSSDTVTSPQRAGPLAGGVTTFVALYDYESRTETDLSFKKGERLQIVNNTEGDWWLAHSLSTGQTGYIPSNYVAPSDSIQAEEWYFGKITRRESERLLLNAENPRGTFLVRESETTKGAYCLSVSDFDNAKGLNVKHYKIRKLDSGGFYITSRTQFNSLQQLVAYYSKHADGLCHRLTTVCPTSKPQTQGLAKDAWEIPRESLRLEVKLGQGCFGEVWMGTWNGTTRVAIKTLKPGTMSPEAFLQEAQVMKKLRHEKLVQLYAVVSEEPIYIVTEYMSKGSLLDFLKGETGKYLRLPQLVDMAAQIASGMAYVERMNYVHRDLRAANILVGENLVCKVADFGLARLIEDNEYTARQGAKFPIKWTAPEAALYGRFTIKSDVWSFGILLTELTTKGRVPYPGMVNREVLDQVERGYRMPCPPECPESLHDLMCQCWRKEPEERPTFEYLQAFLEDYFTSTEPQYQPGENL,src,24
26
+ MTFNSFEGSKTCVPADINKEEEFVEEFNRLKTFANFPSGSPVSASTLARAGFLYTGEGDTVRCFSCHAAVDRWQYGDSAVGRHRKVSPNCRFINGFYLENSATQSTNSGIQNGQYKVENYLGSRDHFALDRPSETHADYLLRTGQVVDISDTIYPRNPAMYSEEARLKSFQNWPDYAHLTPRELASAGLYYTGIGDQVQCFCCGGKLKNWEPCDRAWSEHRRHFPNCFFVLGRNLNIRSESDAVSSDRNFPNSTNLPRNPSMADYEARIFTFGTWIYSVNKEQLARAGFYALGEGDKVKCFHCGGGLTDWKPSEDPWEQHAKWYPGCKYLLEQKGQEYINNIHLTHSLEECLVRTTEKTPSLTRRIDDTIFQNPMVQEAIRMGFSFKDIKKIMEEKIQISGSNYKSLEVLVADLVNAQKDSMQDESSQTSLQKEISTEEQLRRLQEEKLCKICMDRNIAIVFVPCGHLVTCKQCAEAVDKCPMCYTVITFKQKIFMS,xiap,25
27
+ MSRSLLLWFLLFLLLLPPLPVLLADPGAPTPVNPCCYYPCQHQGICVRFGLDRYQCDCTRTGYSGPNCTIPGLWTWLRNSLRPSPSFTHFLLTHGRWFWEFVNATFIREMLMRLVLTVRSNLIPSPPTYNSAHDYISWESFSNVSYYTRILPSVPKDCPTPMGTKGKKQLPDAQLLARRFLLRRKFIPDPQGTNLMFAFFAQHFTHQFFKTSGKMGPGFTKALGHGVDLGHIYGDNLERQYQLRLFKDGKLKYQVLDGEMYPPSVEEAPVLMHYPRGIPPQSQMAVGQEVFGLLPGLMLYATLWLREHNRVCDLLKAEHPTWGDEQLFQTTRLILIGETIKIVIEEYVQQLSGYFLQLKFDPELLFGVQFQYRNRIAMEFNHLYHWHPLMPDSFKVGSQEYSYEQFLFNTSMLVDYGVEALVDAFSRQIAGRIGGGRNMDHHILHVAVDVIRESREMRLQPFNEYRKRFGMKPYTSFQELVGEKEMAAELEELYGDIDALEFYPGLLLEKCHPNSIFGESMIEIGAPFSLKGLLGNPICSPEYWKPSTFGGEVGFNIVKTATLKKLVCLNTKTCPYVSFRVPDASQDDGPAVERPSTEL,pgh1,26
28
+ MVDTESPLCPLSPLEAGDLESPLSEEFLQEMGNIQEISQSIGEDSSGSFGFTEYQYLGSCPGSDGSVITDTLSPASSPSSVTYPVVPGSVDESPSGALNIECRICGDKASGYHYGVHACEGCKGFFRRTIRLKLVYDKCDRSCKIQKKNRNKCQYCRFHKCLSVGMSHNAIRFGRMPRSEKAKLKAEILTCEHDIEDSETADLKSLAKRIYEAYLKNFNMNKVKARVILSGKASNNPPFVIHDMETLCMAEKTLVAKLVANGIQNKEAEVRIFHCCQCTSVETVTELTEFAKAIPGFANLDLNDQVTLLKYGVYEAIFAMLSSVMNKDGMLVAYGNGFITREFLKSLRKPFCDIMEPKFDFAMKFNALELDDSDISLFVAAIICCGDRPGLLNVGHIEKMQEGIVHVLRLHLQSNHPDDIFLFPKLLQKMADLRQLVTEHAQLVQIIKKTESDAALHPLLQEIYRDMY,ppara,27
29
+ MAATEGVGEAAQGGEPGQPAQPPPQPHPPPPQQQHKEEMAAEAGEAVASPMDDGFVSLDSPSYVLYRDRAEWADIDPVPQNDGPNPVVQIIYSDKFRDVYDYFRAVLQRDERSERAFKLTRDAIELNAANYTVWHFRRVLLKSLQKDLHEEMNYITAIIEEQPKNYQVWHHRRVLVEWLRDPSQELEFIADILNQDAKNYHAWQHRQWVIQEFKLWDNELQYVDQLLKEDVRNNSVWNQRYFVISNTTGYNDRAVLEREVQYTLEMIKLVPHNESAWNYLKGILQDRGLSKYPNLLNQLLDLQPSHSSPYLIAFLVDIYEDMLENQCDNKEDILNKALELCEILAKEKDTIRKEYWRYIGRSLQSKHSTENDSPTNVQQ,fnta,28
30
+ MPEAPPLLLAAVLLGLVLLVVLLLLLRHWGWGLCLIGWNEFILQPIHNLLMGDTKEQRILNHVLQHAEPGNAQSVLEAIDTYCEQKEWAMNVGDKKGKIVDAVIQEHQPSVLLELGAYCGYSAVRMARLLSPGARLITIEINPDCAAITQRMVDFAGVKDKVTLVVGASQDIIPQLKKKYDVDTLDMVFLDHWKDRYLPDTLLLEECGLLRKGTVLLADNVICPGAPDFLAHVRGSSCFECTHYQSFLEYREVVDGLEKAIYKGPGSEAGP,comt,29
31
+ MEGISIYTSDNYTEEMGSGDYDSMKEPCFREENANFNKIFLPTIYSIIFLTGIVGNGLVILVMGYQKKLRSMTDKYRLHLSVADLLFVITLPFWAVDAVANWYFGNFLCKAVHVIYTVNLYSSVLILAFISLDRYLAIVHATNSQRPRKLLAEKVVYVGVWIPALLLTIPDFIFANVSEADDRYICDRFYPNDLWVVVFQFQHIMVGLILPGIVILSCYCIIISKLSHSKGHQKRKALKTTVILILAFFACWLPYYIGISIDSFILLEIIKQGCEFENTVHKWISITEALAFFHCCLNPILYAFLGAKFKTSAQHALTSVSRGSSLKILSKGKRGGHSSVSTESESSSFHSS,cxcr4,30
32
+ MPVAGSELPRRPLPPAAQERDAEPRPPHGELQYLGQIQHILRCGVRKDDRTGTGTLSVFGMQARYSLRDEFPLLTTKRVFWKGVLEELLWFIKGSTNAKELSSKGVKIWDANGSRDFLDSLGFSTREEGDLGPVYGFQWRHFGAEYRDMESDYSGQGVDQLQRVIDTIKTNPDDRRIIMCAWNPRDLPLMALPPCHALCQFYVVNSELSCQLYQRSGDMGLGVPFNIASYALLTYMIAHITGLKPGDFIHTLGDAHIYLNHIEPLKIQLQREPRPFPKLRILRKVEKIDDFKAEDFQIEGYNPHPTIKMEMAV,tysy,31
33
+ PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFP,hivpr,32
34
+ MKAPAVLAPGILVLLFTLVQRSNGECKEALAKSEMNVNMKYQLPNFTAETPIQNVILHEHHIFLGATNYIYVLNEEDLQKVAEYKTGPVLEHPDCFPCQDCSSKANLSGGVWKDNINMALVVDTYYDDQLISCGSVNRGTCQRHVFPHNHTADIQSEVHCIFSPQIEEPSQCPDCVVSALGAKVLSSVKDRFINFFVGNTINSSYFPDHPLHSISVRRLKETKDGFMFLTDQSYIDVLPEFRDSYPIKYVHAFESNNFIYFLTVQRETLDAQTFHTRIIRFCSINSGLHSYMEMPLECILTEKRKKRSTKKEVFNILQAAYVSKPGAQLARQIGASLNDDILFGVFAQSKPDSAEPMDRSAMCAFPIKYVNDFFNKIVNKNNVRCLQHFYGPNHEHCFNRTLLRNSSGCEARRDEYRTEFTTALQRVDLFMGQFSEVLLTSISTFIKGDLTIANLGTSEGRFMQVVVSRSGPSTPHVNFLLDSHPVSPEVIVEHTLNQNGYTLVITGKKITKIPLNGLGCRHFQSCSQCLSAPPFVQCGWCHDKCVRSEECLSGTWTQQICLPAIYKVFPNSAPLEGGTRLTICGWDFGFRRNNKFDLKKTRVLLGNESCTLTLSESTMNTLKCTVGPAMNKHFNMSIIISNGHGTTQYSTFSYVDPVITSISPKYGPMAGGTLLTLTGNYLNSGNSRHISIGGKTCTLKSVSNSILECYTPAQTISTEFAVKLKIDLANRETSIFSYREDPIVYEIHPTKSFISGGSTITGVGKNLNSVSVPRMVINVHEAGRNFTVACQHRSNSEIICCTTPSLQQLNLQLPLKTKAFFMLDGILSKYFDLIYVHNPVFKPFEKPVMISMGNENVLEIKGNDIDPEAVKGEVLKVGNKSCENIHLHSEAVLCTVPNDLLKLNSELNIEWKQAISSTVLGKVIVQPDQNFTGLIAGVVSISTALLLLLGFFLWLKKRKQIKDLGSELVRYDARVHTPHLDRLVSARSVSPTTEMVSNESVDYRATFPEDQFPNSSQNGSCRQVQYPLTDMSPILTSGDSDISSPLLQNTVHIDLSALNPELVQAVQHVVIGPSSLIVHFNEVIGRGHFGCVYHGTLLDNDGKKIHCAVKSLNRITDIGEVSQFLTEGIIMKDFSHPNVLSLLGICLRSEGSPLVVLPYMKHGDLRNFIRNETHNPTVKDLIGFGLQVAKGMKYLASKKFVHRDLAARNCMLDEKFTVKVADFGLARDMYDKEYYSVHNKTGAKLPVKWMALESLQTQKFTTKSDVWSFGVLLWELMTRGAPPYPDVNTFDITVYLLQGRRLLQPEYCPDPLYEVMLKCWHPKAEMRPSFSELVSRISAIFSTFIGEHYVHVNATYVNVKCVAPYPSLLSSEDNADDEVDTRPASFWETS,met,33
35
+ MVLHLLLFLLLTPQGGHSCQGLELARELVLAKVRALFLDALGPPAVTREGGDPGVRRLPRRHALGGFTHRGSEPEEEEDVSQAILFPATDASCEDKSAARGLAQEAEEGLFRYMFRPSQHTRSRQVTSAQLWFHTGLDRQGTAASNSSEPLLGLLALSPGGPVAVPMSLGHAPPHWAVLHLATSALSLLTHPVLVLLLRCPLCTCSARPEATPFLVAHTRTRPPSGGERARRSTPLMSWPWSPSALRLLQRPPEEPAAHANCHRVALNISFQELGWERWIVYPPSFIFHYCHGGCGLHIPPNLSLPVPGAPPTPAQPYSLLPGAQPCCAALPGTMRPLHVRTTSDGGYSFKYETVPNLLTQHCACI,inha,34
36
+ MEEPEEPADSGQSLVPVYIYSPEYVSMCDSLAKIPKRASMVHSLIEAYALHKQMRIVKPKVASMEEMATFHTDAYLQHLQKVSQEGDDDHPDSIEYGLGYDCPATEGIFDYAAAIGGATITAAQCLIDGMCKVAINWSGGWHHAKKDEASGFCYLNDAVLGILRLRRKFERILYVDLDLHHGDGVEDAFSFTSKVMTVSLHKFSPGFFPGTGDVSDVGLGKGRYYSVNVPIQDGIQDEKYYQICESVLKEVYQAFNPKAVVLQLGADTIAGDPMCSFNMTPVGIGKCLKYILQWQLATLILGGGGYNLANTARCWTYLTGVILGKTLSSEIPDHEFFTAYGPDYVLEITPSCRPDRNEPHRIQQILNYIKGNLKHVV,hdac8,35
37
+ MENTENSVDSKSIKNLEPKIIHGSESMDSGISLDNSYKMDYPEMGLCIIINNKNFHKSTGMTSRSGTDVDAANLRETFRNLKYEVRNKNDLTREEIVELMRDVSKEDHSKRSSFVCVLLSHGEEGIIFGTNGPVDLKKITNFFRGDRCRSLTGKPKLFIIQACRGTELDCGIETDSGVDDDMACHKIPVEADFLYAYSTAPGYYSWRNSKDGSWFIQSLCAMLKQYADKLEFMHILTRVNRKVATEFESFSFDATFHAKKQIPCIVSMLTKELYFYH,bace1,36
38
+ MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKPLSVSYDQATSLRILNNGHAFNVEFDDSQDKAVLKGGPLDGTYRLIQFHFHWGSLDGQGSEHTVDKKKYAAELHLVHWNTKYGDFGKAVQQPDGLAVLGIFLKVGSAKPGLQKVVDVLDSIKTKGKSADFTNFDPRGLLPESLDYWTYPGSLTTPPLLECVTWIVLKEPISVSSEQVLKFRKLNFNGEGEPEELMVDNWRPAQPLKNRQIKASFK,cah2,37
39
+ MRGARGAWDFLCVLLLLLRVQTGSSQPSVSPGEPSPPSIHPGKSDLIVRVGDEIRLLCTDPGFVKWTFEILDETNENKQNEWITEKAEATNTGKYTCTNKHGLSNSIYVFVRDPAKLFLVDRSLYGKEDNDTLVRCPLTDPEVTNYSLKGCQGKPLPKDLRFIPDPKAGIMIKSVKRAYHRLCLHCSVDQEGKSVLSEKFILKVRPAFKAVPVVSVSKASYLLREGEEFTVTCTIKDVSSSVYSTWKRENSQTKLQEKYNSWHHGDFNYERQATLTISSARVNDSGVFMCYANNTFGSANVTTTLEVVDKGFINIFPMINTTVFVNDGENVDLIVEYEAFPKPEHQQWIYMNRTFTDKWEDYPKSENESNIRYVSELHLTRLKGTEGGTYTFLVSNSDVNAAIAFNVYVNTKPEILTYDRLVNGMLQCVAAGFPEPTIDWYFCPGTEQRCSASVLPVDVQTLNSSGPPFGKLVVQSSIDSSAFKHNGTVECKAYNDVGKTSAYFNFAFKGNNKEQIHPHTLFTPLLIGFVIVAGMMCIIVMILTYKYLQKPMYEVQWKVVEEINGNNYVYIDPTQLPYDHKWEFPRNRLSFGKTLGAGAFGKVVEATAYGLIKSDAAMTVAVKMLKPSAHLTEREALMSELKVLSYLGNHMNIVNLLGACTIGGPTLVITEYCCYGDLLNFLRRKRDSFICSKQEDHAEAALYKNLLHSKESSCSDSTNEYMDMKPGVSYVVPTKADKRRSVRIGSYIERDVTPAIMEDDELALDLEDLLSFSYQVAKGMAFLASKNCIHRDLAARNILLTHGRITKICDFGLARDIKNDSNYVVKGNARLPVKWMAPESIFNCVYTFESDVWSYGIFLWELFSLGSSPYPGMPVDSKFYKMIKEGFRMLSPEHAPAEMYDIMKTCWDADPLKRPTFKQIVQLIEKQISESTNHIYSNLANCSPNRQKPVVDHSVRINSVGSTASSSQPLLVHDDV,kit,38
40
+ MPKKKPTPIQLNPAPDGSAVNGTSSAETNLEALQKKLEELELDEQQRKRLEAFLTQKQKVGELKDDDFEKISELGAGNGGVVFKVSHKPSGLVMARKLIHLEIKPAIRNQIIRELQVLHECNSPYIVGFYGAFYSDGEISICMEHMDGGSLDQVLKKAGRIPEQILGKVSIAVIKGLTYLREKHKIMHRDVKPSNILVNSRGEIKLCDFGVSGQLIDSMANSFVGTRSYMSPERLQGTHYSVQSDIWSMGLSLVEMAVGRYPIPPPDAKELELMFGCQVEGDAAETPPRPRTPGRPLSSYGMDSRPPMAIFELLDYIVNEPPPKLPSGVFSLEFQDFVNKCLIKNPAERADLKQLMVHAFIKRSDAEEVDFAGWLCSTIGLNQPSTPTHAAGV,mp2k1,39
41
+ MPIMGSSVYITVELAIAVLAILGNVLVCWAVWLNSNLQNVTNYFVVSLAAADIAVGVLAIPFAITISTGFCAACHGCLFIACFVLVLTQSSIFSLLAIAIDRYIAIRIPLRYNGLVTGTRAKGIIAICWVLSFAIGLTPMLGWNNCGQPKEGKNHSQGCGEGQVACLFEDVVPMNYMVYFNFFACVLVPLLLMLGVYLRIFLAARRQLKQMESQPLPGERARSTLQKEVHAAKSLAIIVGLFALCWLPLHIINCFTFFCPDCSHAPLWLMYLAIVLSHTNSVVNPFIYAYRIREFRQTFRKIIRSHVLRQQEPFKAAGTSARVLAAHGSDGEQVSLRLNGHPPGVWANGSAPHPERRPNGYALGLVSGGSAQESQGNTGLPDVELLSHELKGVCPEPPGLDDPLAQDGAGVS,aa2ar,40
42
+ MVSQALRLLCLLLGLQGCLAAGGVAKASGGETRDMPWKPGPHRVFVTQEEAHGVLHRRRRANAFLEELRPGSLERECKEEQCSFEEAREIFKDAERTKLFWISYSDGDQCASSPCQNGGSCKDQLQSYICFCLPAFEGRNCETHKDDQLICVNENGGCEQYCSDHTGTKRSCRCHEGYSLLADGVSCTPTVEYPCGKIPILEKRNASKPQGRIVGGKVCPKGECPWQVLLLVNGAQLCGGTLINTIWVVSAAHCFDKIKNWRNLIAVLGEHDLSEHDGDEQSRRVAQVIIPSTYVPGTTNHDIALLRLHQPVVLTDHVVPLCLPERTFSERTLAFVRFSLVSGWGQLLDRGATALELMVLNVPRLMTQDCLQQSRKVGDSPNITEYMFCAGYSDGSKDSCKGDSGGPHATHYRGTWYLTGIVSWGQGCATVGHFGVYTRVSQYIEWLQKLMRSEPRPGVLLRAPFP,fa7,41
43
+ MEAAVAAPRPRLLLLVLAAAAAAAAALLPGATALQCFCHLCTKDNFTCVTDGLCFVSVTETTDKVIHNSMCIAEIDLIPRDRPFVCAPSSKTGSVTTTYCCNQDHCNKIELPTTVKSSPGLGPVELAAVIAGPVCFVCISLMLMVYICHNRTVIHHRVPNEEDPSLDRPFISEGTTLKDLIYDMTTSGSGSGLPLLVQRTIARTIVLQESIGKGRFGEVWRGKWRGEEVAVKIFSSREERSWFREAEIYQTVMLRHENILGFIAADNKDNGTWTQLWLVSDYHEHGSLFDYLNRYTVTVEGMIKLALSTASGLAHLHMEIVGTQGKPAIAHRDLKSKNILVKKNGTCCIADLGLAVRHDSATDTIDIAPNHRVGTKRYMAPEVLDDSINMKHFESFKRADIYAMGLVFWEIARRCSIGGIHEDYQLPYYDLVPSDPSVEEMRKVVCEQKLRPNIPNRWQSCEALRVMAKIMRECWYANGAARLTALRIKKTLSQLSQQEGIKM,tgfr1,42
44
+ MKSGSGGGSPTSLWGLLFLSAALSLWPTSGEICGPGIDIRNDYQQLKRLENCTVIEGYLHILLISKAEDYRSYRFPKLTVITEYLLLFRVAGLESLGDLFPNLTVIRGWKLFYNYALVIFEMTNLKDIGLYNLRNITRGAIRIEKNADLCYLSTVDWSLILDAVSNNYIVGNKPPKECGDLCPGTMEEKPMCEKTTINNEYNYRCWTTNRCQKMCPSTCGKRACTENNECCHPECLGSCSAPDNDTACVACRHYYYAGVCVPACPPNTYRFEGWRCVDRDFCANILSAESSDSEGFVIHDGECMQECPSGFIRNGSQSMYCIPCEGPCPKVCEEEKKTKTIDSVTSAQMLQGCTIFKGNLLINIRRGNNIASELENFMGLIEVVTGYVKIRHSHALVSLSFLKNLRLILGEEQLEGNYSFYVLDNQNLQQLWDWDHRNLTIKAGKMYFAFNPKLCVSEIYRMEEVTGTKGRQSKGDINTRNNGERASCESDVLHFTSTTTSKNRIIITWHRYRPPDYRDLISFTVYYKEAPFKNVTEYDGQDACGSNSWNMVDVDLPPNKDVEPGILLHGLKPWTQYAVYVKAVTLTMVENDHIRGAKSEILYIRTNASVPSIPLDVLSASNSSSQLIVKWNPPSLPNGNLSYYIVRWQRQPQDGYLYRHNYCSKDKIPIRKYADGTIDIEEVTENPKTEVCGGEKGPCCACPKTEAEKQAEKEEAEYRKVFENFLHNSIFVPRPERKRRDVMQVANTTMSSRSRNTTAADTYNITDPEELETEYPFFESRVDNKERTVISNLRPFTLYRIDIHSCNHEAEKLGCSASNFVFARTMPAEGADDIPGPVTWEPRPENSIFLKWPEPENPNGLILMYEIKYGSQVEDQRECVSRQEYRKYGGAKLNRLNPGNYTARIQATSLSGNGSWTDPVFFYVQAKTGYENFIHLIIALPVAVLLIVGGLVIMLYVFHRKRNNSRLGNGVLYASVNPEYFSAADVYVPDEWEVAREKITMSRELGQGSFGMVYEGVAKGVVKDEPETRVAIKTVNEAASMRERIEFLNEASVMKEFNCHHVVRLLGVVSQGQPTLVIMELMTRGDLKSYLRSLRPEMENNPVLAPPSLSKMIQMAGEIADGMAYLNANKFVHRDLAARNCMVAEDFTVKIGDFGMTRDIYETDYYRKGGKGLLPVRWMSPESLKDGVFTTYSDVWSFGVVLWEIATLAEQPYQGLSNEQVLRFVMEGGLLDKPDNCPDMLFELMRMCWQYNPKMRPSFLEIISSIKEEMEPGFREVSFYYSEENKLPEPEELDLEPENMESVPLDPSASSSSLPLPDRHSGHKAENGPGPGVLVLRASFDERQPYAHMNGGRKNERALPLPQSSTC,igf1r,43
45
+ MSAAVTAGKLARAPADPGKAGVPGVAAPGAPAAAPPAKEIPEVLVDPRSRRRYVRGRFLGKGGFAKCFEISDADTKEVFAGKIVPKSLLLKPHQREKMSMEISIHRSLAHQHVVGFHGFFEDNDFVFVVLELCRRRSLLELHKRRKALTEPEARYYLRQIVLGCQYLHRNRVIHRDLKLGNLFLNEDLEVKIGDFGLATKVEYDGERKKTLCGTPNYIAPEVLSKKGHSFEVDVWSIGCIMYTLLVGKPPFETSCLKETYLRIKKNEYSIPKHINPVAASLIQKMLQTDPTARPTINELLNDEFFTSGYIPARLPITCLTIPPRFSIAPSSLDPSNRKPLTVLNKGLENPLPERPREKEEPVVRETGEVVDCHLSDMLQQLHSVNASKPSERGLVRQEEAEDPACIPIFWVSKWVDYSDKYGLGYQLCDNSVGVLFNDSTRLILYNDGDSLQYIERDGTESYLTVSSHPNSLMKKITLLKYFRNYMSEHLLKAGANITPREGDELARLPYLRTWFRTRSAIILHLSNGSVQINFFQDHTKLILCPLMAAVTYIDEKRDFRTYRLSLLEEYGCCKELASRLRYARTMVDKLLSSRSASNRLKAS,plk1,44
46
+ MSVLQVLHIPDERLRKVAKPVEEVNAEIQRIVDDMFETMYAEEGIGLAATQVDIHQRIIVIDVSENRDERLVLINPELLEKSGETGIEEGCLSIPEQRALVPRAEKVKIRALDRDGKPFELEADGLLAICIQHEMDHLVGKLFMDYLSPLKQQRIRQKVEKLDRLKARA,def,45
47
+ MSDKLPYKVADIGLAAWGRKALDIAENEMPGLMRMRERYSASKPLKGARIAGCLHMTVETAVLIETLVTLGAEVQWSSCNIFSTQDHAAAAIAKAGIPVYAWKGETDEEYLWCIEQTLYFKDGPLNMILDDGGDLTNLIHTKYPQLLPGIRGISEETTTGVHNLYKMMANGILKVPAINVNDSVTKSKFDNLYGCRESLIDGIKRATDVMIAGKVAVVAGYGDVGKGCAQALRGFGARVIITEIDPINALQAAMEGYEVTTMDEACQEGNIFVTTTGCIDIILGRHFEQMKDDAIVCNIGHFDVEIDVKWLNENAVEKVNIKPQVDRYRLKNGRRIILLAEGRLVNLGCAMGHPSFVMSNSFTNQVMAQIELWTHPDKYPVGVHFLPKKLDEAVAEAHLGKLNVKLTKLTEKQAQYLGMSCDGPFKPDHYRY,sahh,46
48
+ MPLSRWLRSVGVFLLPAPYWAPRERWLGSLRRPSLVHGYPVLAWHSARCWCQAWTEEPRALCSSLRMNGDQNSDVYAQEKQDFVQHFSQIVRVLTEDEMGHPEIGDAIARLKEVLEYNAIGGKYNRGLTVVVAFRELVEPRKQDADSLQRAWTVGWCVELLQAFFLVADDIMDSSLTRRGQICWYQKPGVGLDAINDANLLEACIYRLLKLYCREQPYYLNLIELFLQSSYQTEIGQTLDLLTAPQGNVDLVRFTEKRYKSIVKYKTAFYSFYLPIAAAMYMAGIDGEKEHANAKKILLEMGEFFQIQDDYLDLFGDPSVTGKIGTDIQDNKCSWLVVQCLQRATPEQYQILKENYGQKEAEKVARVKALYEELDLPAVFLQYEEDSYSHIMALIEQYAAPLPPAVFLGLARKIYKRRK,fpps,47
49
+ MERAGPSFGQQRQQQQPQQQKQQQRDQDSVEAWLDDHWDFTFSYFVRKATREMVNAWFAERVHTIPVCKEGIRGHTESCSCPLQQSPRADNSAPGTPTRKISASEFDRPLRPIVVKDSEGTVSFLSDSEKKEQMPLTPPRFDHDEGDQCSRLLELVKDISSHLDVTALCHKIFLHIHGLISADRYSLFLVCEDSSNDKFLISRLFDVAEGSTLEEVSNNCIRLEWNKGIVGHVAALGEPLNIKDAYEDPRFNAEVDQITGYKTQSILCMPIKNHREEVVGVAQAINKKSGNGGTFTEKDEKDFAAYLAFCGIVLHNAQLYETSLLENKRNQVLLDLASLIFEEQQSLEVILKKIAATIISFMQVQKCTIFIVDEDCSDSFSSVFHMECEELEKSSDTLTREHDANKINYMYAQYVKNTMEPLNIPDVSKDKRFPWTTENTGNVNQQCIRSLLCTPIKNGKKNKVIGVCQLVNKMEENTGKVKPFNRNDEQFLEAFVIFCGLGIQNTQMYEAVERAMAKQMVTLEVLSYHASAAEEETRELQSLAAAVVPSAQTLKITDFSFSDFELSDLETALCTIRMFTDLNLVQNFQMKHEVLCRWILSVKKNYRKNVAYHNWRHAFNTAQCMFAALKAGKIQNKLTDLEILALLIAALSHDLDHRGVNNSYIQRSEHPLAQLYCHSIMEHHHFDQCLMILNSPGNQILSGLSIEEYKTTLKIIKQAILATDLALYIKRRGEFFELIRKNQFNLEDPHQKELFLAMLMTACDLSAITKPWPIQQRIAELVATEFFDQGDRERKELNIEPTDLMNREKKNKIPSMQVGFIDAICLQLYEALTHVSEDCFPLLDGCRKNRQKWQALAEQQEKMLINGESGQAKRN,pde5a,48
50
+ MDGWRRMPRWGLLLLLWGSCTFGLPTDTTTFKRIFLKRMPSIRESLKERGVDMARLGPEWSQPMKRLTLGNTTSSVILTNYMDTQYYGEIGIGTPPQTFKVVFDTGSSNVWVPSSKCSRLYTACVYHKLFDASDSSSYKHNGTELTLRYSTGTVSGFLSQDIITVGGITVTQMFGEVTEMPALPFMLAEFDGVVGMGFIEQAIGRVTPIFDNIISQGVLKEDVFSFYYNRDSENSQSLGGQIVLGGSDPQHYEGNFHYINLIKTGVWQIQMKGVSVGSSTLLCEDGCLALVDTGASYISGSTSSIEKLMEALGAKKRLFDYVVKCNEGPTLPDISFHLGGKEYTLTSADYVFQESYSSKKLCTLAIHAMDIPPPTGPTWALGATFIRKFYTEFDRRNNRIGFALAR,reni,49
51
+ MLDDRARMEAAKKEKVEQILAEFQLQEEDLKKVMRRMQKEMDRGLRLETHEEASVKMLPTYVRSTPEGSEVGDFLSLDLGGTNFRVMLVKVGEGEEGQWSVKTKHQMYSIPEDAMTGTAEMLFDYISECISDFLDKHQMKHKKLPLGFTFSFPVRHEDIDKGILLNWTKGFKASGAEGNNVVGLLRDAIKRRGDFEMDVVAMVNDTVATMISCYYEDHQCEVGMIVGTGCNACYMEEMQNVELVEGDEGRMCVNTEWGAFGDSGELDEFLLEYDRLVDESSANPGQQLYEKLIGGKYMGELVRLVLLRLVDENLLFHGEASEQLRTRGAFETRFVSQVESDTGDRKQIYNILSTLGLRPSTTDCDIVRRACESVSTRAAHMCSAGLAGVINRMRESRSEDVMRITVGVDGSVYKLHPSFKERFHASVRRLTPSCEITFIESEEGSGRGAALVSAVACKKACMLGQ,hxk4,50
52
+ MRPSGTAGAALLALLAALCPASRALEEKKVCQGTSNKLTQLGTFEDHFLSLQRMFNNCEVVLGNLEITYVQRNYDLSFLKTIQEVAGYVLIALNTVERIPLENLQIIRGNMYYENSYALAVLSNYDANKTGLKELPMRNLQEILHGAVRFSNNPALCNVESIQWRDIVSSDFLSNMSMDFQNHLGSCQKCDPSCPNGSCWGAGEENCQKLTKIICAQQCSGRCRGKSPSDCCHNQCAAGCTGPRESDCLVCRKFRDEATCKDTCPPLMLYNPTTYQMDVNPEGKYSFGATCVKKCPRNYVVTDHGSCVRACGADSYEMEEDGVRKCKKCEGPCRKVCNGIGIGEFKDSLSINATNIKHFKNCTSISGDLHILPVAFRGDSFTHTPPLDPQELDILKTVKEITGFLLIQAWPENRTDLHAFENLEIIRGRTKQHGQFSLAVVSLNITSLGLRSLKEISDGDVIISGNKNLCYANTINWKKLFGTSGQKTKIISNRGENSCKATGQVCHALCSPEGCWGPEPRDCVSCRNVSRGRECVDKCNLLEGEPREFVENSECIQCHPECLPQAMNITCTGRGPDNCIQCAHYIDGPHCVKTCPAGVMGENNTLVWKYADAGHVCHLCHPNCTYGCTGPGLEGCPTNGPKIPSIATGMVGALLLLLVVALGIGLFMRRRHIVRKRTLRRLLQERELVEPLTPSGEAPNQALLRILKETEFKKIKVLGSGAFGTVYKGLWIPEGEKVKIPVAIKELREATSPKANKEILDEAYVMASVDNPHVCRLLGICLTSTVQLITQLMPFGCLLDYVREHKDNIGSQYLLNWCVQIAKGMNYLEDRRLVHRDLAARNVLVKTPQHVKITDFGLAKLLGAEEKEYHAEGGKVPIKWMALESILHRIYTHQSDVWSYGVTVWELMTFGSKPYDGIPASEISSILEKGERLPQPPICTIDVYMIMVKCWMIDADSRPKFRELIIEFSKMARDPQRYLVIQGDERMHLPSPTDSNFYRALMDEEDMDDVVDADEYLIPQQGFFSSPSTSRTPLLSSLSATSNNSTVACIDRNGLQSCPIKEDSFLQRYSSDPTGALTEDSIDDTFLPVPEYINQSVPKRPAGSVQNPVYHNQPLNPAPSRDPHYQDPHSTAVGNPEYLNTVQPTCVNSTFDSPAHWAQKGSHQISLDNPDYQQDFFPKEAKPNGIFKGSTAENAEYLRVAPQSSEFIGA,egfr,51
53
+ MPEIVDTCSLASPASVCRTKHLHLRCSVDFTRRTLTGTAALTVQSQEDNLRSLVLDTKDLTIEKVVINGQEVKYALGERQSYKGSPMEISLPIALSKNQEIVIEISFETSPKSSALQWLTPEQTSGKEHPYLFSQCQAIHCRAILPCQDTPSVKLTYTAEVSVPKELVALMSAIRDGETPDPEDPSRKIYKFIQKVPIPCYLIALVVGALESRQIGPRTLVWSEKEQVEKSAYEFSETESMLKIAEDLGGPYVWGQYDLLVLPPSFPYGGMENPCLTFVTPTLLAGDKSLSNVIAHEISHSWTGNLVTNKTWDHFWLNEGHTVYLERHICGRLFGEKFRHFNALGGWGELQNSVKTFGETHPFTKLVVDLTDIDPDVAYSSVPYEKGFALLFYLEQLLGGPEIFLGFLKAYVEKFSYKSITTDDWKDFLYSYFKDKVDVLNQVDWNAWLYSPGLPPIKPNYDMTLTNACIALSQRWITAKEDDLNSFNATDLKDLSSHQLNEFLAQTLQRAPLPLGHIKRMQEVYNFNAINNSEIRFRWLRLCIQSKWEDAIPLALKMATEQGRMKFTRPLFKDLAAFDKSHDQAVRTYQEHKASMHPVTAMLVGKDLKVD,lkha4,52
54
+ MSLHFLYYCSEPTLDVKIAFCQGFDKQVDVSYIAKHYNMSKSKVDNQFYSVEVGDSTFTVLKRYQNLKPIGSGAQGIVCAAYDAVLDRNVAIKKLSRPFQNQTHAKRAYRELVLMKCVNHKNIISLLNVFTPQKTLEEFQDVYLVMELMDANLCQVIQMELDHERMSYLLYQMLCGIKHLHSAGIIHRDLKPSNIVVKSDCTLKILDFGLARTAGTSFMMTPYVVTRYYRAPEVILGMGYKENVDIWSVGCIMGEMVRHKILFPGRDYIDQWNKVIEQLGTPCPEFMKKLQPTVRNYVENRPKYAGLTFPKLFPDSLFPADSEHNKLKASQARDLLSKMLVIDPAKRISVDDALQHPYINVWYDPAEVEAPPPQIYDKQLDEREHTIEEWKELIYKEVMNSEEKTKNGVVKGQPSPSGAAVNSSESLPPSSSVNDISSMSTDQTLASDTDSSLEASAGPLGCCR,mk10,53
55
+ MLSRLFRMHGLFVASHPWEVIVGTVTLTICMMSMNMFTGNNKICGWNYECPKFEEDVLSSDIIILTITRCIAILYIYFQFQNLRQLGSKYILGIAGLFTIFSSFVFSTVVIHFLDKELTGLNEALPFFLLLIDLSRASTLAKFALSSNSQDEVRENIARGMAILGPTFTLDALVECLVIGVGTMSGVRQLEIMCCFGCMSVLANYFVFMTFFPACVSLVLELSRESREGRPIWQLSHFARVLEEEENKPNPVTQRVKMIMSLGLVLVHAHSRWIADPSPQNSTADTSKVSLGLDENVSKRIEPSVSLWQFYLSKMISMDIEQVITLSLALLLAVKYIFFEQTETESTLSLKNPITSPVVTQKKVPDNCCRREPMLVRNNQKCDSVEEETGINRERKVEVIKPLVAETDTPNRATFVVGNSSLLDTSSVLVTQEPEIELPREPRPNEECLQILGNAEKGAKFLSDAEIIQLVNAKHIPAYKLETLMETHERGVSIRRQLLSKKLSEPSSLQYLPYRDYNYSLVMGACCENVIGYMPIPVGVAGPLCLDEKEFQVPMATTEGCLVASTNRGCRAIGLGGGASSRVLADGMTRGPVVRLPRACDSAEVKAWLETSEGFAVIKEAFDSTSRFARLQKLHTSIAGRNLYIRFQSRSGDAMGMNMISKGTEKALSKLHEYFPEMQILAVSGNYCTDKKPAAINWIEGRGKSVVCEAVIPAKVVREVLKTTTEAMIEVNINKNLVGSAMAGSIGGYNAHAANIVTAIYIACGQDAAQNVGSSNCITLMEASGPTNEDLYISCTMPSIEIGTVGGGTNLLPQQACLQMLGVQGACKDNPGENARQLARIVCGTVMAGELSLMAALAAGHLVKSHMIHNRSKINLQDLQGACTKKTA,hmdh,54
56
+ MKTPWKVLLGLLGAAALVTIITVPVVLLNKGTDDATADSRKTYTLTDYLKNTYRLKLYSLRWISDHEYLYKQENNILVFNAEYGNSSVFLENSTFDEFGHSINDYSISPDGQFILLEYNYVKQWRHSYTASYDIYDLNKRQLITEERIPNNTQWVTWSPVGHKLAYVWNNDIYVKIEPNLPSYRITWTGKEDIIYNGITDWVYEEEVFSAYSALWWSPNGTFLAYAQFNDTEVPLIEYSFYSDESLQYPKTVRVPYPKAGAVNPTVKFFVVNTDSLSSVTNATSIQITAPASMLIGDHYLCDVTWATQERISLQWLRRIQNYSVMDICDYDESSGRWNCLVARQHIEMSTTGWVGRFRPSEPHFTLDGNSFYKIISNEEGYRHICYFQIDKKDCTFITKGTWEVIGIEALTSDYLYYISNEYKGMPGGRNLYKIQLSDYTKVTCLSCELNPERCQYYSVSFSKEAKYYQLRCSGPGLPLYTLHSSVNDKGLRVLEDNSALDKMLQNVQMPSKKLDFIILNETKFWYQMILPPHFDKSKKYPLLLDVYAGPCSQKADTVFRLNWATYLASTENIIVASFDGRGSGYQGDKIMHAINRRLGTFEVEDQIEAARQFSKMGFVDNKRIAIWGWSYGGYVTSMVLGSGSGVFKCGIAVAPVSRWEYYDSVYTERYMGLPTPEDNLDHYRNSTVMSRAENFKQVEYLLIHGTADDNVHFQQSAQISKALVDVGVDFQAMWYTDEDHGIASSTAHQHIYTHMSHFIKQCFSLP,dpp4,55
57
+ MDSKESLTPGREENPSSVLAQERGDVMDFYKTLRGGATVKVSASSPSLAVASQSDSKQRRLLVDFPKGSVSNAQQPDLSKAVSLSMGLYMGETETKVMGNDLGFPQQGQISLSSGETDLKLLEESIANLNRSTSVPENPKSSASTAVSAAPTEKEFPKTHSDVSSEQQHLKGQTGTNGGNVKLYTTDQSTFDILQDLEFSSGSPGKETNESPWRSDLLIDENCLLSPLAGEDDSFLLEGNSNEDCKPLILPDTKPKIKDNGDLVLSSPSNVTLPQVKTEKEDFIELCTPGVIKQEKLGTVYCQASFPGANIIGNKMSAISVHGVSTSGGQMYHYDMNTASLSQQQDQKPIFNVIPPIPVGSENWNRCQGSGDDNLTSLGTLNFPGRTVFSNGYSSPSMRPDVSSPPSSSSTATTGPPPKLCLVCSDEASGCHYGVLTCGSCKVFFKRAVEGQHNYLCAGRNDCIIDKIRRKNCPACRYRKCLQAGMNLEARKTKKKIKGIQQATTGVSQETSENPGNKTIVPATLPQLTPTLVSLLEVIEPEVLYAGYDSSVPDSTWRIMTTLNMLGGRQVIAAVKWAKAIPGFRNLHLDDQMTLLQYSWMFLMAFALGWRSYRQSSANLLCFAPDLIINEQRMTLPCMYDQCKHMLYVSSELHRLQVSYEEYLCMKTLLLLSSVPKDGLKSQELFDEIRMTYIKELGKAIVKREGNSSQNWQRFYQLTKLLDSMHEVVENLLNYCFQTFLDKTMSIEFPEMLAEIITNQIPKYSNGNIKKLLFHQK,gcr,56
58
+ MGETLGDSPIDPESDSFTDTLSANISQEMTMVDTEMPFWPTNFGISSVDLSVMEDHSHSFDIKPFTTVDFSSISTPHYEDIPFTRTDPVVADYKYDLKLQEYQSAIKVEPASPPYYSEKTQLYNKPHEEPSNSLMAIECRVCGDKASGFHYGVHACEGCKGFFRRTIRLKLIYDRCDLNCRIHKKSRNKCQYCRFQKCLAVGMSHNAIRFGRMPQAEKEKLLAEISSDIDQLNPESADLRALAKHLYDSYIKSFPLTKAKARAILTGKTTDKSPFVIYDMNSLMMGEDKIKFKHITPLQEQSKEVAIRIFQGCQFRSVEAVQEITEYAKSIPGFVNLDLNDQVTLLKYGVHEIIYTMLASLMNKDGVLISEGQGFMTREFLKSLRKPFGDFMEPKFEFAVKFNALELDDSDLAIFIAVIILSGDRPGLLNVKPIEDIQDNLLQALELQLKLNHPESSQLFAKLLQKMTDLRQIVTEHVQLLQVIKKTETDMSLHPLLQEIYKDLY,pparg,57
59
+ MEFSSPSREECPKPLSRVSIMAGSLTGLLLLQAVSWASGARPCIPKSFGYSSVVCVCNATYCDSFDPPTFPALGTFSRYESTRSGRRMELSMGPIQANHTGTGLLLTLQPEQKFQKVKGFGGAMTDAAALNILALSPPAQNLLLKSYFSEEGIGYNIIRVPMASCDFSIRTYTYADTPDDFQLHNFSLPEEDTKLKIPLIHRALQLAQRPVSLLASPWTSPTWLKTNGAVNGKGSLKGQPGDIYHQTWARYFVKFLDAYAEHKLQFWAVTAENEPSAGLLSGYPFQCLGFTPEHQRDFIARDLGPTLANSTHHNVRLLMLDDQRLLLPHWAKVVLTDPEAAKYVHGIAVHWYLDFLAPAKATLGETHRLFPNTMLFASEACVGSKFWEQSVRLGSWDRGMQYSHSIITNLLYHVVGWTDWNLALNPEGGPNWVRNFVDSPIIVDITKDTFYKQPMFYHLGHFSKFIPEGSQRVGLVASQKNDLDAVALMHPDGSAVVVVLNRSSKDVPLTIKDPAVGFLETISPGYSIHTYLWRRQ,glcm,58
60
+ MEDHMFGVQQIQPNVISVRLFKRKVGGLGFLVKERVSKPPVIISDLIRGGAAEQSGLIQAGDIILAVNGRPLVDLSYDSALEVLRGIASETHVVLILRGPEGFTTHLETTFTGDGTPKTIRVTQPLGPPTKAVDLSHQPPAGKEQPLAVDGASGPGNGPQHAYDDGQEAGSLPHANGLAPRPPGQDPAKKATRVSLQGRGENNELLKEIEPVLSLLTSGSRGVKGGAPAKAEMKDMGIQVDRDLDGKSHKPLPLGVENDRVFNDLWGKGNVPVVLNNPYSEKEQPPTSGKQSPTKNGSPSKCPRFLKVKNWETEVVLTDTLHLKSTLETGCTEYICMGSIMHPSQHARRPEDVRTKGQLFPLAKEFIDQYYSSIKRFGSKAHMERLEEVNKEIDTTSTYQLKDTELIYGAKHAWRNASRCVGRIQWSKLQVFDARDCTTAHGMFNYICNHVKYATNKGNLRSAITIFPQRTDGKHDFRVWNSQLIRYAGYKQPDGSTLGDPANVQFTEICIQQGWKPPRGRFDVLPLLLQANGNDPELFQIPPELVLEVPIRHPKFEWFKDLGLKWYGLPAVSNMLLEIGGLEFSACPFSGWYMGTEIGVRDYCDNSRYNILEEVAKKMNLDMRKTSSLWKDQALVEINIAVLYSFQSDKVTIVDHHSATESFIKHMENEYRCRGGCPADWVWIVPPMSGSITPVFHQEMLNYRLTPSFEYQPDPWNTHVWKGTNGTPTKRRAIGFKKLAEAVKFSAKLMGQAMAKRVKATILYATETGKSQAYAKTLCEIFKHAFDAKVMSMEEYDIVHLEHETLVLVVTSTFGNGDPPENGEKFGCALMEMRHPNSVQEERKSYKVRFNSVSSYSDSQKSSGDGPDLRDNFESAGPLANVRFSVFGLGSRAYPHFCAFGHAVDTLLEELGGERILKMREGDELCGQEEAFRTWAKKVFKAACDVFCVGDDVNIEKANNSLISNDRSWKRNKFRLTFVAEAPELTQGLSNVHKKRVSAARLLSRQNLQSPKSSRSTIFVRLHTNGSQELQYQPGDHLGVFPGNHEDLVNALIERLEDAPPVNQMVKVELLEERNTALGVISNWTDELRLPPCTIFQAFKYYLDITTPPTPLQLQQFASLATSEKEKQRLLVLSKGLQEYEEWKWGKNPTIVEVLEEFPSIQMPATLLLTQLSLLQPRYYSISSSPDMYPDEVHLTVAIVSYRTRDGEGPIHHGVCSSWLNRIQADELVPCFVRGAPSFHLPRNPQVPCILVGPGTGIAPFRSFWQQRQFDIQHKGMNPCPMVLVFGCRQSKIDHIYREETLQAKNKGVFRELYTAYSREPDKPKKYVQDILQEQLAESVYRALKEQGGHIYVCGDVTMAADVLKAIQRIMTQQGKLSAEDAGVFISRMRDDNRYHEDIFGVTLRTYEVTNRLRSESIAFIEESKKDTDEVFSS,nos1,59
61
+ MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAMTFYSPAVMNYSIPSNVTNLEGGPGRQTTSPNVLWPTPGHLSPLVVHRQLSHLYAEPQKSPWCEARSLEHTLPVNRETLKRKVSGNRCASPVTGPGSKRDAHFCAVCSDYASGYHYGVWSCEGCKAFFKRSIQGHNDYICPATNQCTIDKNRRKSCQACRLRKCYEVGMVKCGSRRERCGYRLVRRQRSADEQLHCAGKAKRSGGHAPRVRELLLDALSPEQLVLTLLEAEPPHVLISRPSAPFTEASMMMSLTKLADKELVHMISWAKKIPGFVELSLFDQVRLLESCWMEVLMMGLMWRSIDHPGKLIFAPDLVLDRDEGKCVEGILEIFDMLLATTSRFRELKLQHKEYLCVKAMILLNSSMYPLVTATQDADSSRKLAHLLNAVTDALVWVIAKSGISSQQQSMRLANLLMLLSHVRHASNKGMEHLLNMKCKNVVPVYDLLLEMLNAHVLRGCKSSITGSECSPAEDSKSKEGSQNPQSQ,esr2,60
62
+ MSNKCDVVVVGGGISGMAAAKLLHDSGLNVVVLEARDRVGGRTYTLRNQKVKYVDLGGSYVGPTQNRILRLAKELGLETYKVNEVERLIHHVKGKSYPFRGPFPPVWNPITYLDHNNFWRTMDDMGREIPSDAPWKAPLAEEWDNMTMKELLDKLCWTESAKQLATLFVNLCVTAETHEVSALWFLWYVKQCGGTTRIISTTNGGQERKFVGGSGQVSERIMDLLGDRVKLERPVIYIDQTRENVLVETLNHEMYEAKYVISAIPPTLGMKIHFNPPLPMMRNQMITRVPLGSVIKCIVYYKEPFWRKKDYCGTMIIDGEEAPVAYTLDDTKPEGNYAAIMGFILAHKARKLARLTKEERLKKLCELYAKVLGSLEALEPVHYEEKNWCEEQYSGGCYTTYFPPGILTQYGRVLRQPVDRIYFAGTETATHWSGYMEGAVEAGERAAREILHAMGKIPEDEIWQSEPESVDVPAQPITTTFLERHLPSVPGLLRLIGLTTIFSATALGFLAHKRGLLVRV,aofb,61
63
+ MNPNQKILCTSATALVIGTIAVLIGITNLGLNIGLHLKPSCNCSHSQPEATNASQTIINNYYNDTNITQISNTNIQVEERAIRDFNNLTKGLCTINSWHIYGKDNAVRIGEDSDVLVTREPYVSCDPDECRFYALSQGTTIRGKHSNGTIHDRSQYRALISWPLSSPPTVYNSRVECIGWSSTSCHDGKTRMSICISGPNNNASAVIWYNRRPVTEINTWARNILRTQESECVCHNGVCPVVFTDGSATGPAETRIYYFKEGKILKWEPLAGTAKHIEECSCYGERAEITCTCRDNWQGSNRPVIRIDPVAMTHTSQYICSPVLTDNPRPNDPTVGKCNDPYPGNNNNGVKGFSYLDGVNTWLGRTISIASRSGYEMLKVPNALTDDKSKPTQGQTIVLNTDWSGYSGSFMDYWAEGECYRACFYVELIRGRPKEDKVWWTSNSIVSMCSSTEFLGQWDWPDGAKIEYFL,nram,62
64
+ MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYILTHFKGFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNEIVEKLLLRRKFIPDPQGSNMMFAFFAQHFTHQFFKTDHKRGPAFTNGLGHGVDLNHIYGETLARQRKLRLFKDGKMKYQIIDGEMYPPTVKDTQAEMIYPPQVPEHLRFAVGQEVFGLVPGLMMYATIWLREHNRVCDVLKQEHPEWGDEQLFQTSRLILIGETIKIVIEDYVQHLSGYHFKLKFDPELLFNKQFQYQNRIAAEFNTLYHWHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQSFNEYRKRFMLKPYESFEELTGEKEMSAELEALYGDIDAVELYPALLVEKPRPDAIFGETMVEVGAPFSLKGLMGNVICSPAYWKPSTFGGEVGFQIINTASIQSLICNNVKGCPFTSFSVPDPELIKTVTINASSSRSGLDDINPTVLLKERSTEL,pgh2,63
65
+ MAAAAAAGAGPEMVRGQVFDVGPRYTNLSYIGEGAYGMVCSAYDNVNKVRVAIKKISPFEHQTYCQRTLREIKILLRFRHENIIGINDIIRAPTIEQMKDVYIVQDLMETDLYKLLKTQHLSNDHICYFLYQILRGLKYIHSANVLHRDLKPSNLLLNTTCDLKICDFGLARVADPDHDHTGFLTEYVATRWYRAPEIMLNSKGYTKSIDIWSVGCILAEMLSNRPIFPGKHYLDQLNHILGILGSPSQEDLNCIINLKARNYLLSLPHKNKVPWNRLFPNADSKALDLLDKMLTFNPHKRIEVEQALAHPYLEQYYDPSDEPIAEAPFKFDMELDDLPKEKLKELIFEETARFQPGYRS,mk01,64
66
+ MSTGDSFETRFEKMDNLLRDPKSEVNSDCLLDGLDALVYDLDFPALRKNKNIDNFLSRYKDTINKIRDLRMKAEDYEVVKVIGRGAFGEVQLVRHKSTRKVYAMKLLSKFEMIKRSDSAFFWEERDIMAFANSPWVVQLFYAFQDDRYLYMVMEYMPGGDLVNLMSNYDVPEKWARFYTAEVVLALDAIHSMGFIHRDVKPDNMLLDKSGHLKLADFGTCMKMNKEGMVRCDTAVGTPDYISPEVLKSQGGDGYYGRECDWWSVGVFLYEMLVGDTPFYADSLVGTYSKIMNHKNSLTFPDDNDISKEAKNLICAFLTDREVRLGRNGVEEIKRHLFFKNDQWAWETLRDTVAPVVPDLSSDIDTSNFDDLEEDKGEEETFPIPKAFVGNQLPFVGFTYYSNRRYLSSANPNDNRTSSNADKSLQESLQKTIYKLEEQLHNEMQLKDEMEQKCRTSNIKLDKIMKELDEEGNQRRNLESTVSQIEKEKMLLQHRINEYQRKAEQENEKRRNVENEVSTLKDQLEDLKKVSQNSQLANEKLSQLQKQLEEANDLLRTESDTAVRLRKSHTEMSKSISQLESLNRELQERNRILENSKSQTDKDYYQLQAILEAERRDRGHDSEMIGDLQARITSLQEEVKHLKHNLEKVEGERKEAQDMLNHSEKEKNNLEIDLNYKLKSLQQRLEQEVNEHKVTKARLTDKHQSIEEAKSVAMCEMEKKLKEEREAREKAENRVVQIEKQCSMLDVDLKQSQQKLEHLTGNKERMEDEVKNLTLQLEQESNKRLLLQNELKTQAFEADNLKGLEKQMKQEINTLLEAKRLLEFELAQLTKQYRGNEGQMRELQDQLEAEQYFSTLYKTQVKELKEEIEEKNRENLKKIQELQNEKETLATQLDLAETKAESEQLARGLLEEQYFELTQESKKAASRNRQEITDKDHTVSRLEEANSMLTKDIEILRRENEELTEKMKKAEEEYKLEKEEEISNLKAAFEKNINTERTLKTQAVNKLAEIMNRKDFKIDRKKANTQDLRKKEKENRKLQLELNQEREKFNQMVVKHQKELNDMQAQLVEECAHRNELQMQLASKESDIEQLRAKLLDLSDSTSVASFPSADETDGNLPESRIEGWLSVPNRGNIKRYGWKKQYVVVSSKKILFYNDEQDKEQSNPSMVLDIDKLFHVRPVTQGDVYRAETEEIPKIFQILYANEGECRKDVEMEPVQQAEKTNFQNHKGHEFIPTLYHFPANCDACAKPLWHVFKPPPALECRRCHVKCHRDHLDKKEDLICPCKVSYDVTSARDMLLLACSQDEQKKWVTHLVKKIPKNPPSGFVRASPRTLSTRSTANQSFRKVVKNTSGKTS,rock1,65
67
+ MGVQVETISPGDGRTFPKRGQTCVVHYTGMLEDGKKFDSSRDRNKPFKFMLGKQEVIRGWEEGVAQMSVGQRAKLTISPDYAYGATGHPGIIPPHATLVFDVELLKLE,fkb1a,66
68
+ MENTENSVDSKSIKNLEPKIIHGSESMDSGISLDNSYKMDYPEMGLCIIINNKNFHKSTGMTSRSGTDVDAANLRETFRNLKYEVRNKNDLTREEIVELMRDVSKEDHSKRSSFVCVLLSHGEEGIIFGTNGPVDLKKITNFFRGDRCRSLTGKPKLFIIQACRGTELDCGIETDSGVDDDMACHKIPVEADFLYAYSTAPGYYSWRNSKDGSWFIQSLCAMLKQYADKLEFMHILTRVNRKVATEFESFSFDATFHAKKQIPCIVSMLTKELYFYH,casp3,67
69
+ MSQERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQSIIHAKRTYRELRLLKHMKHENVIGLLDVFTPARSLEEFNDVYLVTHLMGADLNNIVKCQKLTDDHVQFLIYQILRGLKYIHSADIIHRDLKPSNLAVNEDCELKILDFGLARHTDDEMTGYVATRWYRAPEIMLNWMHYNQTVDIWSVGCIMAELLTGRTLFPGTDHIDQLKLILRLVGTPGAELLKKISSESARNYIQSLTQMPKMNFANVFIGANPLAVDLLEKMLVLDSDKRITAAQALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES,mk14,68
70
+ MSFLSRQQPPPPRRAGAACTLRQKLIFSPCSDCEEEEEEEEEEGSGHSTGEDSAFQEPDSPLPPARSPTEPGPERRRSPGPAPGSPGELEEDLLLPGACPGADEAGGGAEGDSWEEEGFGSSSPVKSPAAPYFLGSSFSPVRCGGPGDASPRGCGARRAGEGRRSPRPDHPGTPPHKTFRKLRLFDTPHTPKSLLSKARGIDSSSVKLRGSSLFMDTEKSGKREFDVRQTPQVNINPFTPDSLLLHSSGQCRRRKRTYWNDSCGEDMEASDYELEDETRPAKRITITESNMKSRYTTEFHELEKIGSGEFGSVFKCVKRLDGCIYAIKRSKKPLAGSVDEQNALREVYAHAVLGQHSHVVRYFSAWAEDDHMLIQNEYCNGGSLADAISENYRIMSYFKEAELKDLLLQVGRGLRYIHSMSLVHMDIKPSNIFISRTSIPNAASEEGDEDDWASNKVMFKIGDLGHVTRISSPQVEEGDSRFLANEVLQENYTHLPKADIFALALTVVCAAGAEPLPRNGDQWHEIRQGRLPRIPQVLSQEFTELLKVMIHPDPERRPSAMALVKHSVLLSASRKSAEQLRIELNAEKFKNSLLQKELKKAQMAKAAAEERALFTDRMATRSTTQSNRTSRLIGKKMNRSVSLTIY,wee1,69
71
+ MAAAYLDPNLNHTPNSSTKTHLGTGMERSPGAMERVLKVFHYFESNSEPTTWASIIRHGDATDVRGIIQKIVDSHKVKHVACYGFRLSHLRSEEVHWLHVDMGVSSVREKYELAHPPEEWKYELRIRYLPKGFLNQFTEDKPTLNFFYQQVKSDYMLEIADQVDQEIALKLGCLEIRRSYWEMRGNALEKKSNYEVLEKDVGLKRFFPKSLLDSVKAKTLRKLIQQTFRQFANLNREESILKFFEILSPVYRFDKECFKCALGSSWIISVELAIGPEEGISYLTDKGCNPTHLADFTQVQTIQYSNSEDKDRKGMLQLKIAGAPEPLTVTAPSLTIAENMADLIDGYCRLVNGTSQSFIIRPQKEGERALPSIPKLANSEKQGMRTHAVSVSETDDYAEIIDEEDTYTMPSTRDYEIQRERIELGRCIGEGQFGDVHQGIYMSPENPALAVAIKTCKNCTSDSVREKFLQEALTMRQFDHPHIVKLIGVITENPVWIIMELCTLGELRSFLQVRKYSLDLASLILYAYQLSTALAYLESKRFVHRDIAARNVLVSSNDCVKLGDFGLSRYMEDSTYYKASKGKLPIKWMAPESINFRRFTSASDVWMFGVCMWEILMHGVKPFQGVKNNDVIGRIENGERLPMPPNCPPTLYSLMTKCWAYDPSRRPRFTELKAQLSTILEEEKAQQEERMRMESRRQATVSWDSGGSDEAPPKPSRPGYPSPRSSEGFYPSPQHMVQTNHYQVSGYPGSHGITAMAGSIYPGQASLLDQTDSWNHRPQEIAMWQPNVEDSTVLDLRGIGQVLPTHLMEERLIRQQQEMEEDQRWLEKEERFLKPDVRLSRGSIDREDGSLQGPIGNQHIYQPVGKPDPAAPPKKPPRPGAPGHLGSLASLSSPADSYNEGVKLQPQEISPPPTANLDRSNDKVYENVTGLVKAVIEMSSKIQPAPPEEYVPMVKEVGLALRTLLATVDETIPLLPASTHREIEMAQKLLNSDLGELINKMKLAQQYVMTSLQQEYKKQMLTAAHALAVDAKNLLDVIDQARLKMLGQTRPH,fak1,70
72
+ MHPGVLAAFLFLSWTHCRALPLPSGGDEDDLSEEDLQFAERYLRSYYHPTNLAGILKENAASSMTERLREMQSFFGLEVTGKLDDNTLDVMKKPRCGVPDVGEYNVFPRTLKWSKMNLTYRIVNYTPDMTHSEVEKAFKKAFKVWSDVTPLNFTRLHDGIADIMISFGIKEHGDFYPFDGPSGLLAHAFPPGPNYGGDAHFDDDETWTSSSKGYNLFLVAAHEFGHSLGLDHSKDPGALMFPIYTYTGKSHFMLPDDDVQGIQSLYGPGDEDPNPKHPKTPDKCDPSLSLDAITSLRGETMIFKDRFFWRLHPQQVDAELFLTKSFWPELPNRIDAAYEHPSHDLIFIFRGRKFWALNGYDILEGYPKKISELGLPKEVKKISAAVHFEDTGKTLLFSGNQVWRYDDTNHIMDKDYPRLIEEDFPGIGDKVDAVYEKNGYIYFFNGPIQFEYSIWSNRIVRVMPANSILWC,mmp13,71
73
+ PISPIEPVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTRWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKRSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYEHHPDKWTVQPIVLPEKDSWTVNDIQK,hivrt,72
74
+ MASRLLLNNGAKMPILGLGTWKSPPGQVTEAVKVAIDVGYRHIDCAHVYQNENEVGVAIQEKLREQVVKREELFIVSKLWCTYHEKGLVKGACQKTLSDLKLDYLDLYLIHWPTGFKPGKEFFPLDESGNVVPSDTNILDTWAAMEELVDEGLVKAIGISNFNHLQVEMILNKPGLKYKPAVNQIECHPYLTQEKLIQYCQSKGIVVTAYSPLGSPDRPWAKPEDPSLLEDPRIKAIAAKHNKTTAQVLIRFPMQRNLVVIPKSVTPERIAENFKVFDFELSSQDMTTLLSYNRNWRVCALLSCTSHKDYPFHEEF,aldr,73
75
+ MVGSLNCIVAVSQNMGIGKNGDLPWPPLRNEFRYFQRMTTTSSVEGKQNLVIMGKKTWFSIPEKNRPLKGRINLVLSRELKEPPQGAHFLSRSLDDALKLTEQPELANKVDMVWIVGGSSVYKEAMNHPGHLKLFVTRIMQDFESDTFFPEIDLEKYKLLPEYPGVLSDVQEEKGIKYKFEVYEKND,dyr,74
76
+ MEMEKEFEQIDKSGSWAAIYQDIRHEASDFPCRVAKLPKNKNRNRYRDVSPFDHSRIKLHQEDNDYINASLIKMEEAQRSYILTQGPLPNTCGHFWEMVWEQKSRGVVMLNRVMEKGSLKCAQYWPQKEEKEMIFEDTNLKLTLISEDIKSYYTVRQLELENLTTQETREILHFHYTTWPDFGVPESPASFLNFLFKVRESGSLSPEHGPVVVHCSAGIGRSGTFCLADTCLLLMDKRKDPSSVDIKKVLLEMRKFRMGLIQTADQLRFSYLAVIEGAKFIMGDSSVQDQWKELSHEDLEPPPEHIPPPPRPPKRILEPHNGKCREFFPNHQWVKEETQEDKDCPIKEEKGSPLNAAPYGIESMSQDTEVRSRVVGGSLRGAQAASPAKGEPSLPEKDEDHALSYWKPFLVNMCVATVLTAGAYLCYRFLFNSNT,ptn1,75
77
+ MAQTPAFDKPKVELHVHLDGSIKPETILYYGRRRGIALPANTAEGLLNVIGMDKPLTLPDFLAKFDYYMPAIAGCREAIKRIAYEFVEMKAKEGVVYVEVRYSPHLLANSKVEPIPWNQAEGDLTPDEVVALVGQGLQEGERDFGVKARSILCCMRHQPNWSPKVVELCKKYQQQTVVAIDLAGDETIPGSSLLPGHVQAYQEAVKSGIHRTVHAGEVGSAEVVKEAVDILKTERLGHGYHTLEDQALYNRLRQENMHFEICPWSSYLTGAWKPDTEHAVIRLKNDQANYSLNTDDPLIFKSTLDTDYQMTKRDMGFTEEEFKRLNINAAKSSFLPEDEKRELLDLLYKAYGMPPSASAGQNL,ada,76
78
+ MGCGCSSHPEDDWMENIDVCENCHYPIVPLDGKGTLLIRNGSEVRDPLVTYEGSNPPASPLQDNLVIALHSYEPSHDGDLGFEKGEQLRILEQSGEWWKAQSLTTGQEGFIPFNFVAKANSLEPEPWFFKNLSRKDAERQLLAPGNTHGSFLIRESESTAGSFSLSVRDFDQNQGEVVKHYKIRNLDNGGFYISPRITFPGLHELVRHYTNASDGLCTRLSRPCQTQKPQKPWWEDEWEVPRETLKLVERLGAGQFGEVWMGYYNGHTKVAVKSLKQGSMSPDAFLAEANLMKQLQHQRLVRLYAVVTQEPIYIITEYMENGSLVDFLKTPSGIKLTINKLLDMAAQIAEGMAFIEERNYIHRDLRAANILVSDTLSCKIADFGLARLIEDNEYTAREGAKFPIKWTAPEAINYGTFTIKSDVWSFGILLTEIVTHGRIPYPGMTNPEVIQNLERGYRMVRPDNCPEELYQLMRLCWKERPEDRPTFDYLRSVLEDFFTATEGQYQPQP,lck,77
79
+ MGRPLHLVLLSASLAGLLLLGESLFIRREQANNILARVTRANSFLEEMKKGHLERECMEETCSYEEAREVFEDSDKTNEFWNKYKDGDQCETSPCQNQGKCKDGLGEYTCTCLEGFEGKNCELFTRKLCSLDNGDCDQFCHEEQNSVVCSCARGYTLADNGKACIPTGPYPCGKQTLERRKRSVAQATSSSGEAPDSITWKPYDAADLDPTENPFDLLDFNQTQPERGDNNLTRIVGGQECKDGECPWQALLINEENEGFCGGTILSEFYILTAAHCLYQAKRFKVRVGDRNTEQEEGGEAVHEVEVVIKHNRFTKETYDFDIAVLRLKTPITFRMNVAPACLPERDWAESTLMTQKTGIVSGFGRTHEKGRQSTRLKMLEVPYVDRNSCKLSSSFIITQNMFCAGYDTKQEDACQGDSGGPHVTRFKDTYFVTGIVSWGEGCARKGKYGIYTKVTAFLKWIDRSMKTRGLPKAKSHAPEVITSSPLK,fa10,78
80
+ MNEVSVIKEGWLHKRGEYIKTWRPRYFLLKSDGSFIGYKERPEAPDQTLPPLNNFSVAECQLMKTERPRPNTFVIRCLQWTTVIERTFHVDSPDEREEWMRAIQMVANSLKQRAPGEDPMDYKCGSPSDSSTTEEMEVAVSKARAKVTMNDFDYLKLLGKGTFGKVILVREKATGRYYAMKILRKEVIIAKDEVAHTVTESRVLQNTRHPFLTALKYAFQTHDRLCFVMEYANGGELFFHLSRERVFTEERARFYGAEIVSALEYLHSRDVVYRDIKLENLMLDKDGHIKITDFGLCKEGISDGATMKTFCGTPEYLAPEVLEDNDYGRAVDWWGLGVVMYEMMCGRLPFYNQDHERLFELILMEEIRFPRTLSPEAKSLLAGLLKKDPKQRLGGGPSDAKEVMEHRFFLSINWQDVVQKKLLPPFKPQVTSEVDTRYFDDEFTAQSITITPPDRYDSLGLLELDQRTHFPQFSYSASIRE,akt2,79
81
+ MESKALLAVALWFCVETRAASVGLPGDFLHPPKLSTQKDILTILANTTLQITCRGQRDLDWLWPNAQRDSEERVLVTECGGGDSIFCKTLTIPRVVGNDTGAYKCSYRDVDIASTVYVYVRDYRSPFIASVSDQHGIVYITENKNKTVVIPCRGSISNLNVSLCARYPEKRFVPDGNRISWDSEIGFTLPSYMISYAGMVFCEAKINDETYQSIMYIVVVVGYRIYDVILSPPHEIELSAGEKLVLNCTARTELNVGLDFTWHSPPSKSHHKKIVNRDVKPFPGTVAKMFLSTLTIESVTKSDQGEYTCVASSGRMIKRNRTFVRVHTKPFIAFGSGMKSLVEATVGSQVRIPVKYLSYPAPDIKWYRNGRPIESNYTMIVGDELTIMEVTERDAGNYTVILTNPISMEKQSHMVSLVVNVPPQIGEKALISPMDSYQYGTMQTLTCTVYANPPLHHIQWYWQLEEACSYRPGQTSPYACKEWRHVEDFQGGNKIEVTKNQYALIEGKNKTVSTLVIQAANVSALYKCEAINKAGRGERVISFHVIRGPEITVQPAAQPTEQESVSLLCTADRNTFENLTWYKLGSQATSVHMGESLTPVCKNLDALWKLNGTMFSNSTNDILIVAFQNASLQDQGDYVCSAQDKKTKKRHCLVKQLIILERMAPMITGNLENQTTTIGETIEVTCPASGNPTPHITWFKDNETLVEDSGIVLRDGNRNLTIRRVRKEDGGLYTCQACNVLGCARAETLFIIEGAQEKTNLEVIILVGTAVIAMFFWLLLVILVRTVKRANEGELKTGYLSIVMDPDELPLDERCERLPYDASKWEFPRDRLKLGKPLGRGAFGQVIEADAFGIDKTATCKTVAVKMLKEGATHSEHRALMSELKILIHIGHHLNVVNLLGACTKPGGPLMVIVEFSKFGNLSTYLRGKRNEFVPYKSKGARFRQGKDYVGELSVDLKRRLDSITSSQSSASSGFVEEKSLSDVEEEEASEELYKDFLTLEHLICYSFQVAKGMEFLASRKCIHRDLAARNILLSEKNVVKICDFGLARDIYKDPDYVRKGDARLPLKWMAPETIFDRVYTIQSDVWSFGVLLWEIFSLGASPYPGVKIDEEFCRRLKEGTRMRAPDYTTPEMYQTMLDCWHEDPNQRPSFSELVEHLGNLLQANAQQDGKDYIVLPMSETLSMEEDSGLSLPTSPVSCMEEEEVCDPKFHYDNTAGISHYLQNSKRKSRPVSVKTFEDIPLEEPEVKVIPDDSQTDSGMVLASEELKTLEDRNKLSPSFGGMMPSKSRESVASEGSNQTSGYQSGYHSDDTDTTVYSSDEAGLLKMVDAAVHADSGTTLQLTSCLNGSGPVPAPPPTPGNHERGAA,vgfr2,80
82
+ MFKTTLCALLITASCSTFAAPQQINDIVHRTITPLIEQQKIPGMAVAVIYQGKPYYFTWGYADIAKKQPVTQQTLFELGSVSKTFTGVLGGDAIARGEIKLSDPTTKYWPELTAKQWNGITLLHLATYTAGGLPLQVPDEVKSSSDLLRFYQNWQPAWAPGTQRLYANSSIGLFGALAVKPSGLSFEQAMQTRVFQPLKLNHTWINVPPAEEKNYAWGYREGKAVHVSPGALDAEAYGVKSTIEDMARWVQSNLKPLDINEKTLQQGIQLAQSRYWQTGDMYQGLGWEMLDWPVNPDSIINGSDNKIALAARPVKAITPPTPAVRASWVHKTGATGGFGSYVAFIPEKELGIVMLANKNYPNPARVDAAWQILNALQ,ampc,81
83
+ MAFMKKYLLPILGLFMAYYYYSANEEFRPEMLQGKKVIVTGASKGIGREMAYHLAKMGAHVVVTARSKETLQKVVSHCLELGAASAHYIAGTMEDMTFAEQFVAQAGKLMGGLDMLILNHITNTSLNLFHDDIHHVRKSMEVNFLSYVVLTVAALPMLKQSNGSIVVVSSLAGKVAYPMVAAYSASKFALDGFFSSIRKEYSVSRVNVSITLCVLGLIDTETAMKAVSGIVHMQAAPKEECALEIIKGGALRQEEVYYDSSLWTTLLIRNPCRKILEFLYSTSYNMDRFINK,dhi1,82
84
+ MASLSQLSSHLNYTCGAENSTGASQARPHAYYALSYCALILAIVFGNGLVCMAVLKERALQTTTNYLVVSLAVADLLVATLVMPWVVYLEVTGGVWNFSRICCDVFVTLDVMMCTASILNLCAISIDRYTAVVMPVHYQHGTGQSSCRRVALMITAVWVLAFAVSCPLLFGFNTTGDPTVCSISNPDFVIYSSVVSFYLPFGVTVLVYARIYVVLKQRRRKRILTRQNSQCNSVRPGFPQQTLSPDPAHLELKRYYSICQDTALGGPGFQERGGELKREEKTRNSLSPTIAPKLSLEVRKLSNGRLSTSLKLGPLQPRGVPLREKKATQMVAIVLGAFIVCWLPFFLTHVLNTHCQTCHVSPELYSATTWLGYVNSALNPVIYTTFNIEFRKAFLKILSC,drd3,83
85
+ MGAGVLVLGASEPGNLSSAAPLPDGAATAARLLVPASPPASLLPPASESPEPLSQQWTAGMGLLMALIVLLIVAGNVLVIVAIAKTPRLQTLTNLFIMSLASADLVMGLLVVPFGATIVVWGRWEYGSFFCELWTSVDVLCVTASIETLCVIALDRYLAITSPFRYQSLLTRARARGLVCTVWAISALVSFLPILMHWWRAESDEARRCYNDPKCCDFVTNRAYAIASSVVSFYVPLCIMAFVYLRVFREAQKQVKKIDSCERRFLGGPARPPSPSPSPVPAPAPPPGPPRPAAAAATAPLANGRAGKRRPSRLVALREQKALKTLGIIMGVFTLCWLPFFLANVVKAFHRELVPDRLFVFFNWLGYANSAFNPIIYCRSPDFRKAFQRLLCCARRAARRRHATHGDRPRASGCLARPGPPPSPGAASDDDDDDVVGATPPARLLEPWAGCNGGAAADSDSSLDEPCRPGFASESKV,adrb1,84
86
+ MQKIMHISVLLSPVLWGLIFGVSSNSIQIGGLFPRGADQEYSAFRVGMVQFSTSEFRLTPHIDNLEVANSFAVTNAFCSQFSRGVYAIFGFYDKKSVNTITSFCGTLHVSFITPSFPTDGTHPFVIQMRPDLKGALLSLIEYYQWDKFAYLYDSDRGLSTLQAVLDSAAEKKWQVTAINVGNINNDKKDEMYRSLFQDLELKKERRVILDCERDKVNDIVDQVITIGKHVKGYHYIIANLGFTDGDLLKIQFGGANVSGFQIVDYDDSLVSKFIERWSTLEEKEYPGAHTTTIKYTSALTYDAVQVMTEAFRNLRKQRIEISRRGNAGDCLANPAVPWGQGVEIERALKQVQVEGLSGNIKFDQNGKRINYTINIMELKTNGPRKIGYWSEVDKMVVTLTELPSGNDTSGLENKTVVVTTILESPYVMMKKNHEMLEGNERYEGYCVDLAAEIAKHCGFKYKLTIVGDGKYGARDADTKIWNGMVGELVYGKADIAIAPLTITLVREEVIDFSKPFMSLGISIMIKKPQKSKPGVFSFLDPLAYEIWMCIVFAYIGVSVVLFLVSRFSPYEWHTEEFEDGRETQSSESTNEFGIFNSLWFSLGAFMQQGCDISPRSLSGRIVGGVWWFFTLIIISSYTANLAAFLTVERMVSPIESAEDLSKQTEIAYGTLDSGSTKEFFRRSKIAVFDKMWTYMRSAEPSVFVRTTAEGVARVRKSKGKYAYLLESTMNEYIEQRKPCDTMKVGGNLDSKGYGIATPKGSSLRNAVNLAVLKLNEQGLLDKLKNKWWYDKGECGSGGGDSKEKTSALSLSNVAGVFYILVGGLGLAMLVALIEFCYKSRAEAKRMKVAKNAQNINPSSSQNSQNFATYKEGYNVYGIESVKI,gria2,85
87
+ MLNLLLLALPVLASRAYAAPAPGQALQRVGIVGGQEAPRSKWPWQVSLRVHGPYWMHFCGGSLIHPQWVLTAAHCVGPDVKDLAALRVQLREQHLYYQDQLLPVSRIIVHPQFYTAQIGADIALLELEEPVNVSSHVHTVTLPPASETFPPGMPCWVTGWGDVDNDERLPPPFPLKQVKVPIMENHICDAKYHLGAYTGDDVRIVRDDMLCAGNTRRDSCQGDSGGPLVCKVNGTWLQAGVVSWGEGCAQPNRPGIYTRVTYYLDWIHHYVPKKP,tryb1,86
88
+ MAARVLIIGSGGREHTLAWKLAQSHHVKQVLVAPGNAGTACSEKISNTAISISDHTALAQFCKEKKIEFVVVGPEAPLAAGIVGNLRSAGVQCFGPTAEAAQLESSKRFAKEFMDRHGIPTAQWKAFTKPEEACSFILSADFPALVVKASGLAAGKGVIVAKSKEEACKAVQEIMQEKAFGAAGETIVIEELLDGEEVSCLCFTDGKTVAPMPPAQDHKRLLEGDGGPNTGGMGAYCPAPQVSNDLLLKIKDTVLQRTVDGMQQEGTPYTGILYAGIMLTKNGPKVLEFNCRFGDPECQVILPLLKSDLYEVIQSTLDGLLCTSLPVWLENHTALTVVMASKGYPGDYTKGVEITGFPEAQALGLEVFHAGTALKNGKVVTHGGRVLAVTAIRENLISALEEAKKGLAAIKFEGAIYRKDVGFRAIAFLQQPRSLTYKESGVDIAAGNMLVKKIQPLAKATSRSGCKVDLGGFAGLFDLKAAGFKDPLLASGTDGVGTKLKIAQLCNKHDTIGQDLVAMCVNDILAQGAEPLFFLDYFSCGKLDLSVTEAVVAGIAKACGKAGCALLGGETAEMPDMYPPGEYDLAGFAVGAMERDQKLPHLERITEGDVVVGIASSGLHSNGFSLVRKIVAKSSLQYSSPAPDGCGDQTLGDLLLTPTRIYSHSLLPVLRSGHVKAFAHITGGGLLENIPRVLPEKLGVDLDAQTWRIPRVFSWLQQEGHLSEEEMARTFNCGVGAVLVVSKEQTEQILRDIQQHKEEAWVIGSVVARAEGSPRVKVKNLIESMQINGSVLKNGSLTNHFSFEKKKARVAVLISGTGSNLQALIDSTREPNSSAQIDIVISNKAAVAGLDKAERAGIPTRVINHKLYKNRVEFDSAIDLVLEEFSIDIVCLAGFMRILSGPFVQKWNGKMLNIHPSLLPSFKGSNAHEQALETGVTVTGCTVHFVAEDVDAGQIILQEAVPVKRGDTVATLSERVKLAEHKIFPAALQLVASGTVQLGENGKICWVKEE,pur2,87
89
+ MGPGVLLLLLVATAWHGQGIPVIEPSVPELVVKPGATVTLRCVGNGSVEWDGPPSPHWTLYSDGSSSILSTNNATFQNTGTYRCTEPGDPLGGSAAIHLYVKDPARPWNVLAQEVVVFEDQDALLPCLLTDPVLEAGVSLVRVRGRPLMRHTNYSFSPWHGFTIHRAKFIQSQDYQCSALMGGRKVMSISIRLKVQKVIPGPPALTLVPAELVRIRGEAAQIVCSASSVDVNFDVFLQHNNTKLAIPQQSDFHNNRYQKVLTLNLDQVDFQHAGNYSCVASNVQGKHSTSMFFRVVESAYLNLSSEQNLIQEVTVGEGLNLKVMVEAYPGLQGFNWTYLGPFSDHQPEPKLANATTKDTYRHTFTLSLPRLKPSEAGRYSFLARNPGGWRALTFELTLRYPPEVSVIWTFINGSGTLLCAASGYPQPNVTWLQCSGHTDRCDEAQVLQVWDDPYPEVLSQEPFHKVTVQSLLTVETLEHNQTYECRAHNSVGSGSWAFIPISAGAHTHPPDEFLFTPVVVACMSIMALLLLLLLLLLYKYKQKPKYQVRWKIIESYEGNSYTFIDPTQLPYNEKWEFPRNNLQFGKTLGAGAFGKVVEATAFGLGKEDAVLKVAVKMLKSTAHADEKEALMSELKIMSHLGQHENIVNLLGACTHGGPVLVITEYCCYGDLLNFLRRKAEAMLGPSLSPGQDPEGGVDYKNIHLEKKYVRRDSGFSSQGVDTYVEMRPVSTSSNDSFSEQDLDKEDGRPLELRDLLHFSSQVAQGMAFLASKNCIHRDVAARNVLLTNGHVAKIGDFGLARDIMNDSNYIVKGNARLPVKWMAPESIFDCVYTVQSDVWSYGILLWEIFSLGLNPYPGILVNSKFYKLVKDGYQMAQPAFAPKNIYSIMQACWALEPTHRPTFQQICSFLQEQAQEDRRERDYTNLPSSSRSGGSGSSSSELEEESSSEHLTCCEQGDIAQPLLQPNNYQFC,csf1r,88
90
+ MASQPNSSAKKKEEKGKNIQVVVRCRPFNLAERKASAHSIVECDPVRKEVSVRTGGLADKSSRKTYTFDMVFGASTKQIDVYRSVVCPILDEVIMGYNCTIFAYGQTGTGKTFTMEGERSPNEEYTWEEDPLAGIIPRTLHQIFEKLTDNGTEFSVKVSLLEIYNEELFDLLNPSSDVSERLQMFDDPRNKRGVIIKGLEEITVHNKDEVYQILEKGAAKRTTAATLMNAYSSRSHSVFSVTIHMKETTIDGEELVKIGKLNLVDLAGSENIGRSGAVDKRAREAGNINQSLLTLGRVITALVERTPHVPYRESKLTRILQDSLGGRTRTSIIATISPASLNLEETLSTLEYAHRAKNILNKPEVNQKLTKKALIKEYTEEIERLKRDLAAAREKNGVYISEENFRVMSGKLTVQEEQIVELIEKIGAVEEELNRVTELFMDNKNELDQCKSDLQNKTQELETTQKHLQETKLQLVKEEYITSALESTEEKLHDAASKLLNTVEETTKDVSGLHSKLDRKKAVDQHNAEAQDIFGKNLNSLFNNMEELIKDGSSKQKAMLEVHKTLFGNLLSSSVSALDTITTVALGSLTSIPENVSTHVSQIFNMILKEQSLAAESKTVLQELINVLKTDLLSSLEMILSPTVVSILKINSQLKHIFKTSLTVADKIEDQKKELDGFLSILCNNLHELQENTICSLVESQKQCGNLTEDLKTIKQTHSQELCKLMNLWTERFCALEEKCENIQKPLSSVQENIQQKSKDIVNKMTFHSQKFCADSDGFSQELRNFNQEGTKLVEESVKHSDKLNGNLEKISQETEQRCESLNTRTVYFSEQWVSSLNEREQELHNLLEVVSQCCEASSSDITEKSDGRKAAHEKQHNIFLDQMTIDEDKLIAQNLELNETIKIGLTKLNCFLEQDLKLDIPTGTTPQRKSYLYPSTLVRTEPREHLLDQLKRKQPELLMMLNCSENNKEETIPDVDVEEAVLGQYTEEPLSQEPSVDAGVDCSSIGGVPFFQHKKSHGKDKENRGINTLERSKVEETTEHLVTKSRLPLRAQINL,kif11,89
91
+ MGQPGNGSAFLLAPNGSHAPDHDVTQERDEVWVVGMGIVMSLIVLAIVFGNVLVITAIAKFERLQTVTNYFITSLACADLVMGLAVVPFGAAHILMKMWTFGNFWCEFWTSIDVLCVTASIETLCVIAVDRYFAITSPFKYQSLLTKNKARVIILMVWIVSGLTSFLPIQMHWYRATHQEAINCYANETCCDFFTNQAYAIASSIVSFYVPLVIMVFVYSRVFQEAKRQLQKIDKSEGRFHVQNLSQVEQDGRTGHGLRRSSKFCLKEHKALKTLGIIMGTFTLCWLPFFIVNIVHVIQDNLIRKEVYILLNWIGYVNSGFNPLIYCRSPDFRIAFQELLCLRRSSLKAYGNGYSSNGNTGEQSGYHVEQEKENKLLCEDLPGTEDFVGHQGTVPSDNIDSQGRNCSTNDSLL,adrb2,90
92
+ MAESSDKLYRVEYAKSGRASCKKCSESIPKDSLRMAIMVQSPMFDGKVPHWYHFSCFWKVGHSIRHPDVEVDGFSELRWDDQQKVKKTAEAGGVTGKGQDGIGSKAEKTLGDFAAEYAKSNRSTCKGCMEKIEKGQVRLSKKMVDPEKPQLGMIDRWYHPGCFVKNREELGFRPEYSASQLKGFSLLATEDKEALKKQLPGVKSEGKRKGDEVDGVDEVAKKKSKKEKDKDSKLEKALKAQNDLIWNIKDELKKVCSTNDLKELLIFNKQQVPSGESAILDRVADGMVFGALLPCEECSGQLVFKSDAYYCTGDVTAWTKCMVKTQTPNRKEWVTPKEFREISYLKKLKVKKQDRIFPPETSASVAATPPPSTASAPAAVNSSASADKPLSNMKILTLGKLSRNKDEVKAMIEKLGGKLTGTANKASLCISTKKEVEKMNKKMEEVKEANIRVVSEDFLQDVSASTKSLQELFLAHILSPWGAEVKAEPVEVVAPRGKSGAALSKKSKGQVKEEGINKSEKRMKLTLKGGAAVDPDSGLEHSAHVLEKGGKVFSATLGLVDIVKGTNSYYKLQLLEDDKENRYWIFRSWGRVGTVIGSNKLEQMPSKEDAIEHFMKLYEEKTGNAWHSKNFTKYPKKFYPLEIDYGQDEEAVKKLTVNPGTKSKLPKPVQDLIKMIFDVESMKKAMVEYEIDLQKMPLGKLSKRQIQAAYSILSEVQQAVSQGSSDSQILDLSNRFYTLIPHDFGMKKPPLLNNADSVQAKVEMLDNLLDIEVAYSLLRGGSDDSSKDPIDVNYEKLKTDIKVVDRDSEEAEIIRKYVKNTHATTHNAYDLEVIDIFKIEREGECQRYKPFKQLHNRRLLWHGSRTTNFAGILSQGLRIAPPEAPVTGYMFGKGIYFADMVSKSANYCHTSQGDPIGLILLGEVALGNMYELKHASHISKLPKGKHSVKGLGKTTPDPSANISLDGVDVPLGTGISSGVNDTSLLYNEYIVYDIAQVNLKYLLKLKFNFKTSLW,parp1,91
93
+ MSRPLSDQEKRKQISVRGLAGVENVTELKKNFNRHLHFTLVKDRNVATPRDYYFALAHTVRDHLVGRWIRTQQHYYEKDPKRIYYLSLEFYMGRTLQNTMVNLALENACDEATYQLGLDMEELEEIEEDAGLGNGGLGRLAACFLDSMATLGLAAYGYGIRYEFGIFNQKISGGWQMEEADDWLRYGNPWEKARPEFTLPVHFYGHVEHTSQGAKWVDTQVVLAMPYDTPVPGYRNNVVNTMRLWSAKAPNDFNLKDFNVGGYIQAVLDRNLAENISRVLYPNDNFFEGKELRLKQEYFVVAATLQDIIRRFKSSKFGCRDPVRTNFDAFPDKVAIQLNDTHPSLAIPELMRILVDLERMDWDKAWDVTVRTCAYTNHTVLPEALERWPVHLLETLLPRHLQIIYEINQRFLNRVAAAFPGDVDRLRRMSLVEEGAVKRINMAHLCIAGSHAVNGVARIHSEILKKTIFKDFYELEPHKFQNKTNGITPRRWLVLCNPGLAEVIAERIGEDFISDLDQLRKLLSFVDDEAFIRDVAKVKQENKLKFAAYLEREYKVHINPNSLFDIQVKRIHEYKRQLLNCLHVITLYNRIKREPNKFFVPRTVMIGGKAAPGYHMAKMIIRLVTAIGDVVNHDPAVGDRLRVIFLENYRVSLAEKVIPAADLSEQISTAGTEASGTGNMKFMLNGALTIGTMDGANVEMAEEAGEENFFIFGMRVEDVDKLDQRGYNAQEYYDRIPELRQVIEQLSSGFFSPKQPDLFKDIVNMLMHHDRFKVFADYEDYIKCQEKVSALYKNPREWTRMVIRNIATSGKFSSDRTIAQYAREIWGVEPSRQRLPAPDEAI,pygm,92
94
+ MTMTLHTKASGMALLHQIQGNELEPLNRPQLKIPLERPLGEVYLDSSKPAVYNYPEGAAYEFNAAAAANAQVYGQTGLPYGPGSEAAAFGSNGLGGFPPLNSVSPSPLMLLHPPPQLSPFLQPHGQQVPYYLENEPSGYTVREAGPPAFYRPNSDNRRQGGRERLASTNDKGSMAMESAKETRYCAVCNDYASGYHYGVWSCEGCKAFFKRSIQGHNDYMCPATNQCTIDKNRRKSCQACRLRKCYEVGMMKGGIRKDRRGGRMLKHKRQRDDGEGRGEVGSAGDMRAANLWPSPLMIKRSKKNSLALSLTADQMVSALLDAEPPILYSEYDPTRPFSEASMMGLLTNLADRELVHMINWAKRVPGFVDLTLHDQVHLLECAWLEILMIGLVWRSMEHPGKLLFAPNLLLDRNQGKCVEGMVEIFDMLLATSSRFRMMNLQGEEFVCLKSIILLNSGVYTFLSSTLKSLEEKDHIHRVLDKITDTLIHLMAKAGLTLQQQHQRLAQLLLILSHIRHMSNKGMEHLYSMKCKNVVPLYDLLLEMLDAHRLHAPTSRGGASVEETDQSHLATAGSTSSHSLQKYYITGEAEGFPATV,esr1,93
95
+ MPEETQTQDQPMEEEEVETFAFQAEIAQLMSLIINTFYSNKEIFLRELISNSSDALDKIRYESLTDPSKLDSGKELHINLIPNKQDRTLTIVDTGIGMTKADLINNLGTIAKSGTKAFMEALQAGADISMIGQFGVGFYSAYLVAEKVTVITKHNDDEQYAWESSAGGSFTVRTDTGEPMGRGTKVILHLKEDQTEYLEERRIKEIVKKHSQFIGYPITLFVEKERDKEVSDDEAEEKEDKEEEKEKEEKESEDKPEIEDVGSDEEEEKKDGDKKKKKKIKEKYIDQEELNKTKPIWTRNPDDITNEEYGEFYKSLTNDWEDHLAVKHFSVEGQLEFRALLFVPRRAPFDLFENRKKKNNIKLYVRRVFIMDNCEELIPEYLNFIRGVVDSEDLPLNISREMLQQSKILKVIRKNLVKKCLELFTELAEDKENYKKFYEQFSKNIKLGIHEDSQNRKKLSELLRYYTSASGDEMVSLKDYCTRMKENQKHIYYITGETKDQVANSAFVERLRKHGLEVIYMIEPIDEYCVQQLKEFEGKTLVSVTKEGLELPEDEEEKKKQEEKKTKFENLCKIMKDILEKKVEKVVVSNRLVTSPCCIVTSTYGWTANMERIMKAQALRDNSTMGYMAAKKHLEINPDHSIIETLRQKAEADKNDKSVKDLVILLYETALLSSGFSLEDPQTHANRIYRMIKLGLGIDEDDPTADDTSAAVTEEMPPLEGDDDTSRMEEVD,hs90a,94
96
+ MGAASGRRGPGLLLPLPLLLLLPPQPALALDPGLQPGNFSADEAGAQLFAQSYNSSAEQVLFQSVAASWAHDTNITAENARRQEEAALLSQEFAEAWGQKAKELYEPIWQNFTDPQLRRIIGAVRTLGSANLPLAKRQQYNALLSNMSRIYSTAKVCLPNKTATCWSLDPDLTNILASSRSYAMLLFAWEGWHNAAGIPLKPLYEDFTALSNEAYKQDGFTDTGAYWRSWYNSPTFEDDLEHLYQQLEPLYLNLHAFVRRALHRRYGDRYINLRGPIPAHLLGDMWAQSWENIYDMVVPFPDKPNLDVTSTMLQQGWNATHMFRVAEEFFTSLELSPMPPEFWEGSMLEKPADGREVVCHASAWDFYNRKDFRIKQCTRVTMDQLSTVHHEMGHIQYYLQYKDLPVSLRRGANPGFHEAIGDVLALSVSTPEHLHKIGLLDRVTNDTESDINYLLKMALEKIAFLPFGYLVDQWRWGVFSGRTPPSRYNFDWWYLRTKYQGICPPVTRNETHFDAGAKFHVPNVTPYIRYFVSFVLQFQFHEALCKEAGYEGPLHQCDIYRSTKAGAKLRKVLQAGSSRPWQEVLKDMVGLDALDAQPLLKYFQPVTQWLQEQNQQNGEVLGWPEYQWHPPLPDNYPEGIDLVTDEAEASKFVEEYDRTSQVVWNEYAEANWNYNTNITTETSKILLQKNMQIANHTLKYGTQARKFDVNQLQNTTIKRIIKKVQDLERAALPAQELEEYNKILLDMETTYSVATVCHPNGSCLQLEPDLTNVMATSRKYEDLLWAWEGWRDKAGRAILQFYPKYVELINQAARLNGYVDAGDSWRSMYETPSLEQDLERLFQELQPLYLNLHAYVRRALHRHYGAQHINLEGPIPAHLLGNMWAQTWSNIYDLVVPFPSAPSMDTTEAMLKQGWTPRRMFKEADDFFTSLGLLPVPPEFWNKSMLEKPTDGREVVCHASAWDFYNGKDFRIKQCTTVNLEDLVVAHHEMGHIQYFMQYKDLPVALREGANPGFHEAIGDVLALSVSTPKHLHSLNLLSSEGGSDEHDINFLMKMALDKIAFIPFSYLVDQWRWRVFDGSITKENYNQEWWSLRLKYQGLCPPVPRTQGDFDPGAKFHIPSSVPYIRYFVSFIIQFQFHEALCQAAGHTGPLHKCDIYQSKEAGQRLATAMKLGFSRPWPEAMQLITGQPNMSASAMLSYFKPLLDWLRTENELHGEKLGWPQYNWTPNSARSEGPLPDSGRVSFLGLDLDAQQARVGQWLLLFLGIALLVATLGLSQRLFSIRHRSLHRHSHGPQFGSEVELRHS,ace,95
97
+ MRPPQCLLHTPSLASPLLLLLLWLLGGGVGAEGREDAELLVTVRGGRLRGIRLKTPGGPVSAFLGIPFAEPPMGPRRFLPPEPKQPWSGVVDATTFQSVCYQYVDTLYPGFEGTEMWNPNRELSEDCLYLNVWTPYPRPTSPTPVLVWIYGGGFYSGASSLDVYDGRFLVQAERTVLVSMNYRVGAFGFLALPGSREAPGNVGLLDQRLALQWVQENVAAFGGDPTSVTLFGESAGAASVGMHLLSPPSRGLFHRAVLQSGAPNGPWATVGMGEARRRATQLAHLVGCPPGGTGGNDTELVACLRTRPAQVLVNHEWHVLPQESVFRFSFVPVVDGDFLSDTPEALINAGDFHGLQVLVGVVKDEGSYFLVYGAPGFSKDNESLISRAEFLAGVRVGVPQVSDLAAEAVVLHYTDWLHPEDPARLREALSDVVGDHNVVCPVAQLAGRLAAQGARVYAYVFEHRASTLSWPLWMGVPHGYEIEFIFGIPLDPSRNYTAEEKIFAQRLMRYWANFARTGDPNEPRDPKAPQWPPYTAGAQQYVSLDLRPLEVRRGLRAQACAFWNRFLPKLLSATDTLDEAERQWKAEFHRWSSYMVHWKNQFDHYSKQDRCSDL,aces,96
98
+ MEQPQEEAPEVREEEEKEEVAEAEGAPELNGGPQHALPSSSYTDLSRSSSPPSLLDQLQMGCDGASCGSLNMECRVCGDKASGFHYGVHACEGCKGFFRRTIRMKLEYEKCERSCKIQKKNRNKCQYCRFQKCLALGMSHNAIRFGRMPEAEKRKLVAGLTANEGSQYNPQVADLKAFSKHIYNAYLKNFNMTKKKARSILTGKASHTAPFVIHDIETLWQAEKGLVWKQLVNGLPPYKEISVHVFYRCQCTTVETVRELTEFAKSIPSFSSLFLNDQVTLLKYGVHEAIFAMLASIVNKDGLLVANGSGFVTREFLRSLRKPFSDIIEPKFEFAVKFNALELDDSDLALFIAAIILCGDRPGLMNVPRVEAIQDTILRALEFHLQANHPDAQYLFPKLLQKMADLRQLVTEHAQMMQRIKKTETETSLHPLLQEIYKDMY,ppard,97
99
+ MENGYTYEDYKNTAEWLLSHTKHRPQVAIICGSGLGGLTDKLTQAQIFDYGEIPNFPRSTVPGHAGRLVFGFLNGRACVMMQGRFHMYEGYPLWKVTFPVRVFHLLGVDTLVVTNAAGGLNPKFEVGDIMLIRDHINLPGFSGQNPLRGPNDERFGDRFPAMSDAYDRTMRQRALSTWKQMGEQRELQEGTYVMVAGPSFETVAECRVLQKLGADAVGMSTVPEVIVARHCGLRVFGFSLITNKVIMDYESLEKANHEEVLAAGKQAAQKLEQFVSILMASIPLPDKAS,pnph,98
100
+ MLEICLKLVGCKSKKGLSSSSSCYLEEALQRPVASDFEPQGLSEAARWNSKENLLAGPSENDPNLFVALYDFVASGDNTLSITKGEKLRVLGYNHNGEWCEAQTKNGQGWVPSNYITPVNSLEKHSWYHGPVSRNAAEYLLSSGINGSFLVRESESSPGQRSISLRYEGRVYHYRINTASDGKLYVSSESRFNTLAELVHHHSTVADGLITTLHYPAPKRNKPTVYGVSPNYDKWEMERTDITMKHKLGGGQYGEVYEGVWKKYSLTVAVKTLKEDTMEVEEFLKEAAVMKEIKHPNLVQLLGVCTREPPFYIITEFMTYGNLLDYLRECNRQEVNAVVLLYMATQISSAMEYLEKKNFIHRDLAARNCLVGENHLVKVADFGLSRLMTGDTYTAHAGAKFPIKWTAPESLAYNKFSIKSDVWAFGVLLWEIATYGMSPYPGIDLSQVYELLEKDYRMERPEGCPEKVYELMRACWQWNPSDRPSFAEIHQAFETMFQESSISDEVEKELGKQGVRGAVSTLLQAPELPTKTRTSRRAAEHRDTTDVPEMPHSKGQGESDPLDHEPAVSPLLPRKERGPPEGGLNEDERLLPKDKKTNLFSALIKKKKKTAPTPPKRSSSFREMDGQPERRGAGEEEGRDISNGALAFTPLDTADPAKSPKPSNGAGVPNGALRESGGSGFRSPHLWKKSSTLTSSRLATGEEEGGGSSSKRFLRSCSASCVPHGAKDTEWRSVTLPRDLQSTGRQFDSSTFGGHKSEKPALPRKRAGENRSDQVTRGTVTPPPRLVKKNEEAADEVFKDIMESSPGSSPPNLTPKPLRRQVTVAPASGLPHKEEAGKGSALGTPAAAEPVTPTSKAGSGAPGGTSKGPAEESRVRRHKHSSESPGRDKGKLSRLKPAPPPPPAASAGKAGGKPSQSPSQEAAGEAVLGAKTKATSLVDAVNSDAAKPSQPGEGLKKPVLPATPKPQSAKPSGTPISPAPVPSTLPSASSALAGDQPSSTAFIPLISTRVSLRKTRQPPERIASGAITKGVVLDSTEALCLAISRNSEQMASHSAVLEAGKNLYTFCVSYVDSIQQMRNKFAFREAINKLENNLRELQICPATAGSGPAATQDFSKLLSSVKEISDIVQR,abl1,99
101
+ MDSLVVLVLCLSCLLLLSLWRQSSGRGKLPPGPTPLPVIGNILQIGIKDISKSLTNLSKVYGPVFTLYFGLKPIVVLHGYEAVKEALIDLGEEFSGRGIFPLAERANRGFGIVFSNGKKWKEIRRFSLMTLRNFGMGKRSIEDRVQEEARCLVEELRKTKASPCDPTFILGCAPCNVICSIIFHKRFDYKDQQFLNLMEKLNENIKILSSPWIQICNNFSPIIDYFPGTHNKLLKNVAFMKSYILEKVKEHQESMDMNNPQDFIDCFLMKMEKEKHNQPSEFTIESLENTAVDLFGAGTETTSTTLRYALLLLLKHPEVTAKVQEEIERVIGRNRSPCMQDRSHMPYTDAVVHEVQRYIDLLPTSLPHAVTCDIKFRNYLIPKGTTILISLTSVLHDNKEFPNPEMFDPHHFLDEGGNFKKSKYFMPFSAGKRICVGEALAGMELFLFLTSILQNFNLKSLVDPKNLDTTPVVNGFASVPPFYQLCFIPV,cp2c9,100
102
+ METKGYHSLPEGLDMERRWGQVSQAVERSSLGPTERTDENNYMEIVNVSCVSGAIPNNSTQGSSKEKQELLPCLQQDNNRPGILTSDIKTELESKELSATVAESMGLYMDSVRDADYSYEQQNQQGSMSPAKIYQNVEQLVKFYKGNGHRPSTLSCVNTPLRSFMSDSGSSVNGGVMRAVVKSPIMCHEKSPSVCSPLNMTSSVCSPAGINSVSSTTASFGSFPVHSPITQGTPLTCSPNVENRGSRSHSPAHASNVGSPLSSPLSSMKSSISSPPSHCSVKSPVSSPNNVTLRSSVSSPANINNSRCSVSSPSNTNNRSTLSSPAASTVGSICSPVNNAFSYTASGTSAGSSTLRDVVPSPDTQEKGAQEVPFPKTEEVESAISNGVTGQLNIVQYIKPEPDGAFSSSCLGGNSKINSDSSFSVPIKQESTKHSCSGTSFKGNPTVNPFPFMDGSYFSFMDDKDYYSLSGILGPPVPGFDGNCEGSGFPVGIKQEPDDGSYYPEASIPSSAIVGVNSGGQSFHYRIGAQGTISLSRSARDQSFQHLSSFPPVNTLVESWKSHGDLSSRRSDGYPVLEYIPENVSSSTLRSVSTGSSRPSKICLVCGDEASGCHYGVVTCGSCKVFFKRAVEGQHNYLCAGRNDCIIDKIRRKNCPACRLQKCLQAGMNLGARKSKKLGKLKGIHEEQPQQQQPPPPPPPPQSPEEGTTYIAPAKEPSVNTALVPQLSTISRALTPSPVMVLENIEPEIVYAGYDSSKPDTAENLLSTLNRLAGKQMIQVVKWAKVLPGFKNLPLEDQITLIQYSWMCLSSFALSWRSYKHTNSQFLYFAPDLVFNEEKMHQSAMYELCQGMHQISLQFVRLQLTFEEYTIMKVLLLLSTIPKDGLKSQAAFEEMRTNYIKELRKMVTKCPNNSGQSWQRFYQLTKLLDSMHDLVSDLLEFCFYTFRESHALKVEFPAMLVEIISDQLPKVESGNAKPLYFHRK,mcr,101
data/datasets/AD/saliency.csv ADDED
@@ -0,0 +1 @@
1
+ CCCCCCC(C(C)O)n1cncn1,ada,1
data/datasets/DUDE/Smiles_1.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/datasets/DUDE/Smiles_2.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/datasets/DUDE/Smiles_3.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/datasets/DUDE/Smiles_4.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/datasets/DUDE/Smiles_Test.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/features.py ADDED
@@ -0,0 +1,136 @@
1
+ # allowable multiple choice node and edge features
2
+ allowable_features = {
3
+ 'possible_atomic_num_list' : list(range(1, 119)) + ['misc'],
4
+ 'possible_chirality_list' : [
5
+ 'CHI_UNSPECIFIED',
6
+ 'CHI_TETRAHEDRAL_CW',
7
+ 'CHI_TETRAHEDRAL_CCW',
8
+ 'CHI_OTHER'
9
+ ],
10
+ 'possible_degree_list' : [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 'misc'],
11
+ 'possible_formal_charge_list' : [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 'misc'],
12
+ 'possible_numH_list' : [0, 1, 2, 3, 4, 5, 6, 7, 8, 'misc'],
13
+ 'possible_number_radical_e_list': [0, 1, 2, 3, 4, 'misc'],
14
+ 'possible_hybridization_list' : [
15
+ 'SP', 'SP2', 'SP3', 'SP3D', 'SP3D2', 'misc'
16
+ ],
17
+ 'possible_is_aromatic_list': [False, True],
18
+ 'possible_is_in_ring_list': [False, True],
19
+ 'possible_bond_type_list' : [
20
+ 'SINGLE',
21
+ 'DOUBLE',
22
+ 'TRIPLE',
23
+ 'AROMATIC',
24
+ 'misc'
25
+ ],
26
+ 'possible_bond_stereo_list': [
27
+ 'STEREONONE',
28
+ 'STEREOZ',
29
+ 'STEREOE',
30
+ 'STEREOCIS',
31
+ 'STEREOTRANS',
32
+ 'STEREOANY',
33
+ ],
34
+ 'possible_is_conjugated_list': [False, True],
35
+ 'posible_explicit_valence': [1, 2, 3, 4, 5, 6, 7, 'misc'],
36
+ 'posible_implicit_valence': [1, 2, 3, 4, 5, 6, 7, 'misc']
37
+ }
38
+
39
+ def safe_index(l, e):
40
+ """
41
+ Return index of element e in list l. If e is not present, return the last index
42
+ """
43
+ try:
44
+ return l.index(e)
45
+ except:
46
+ return len(l) - 1
47
+
48
+ def atom_to_feature_vector(atom):
49
+ """
50
+ Converts rdkit atom object to feature list of indices
51
+ :param mol: rdkit atom object
52
+ :return: list
53
+ """
54
+ atom_feature = [
55
+ safe_index(allowable_features['possible_atomic_num_list'], atom.GetAtomicNum()),
56
+ allowable_features['possible_chirality_list'].index(str(atom.GetChiralTag())),
57
+ safe_index(allowable_features['possible_degree_list'], atom.GetTotalDegree()),
58
+ safe_index(allowable_features['possible_formal_charge_list'], atom.GetFormalCharge()),
59
+ safe_index(allowable_features['possible_numH_list'], atom.GetTotalNumHs()),
60
+ safe_index(allowable_features['possible_number_radical_e_list'], atom.GetNumRadicalElectrons()),
61
+ safe_index(allowable_features['possible_hybridization_list'], str(atom.GetHybridization())),
62
+ allowable_features['possible_is_aromatic_list'].index(atom.GetIsAromatic()),
63
+ allowable_features['possible_is_in_ring_list'].index(atom.IsInRing()),
64
+ ]
65
+ return atom_feature
66
+
67
+ def get_atom_feature_dims():
68
+ return list(map(len, [
69
+ allowable_features['possible_atomic_num_list'],
70
+ allowable_features['possible_chirality_list'],
71
+ allowable_features['possible_degree_list'],
72
+ allowable_features['possible_formal_charge_list'],
73
+ allowable_features['possible_numH_list'],
74
+ allowable_features['possible_number_radical_e_list'],
75
+ allowable_features['possible_hybridization_list'],
76
+ allowable_features['possible_is_aromatic_list'],
77
+ allowable_features['possible_is_in_ring_list'],
78
+ ]))
79
+
80
+ def bond_to_feature_vector(bond):
81
+ """
82
+ Converts rdkit bond object to feature list of indices
83
+ :param mol: rdkit bond object
84
+ :return: list
85
+ """
86
+ bond_feature = [
87
+ safe_index(allowable_features['possible_bond_type_list'], str(bond.GetBondType())),
88
+ allowable_features['possible_bond_stereo_list'].index(str(bond.GetStereo())),
89
+ allowable_features['possible_is_conjugated_list'].index(bond.GetIsConjugated()),
90
+ ]
91
+ return bond_feature
92
+
93
+ def get_bond_feature_dims():
94
+ return list(map(len, [
95
+ allowable_features['possible_bond_type_list'],
96
+ allowable_features['possible_bond_stereo_list'],
97
+ allowable_features['possible_is_conjugated_list']
98
+ ]))
99
+
100
+ def atom_feature_vector_to_dict(atom_feature):
101
+ [atomic_num_idx,
102
+ chirality_idx,
103
+ degree_idx,
104
+ formal_charge_idx,
105
+ num_h_idx,
106
+ number_radical_e_idx,
107
+ hybridization_idx,
108
+ is_aromatic_idx,
109
+ is_in_ring_idx] = atom_feature
110
+
111
+ feature_dict = {
112
+ 'atomic_num': allowable_features['possible_atomic_num_list'][atomic_num_idx],
113
+ 'chirality': allowable_features['possible_chirality_list'][chirality_idx],
114
+ 'degree': allowable_features['possible_degree_list'][degree_idx],
115
+ 'formal_charge': allowable_features['possible_formal_charge_list'][formal_charge_idx],
116
+ 'num_h': allowable_features['possible_numH_list'][num_h_idx],
117
+ 'num_rad_e': allowable_features['possible_number_radical_e_list'][number_radical_e_idx],
118
+ 'hybridization': allowable_features['possible_hybridization_list'][hybridization_idx],
119
+ 'is_aromatic': allowable_features['possible_is_aromatic_list'][is_aromatic_idx],
120
+ 'is_in_ring': allowable_features['possible_is_in_ring_list'][is_in_ring_idx]
121
+ }
122
+
123
+ return feature_dict
124
+
125
+ def bond_feature_vector_to_dict(bond_feature):
126
+ [bond_type_idx,
127
+ bond_stereo_idx,
128
+ is_conjugated_idx] = bond_feature
129
+
130
+ feature_dict = {
131
+ 'bond_type': allowable_features['possible_bond_type_list'][bond_type_idx],
132
+ 'bond_stereo': allowable_features['possible_bond_stereo_list'][bond_stereo_idx],
133
+ 'is_conjugated': allowable_features['possible_is_conjugated_list'][is_conjugated_idx]
134
+ }
135
+
136
+ return feature_dict
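A minimal usage sketch for the feature helpers above, assuming RDKit is installed and the module is importable as `data.features`; the SMILES string is a hypothetical example, not taken from the repository data.

```python
from rdkit import Chem

from data.features import (
    atom_to_feature_vector,
    bond_to_feature_vector,
    get_atom_feature_dims,
)

# Hypothetical example molecule (ethanol), used only to illustrate the API.
mol = Chem.MolFromSmiles("CCO")

# One 9-entry index vector per atom, one 3-entry index vector per bond.
atom_features = [atom_to_feature_vector(atom) for atom in mol.GetAtoms()]
bond_features = [bond_to_feature_vector(bond) for bond in mol.GetBonds()]

print(len(atom_features), len(atom_features[0]))  # 3 atoms, 9 features each
print(get_atom_feature_dims())                    # vocabulary size of each atom feature
```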
example/input_smiles.csv ADDED
@@ -0,0 +1,4 @@
1
+ smiles
2
+ Cn4c(CCC(=O)Nc3ccc2ccn(CC[C@H](CO)n1cnc(C(N)=O)c1)c2c3)nc5ccccc45
3
+ OCCCCCn1cnc2C(O)CN=CNc12
4
+ Nc4nc(c1ccco1)c3ncn(C(=O)NCCc2ccccc2)c3n4
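A short sketch of loading this example input, assuming pandas and RDKit are available; the parse check is an illustrative assumption, not part of the repository's inference script.

```python
import pandas as pd
from rdkit import Chem

# Read the example SMILES file shipped with the repository.
df = pd.read_csv("example/input_smiles.csv")

# Illustrative sanity check: flag rows that RDKit cannot parse.
df["valid"] = df["smiles"].apply(lambda s: Chem.MolFromSmiles(s) is not None)
print(df)
```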
gcn_lib/__init__.py ADDED
File without changes
gcn_lib/dense/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .torch_nn import *
2
+ from .torch_edge import *
3
+ from .torch_vertex import *
4
+
gcn_lib/dense/torch_edge.py ADDED
@@ -0,0 +1,101 @@
1
+ import torch
2
+ from torch import nn
3
+ from torch_cluster import knn_graph
4
+
5
+
6
+ class DenseDilated(nn.Module):
7
+ """
8
+ Select dilated neighbors from a dense k-NN neighbor list
9
+
10
+ edge_index: (2, batch_size, num_points, k)
11
+ """
12
+ def __init__(self, k=9, dilation=1, stochastic=False, epsilon=0.0):
13
+ super(DenseDilated, self).__init__()
14
+ self.dilation = dilation
15
+ self.stochastic = stochastic
16
+ self.epsilon = epsilon
17
+ self.k = k
18
+
19
+ def forward(self, edge_index):
20
+ if self.stochastic:
21
+ if torch.rand(1) < self.epsilon and self.training:
22
+ num = self.k * self.dilation
23
+ randnum = torch.randperm(num)[:self.k]
24
+ edge_index = edge_index[:, :, :, randnum]
25
+ else:
26
+ edge_index = edge_index[:, :, :, ::self.dilation]
27
+ else:
28
+ edge_index = edge_index[:, :, :, ::self.dilation]
29
+ return edge_index
30
+
31
+
32
+ def pairwise_distance(x):
33
+ """
34
+ Compute pairwise distance of a point cloud.
35
+ Args:
36
+ x: tensor (batch_size, num_points, num_dims)
37
+ Returns:
38
+ pairwise distance: (batch_size, num_points, num_points)
39
+ """
40
+ x_inner = -2*torch.matmul(x, x.transpose(2, 1))
41
+ x_square = torch.sum(torch.mul(x, x), dim=-1, keepdim=True)
42
+ return x_square + x_inner + x_square.transpose(2, 1)
43
+
44
+
45
+ def dense_knn_matrix(x, k=16):
46
+ """Get KNN based on the pairwise distance.
47
+ Args:
48
+ x: (batch_size, num_dims, num_points, 1)
49
+ k: int
50
+ Returns:
51
+ nearest neighbor indices and center indices, stacked: (2, batch_size, num_points, k)
52
+ """
53
+ with torch.no_grad():
54
+ x = x.transpose(2, 1).squeeze(-1)
55
+ batch_size, n_points, n_dims = x.shape
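+ # topk on the negated squared distances yields, for every point, the indices of its k nearest neighbours.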
56
+ _, nn_idx = torch.topk(-pairwise_distance(x.detach()), k=k)
57
+ center_idx = torch.arange(0, n_points, device=x.device).repeat(batch_size, k, 1).transpose(2, 1)
58
+ return torch.stack((nn_idx, center_idx), dim=0)
59
+
60
+
61
+ class DenseDilatedKnnGraph(nn.Module):
62
+ """
63
+ Find the neighbors' indices based on dilated knn
64
+ """
65
+ def __init__(self, k=9, dilation=1, stochastic=False, epsilon=0.0):
66
+ super(DenseDilatedKnnGraph, self).__init__()
67
+ self.dilation = dilation
68
+ self.stochastic = stochastic
69
+ self.epsilon = epsilon
70
+ self.k = k
71
+ self._dilated = DenseDilated(k, dilation, stochastic, epsilon)
72
+ self.knn = dense_knn_matrix
73
+
74
+ def forward(self, x):
75
+ edge_index = self.knn(x, self.k * self.dilation)
76
+ return self._dilated(edge_index)
77
+
78
+
79
+ class DilatedKnnGraph(nn.Module):
80
+ """
81
+ Find the neighbors' indices based on dilated knn
82
+ """
83
+ def __init__(self, k=9, dilation=1, stochastic=False, epsilon=0.0):
84
+ super(DilatedKnnGraph, self).__init__()
85
+ self.dilation = dilation
86
+ self.stochastic = stochastic
87
+ self.epsilon = epsilon
88
+ self.k = k
89
+ self._dilated = DenseDilated(k, dilation, stochastic, epsilon)
90
+ self.knn = knn_graph
91
+
92
+ def forward(self, x):
93
+ x = x.squeeze(-1)
94
+ B, C, N = x.shape
95
+ edge_index = []
96
+ for i in range(B):
97
+ edgeindex = self.knn(x[i].contiguous().transpose(1, 0).contiguous(), self.k * self.dilation)
98
+ edgeindex = edgeindex.view(2, N, self.k * self.dilation)
99
+ edge_index.append(edgeindex)
100
+ edge_index = torch.stack(edge_index, dim=1)
101
+ return self._dilated(edge_index)
gcn_lib/dense/torch_nn.py ADDED
@@ -0,0 +1,93 @@
1
+ import torch
2
+ from torch import nn
3
+ from torch.nn import Sequential as Seq, Linear as Lin, Conv2d
4
+
5
+
6
+ ##############################
7
+ # Basic layers
8
+ ##############################
9
+ def act_layer(act, inplace=False, neg_slope=0.2, n_prelu=1):
10
+ # activation layer
11
+
12
+ act = act.lower()
13
+ if act == 'relu':
14
+ layer = nn.ReLU(inplace)
15
+ elif act == 'leakyrelu':
16
+ layer = nn.LeakyReLU(neg_slope, inplace)
17
+ elif act == 'prelu':
18
+ layer = nn.PReLU(num_parameters=n_prelu, init=neg_slope)
19
+ else:
20
+ raise NotImplementedError('activation layer [%s] is not found' % act)
21
+ return layer
22
+
23
+
24
+ def norm_layer(norm, nc):
25
+ # normalization layer 2d
26
+ norm = norm.lower()
27
+ if norm == 'batch':
28
+ layer = nn.BatchNorm2d(nc, affine=True)
29
+ elif norm == 'instance':
30
+ layer = nn.InstanceNorm2d(nc, affine=False)
31
+ else:
32
+ raise NotImplementedError('normalization layer [%s] is not found' % norm)
33
+ return layer
34
+
35
+
36
+ class MLP(Seq):
37
+ def __init__(self, channels, act='relu', norm=None, bias=True):
38
+ m = []
39
+ for i in range(1, len(channels)):
40
+ m.append(Lin(channels[i - 1], channels[i], bias))
41
+ if act is not None and act.lower() != 'none':
42
+ m.append(act_layer(act))
43
+ if norm is not None and norm.lower() != 'none':
44
+ m.append(norm_layer(norm, channels[-1]))
45
+ super(MLP, self).__init__(*m)
46
+
47
+
48
+ class BasicConv(Seq):
49
+ def __init__(self, channels, act='relu', norm=None, bias=True, drop=0.):
50
+ m = []
51
+ for i in range(1, len(channels)):
52
+ m.append(Conv2d(channels[i - 1], channels[i], 1, bias=bias))
53
+ if act is not None and act.lower() != 'none':
54
+ m.append(act_layer(act))
55
+ if norm is not None and norm.lower() != 'none':
56
+ m.append(norm_layer(norm, channels[-1]))
57
+ if drop > 0:
58
+ m.append(nn.Dropout2d(drop))
59
+
60
+ super(BasicConv, self).__init__(*m)
61
+
62
+ self.reset_parameters()
63
+
64
+ def reset_parameters(self):
65
+ for m in self.modules():
66
+ if isinstance(m, nn.Conv2d):
67
+ nn.init.kaiming_normal_(m.weight)
68
+ if m.bias is not None:
69
+ nn.init.zeros_(m.bias)
70
+ elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.InstanceNorm2d):
71
+ m.weight.data.fill_(1)
72
+ m.bias.data.zero_()
73
+
74
+
75
+ def batched_index_select(inputs, index):
76
+ """
77
+
78
+ :param inputs: torch.Size([batch_size, num_dims, num_vertices, 1])
79
+ :param index: torch.Size([batch_size, num_vertices, k])
80
+ :return: torch.Size([batch_size, num_dims, num_vertices, k])
81
+ """
82
+
83
+ batch_size, num_dims, num_vertices, _ = inputs.shape
84
+ k = index.shape[2]
85
+ idx = torch.arange(0, batch_size) * num_vertices
86
+ idx = idx.view(batch_size, -1)
87
+
88
+ inputs = inputs.transpose(2, 1).contiguous().view(-1, num_dims)
89
+ index = index.view(batch_size, -1) + idx.type(index.dtype).to(inputs.device)
90
+ index = index.view(-1)
91
+
92
+ return torch.index_select(inputs, 0, index).view(batch_size, -1, num_dims).transpose(2, 1).view(batch_size, num_dims, -1, k)
93
+
gcn_lib/dense/torch_vertex.py ADDED
@@ -0,0 +1,115 @@
1
+ import torch
2
+ from torch import nn
3
+ from .torch_nn import BasicConv, batched_index_select
4
+ from .torch_edge import DenseDilatedKnnGraph, DilatedKnnGraph
5
+ import torch.nn.functional as F
6
+
7
+
8
+ class MRConv2d(nn.Module):
9
+ """
10
+ Max-Relative Graph Convolution (Paper: https://arxiv.org/abs/1904.03751) for dense data type
11
+ """
12
+ def __init__(self, in_channels, out_channels, act='relu', norm=None, bias=True):
13
+ super(MRConv2d, self).__init__()
14
+ self.nn = BasicConv([in_channels*2, out_channels], act, norm, bias)
15
+
16
+ def forward(self, x, edge_index):
17
+ x_i = batched_index_select(x, edge_index[1])
18
+ x_j = batched_index_select(x, edge_index[0])
19
+ x_j, _ = torch.max(x_j - x_i, -1, keepdim=True)
20
+ return self.nn(torch.cat([x, x_j], dim=1))
21
+
22
+
23
+ class EdgeConv2d(nn.Module):
24
+ """
25
+ Edge convolution layer (with activation, batch normalization) for dense data type
26
+ """
27
+ def __init__(self, in_channels, out_channels, act='relu', norm=None, bias=True):
28
+ super(EdgeConv2d, self).__init__()
29
+ self.nn = BasicConv([in_channels * 2, out_channels], act, norm, bias)
30
+
31
+ def forward(self, x, edge_index):
32
+ x_i = batched_index_select(x, edge_index[1])
33
+ x_j = batched_index_select(x, edge_index[0])
34
+ max_value, _ = torch.max(self.nn(torch.cat([x_i, x_j - x_i], dim=1)), -1, keepdim=True)
35
+ return max_value
36
+
37
+
38
+ class GraphConv2d(nn.Module):
39
+ """
40
+ Static graph convolution layer
41
+ """
42
+ def __init__(self, in_channels, out_channels, conv='edge', act='relu', norm=None, bias=True):
43
+ super(GraphConv2d, self).__init__()
44
+ if conv == 'edge':
45
+ self.gconv = EdgeConv2d(in_channels, out_channels, act, norm, bias)
46
+ elif conv == 'mr':
47
+ self.gconv = MRConv2d(in_channels, out_channels, act, norm, bias)
48
+ else:
49
+ raise NotImplementedError('conv:{} is not supported'.format(conv))
50
+
51
+ def forward(self, x, edge_index):
52
+ return self.gconv(x, edge_index)
53
+
54
+
55
+ class DynConv2d(GraphConv2d):
56
+ """
57
+ Dynamic graph convolution layer
58
+ """
59
+ def __init__(self, in_channels, out_channels, kernel_size=9, dilation=1, conv='edge', act='relu',
60
+ norm=None, bias=True, stochastic=False, epsilon=0.0, knn='matrix'):
61
+ super(DynConv2d, self).__init__(in_channels, out_channels, conv, act, norm, bias)
62
+ self.k = kernel_size
63
+ self.d = dilation
64
+ if knn == 'matrix':
65
+ self.dilated_knn_graph = DenseDilatedKnnGraph(kernel_size, dilation, stochastic, epsilon)
66
+ else:
67
+ self.dilated_knn_graph = DilatedKnnGraph(kernel_size, dilation, stochastic, epsilon)
68
+
69
+ def forward(self, x):
70
+ edge_index = self.dilated_knn_graph(x)
71
+ return super(DynConv2d, self).forward(x, edge_index)
72
+
73
+
74
+ class PlainDynBlock2d(nn.Module):
75
+ """
76
+ Plain Dynamic graph convolution block
77
+ """
78
+ def __init__(self, in_channels, kernel_size=9, dilation=1, conv='edge', act='relu', norm=None,
79
+ bias=True, stochastic=False, epsilon=0.0, knn='matrix'):
80
+ super(PlainDynBlock2d, self).__init__()
81
+ self.body = DynConv2d(in_channels, in_channels, kernel_size, dilation, conv,
82
+ act, norm, bias, stochastic, epsilon, knn)
83
+
84
+ def forward(self, x):
85
+ return self.body(x)
86
+
87
+
88
+ class ResDynBlock2d(nn.Module):
89
+ """
90
+ Residual Dynamic graph convolution block
91
+ """
92
+ def __init__(self, in_channels, kernel_size=9, dilation=1, conv='edge', act='relu', norm=None,
93
+ bias=True, stochastic=False, epsilon=0.0, knn='matrix', res_scale=1):
94
+ super(ResDynBlock2d, self).__init__()
95
+ self.body = DynConv2d(in_channels, in_channels, kernel_size, dilation, conv,
96
+ act, norm, bias, stochastic, epsilon, knn)
97
+ self.res_scale = res_scale
98
+
99
+ def forward(self, x):
100
+ return self.body(x) + x*self.res_scale
101
+
102
+
103
+ class DenseDynBlock2d(nn.Module):
104
+ """
105
+ Dense Dynamic graph convolution block
106
+ """
107
+ def __init__(self, in_channels, out_channels=64, kernel_size=9, dilation=1, conv='edge',
108
+ act='relu', norm=None,bias=True, stochastic=False, epsilon=0.0, knn='matrix'):
109
+ super(DenseDynBlock2d, self).__init__()
110
+ self.body = DynConv2d(in_channels, out_channels, kernel_size, dilation, conv,
111
+ act, norm, bias, stochastic, epsilon, knn)
112
+
113
+ def forward(self, x):
114
+ dense = self.body(x)
115
+ return torch.cat((x, dense), 1)
gcn_lib/sparse/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .torch_nn import *
2
+ from .torch_edge import *
3
+ from .torch_vertex import *
4
+
gcn_lib/sparse/torch_edge.py ADDED
@@ -0,0 +1,113 @@
1
+ import torch
2
+ from torch import nn
3
+ from torch_cluster import knn_graph
4
+
5
+
6
+ class Dilated(nn.Module):
7
+ """
8
+ Find dilated neighbors from the neighbor list
9
+ """
10
+ def __init__(self, k=9, dilation=1, stochastic=False, epsilon=0.0):
11
+ super(Dilated, self).__init__()
12
+ self.dilation = dilation
13
+ self.stochastic = stochastic
14
+ self.epsilon = epsilon
15
+ self.k = k
16
+
17
+ def forward(self, edge_index, batch=None):
18
+ if self.stochastic:
19
+ if torch.rand(1) < self.epsilon and self.training:
20
+ num = self.k * self.dilation
21
+ randnum = torch.randperm(num)[:self.k]
22
+ edge_index = edge_index.view(2, -1, num)
23
+ edge_index = edge_index[:, :, randnum]
24
+ return edge_index.view(2, -1)
25
+ else:
26
+ edge_index = edge_index[:, ::self.dilation]
27
+ else:
28
+ edge_index = edge_index[:, ::self.dilation]
29
+ return edge_index
30
+
31
+
32
+ class DilatedKnnGraph(nn.Module):
33
+ """
34
+ Find the neighbors' indices based on dilated knn
35
+ """
36
+ def __init__(self, k=9, dilation=1, stochastic=False, epsilon=0.0, knn='matrix'):
37
+ super(DilatedKnnGraph, self).__init__()
38
+ self.dilation = dilation
39
+ self.stochastic = stochastic
40
+ self.epsilon = epsilon
41
+ self.k = k
42
+ self._dilated = Dilated(k, dilation, stochastic, epsilon)
43
+ if knn == 'matrix':
44
+ self.knn = knn_graph_matrix
45
+ else:
46
+ self.knn = knn_graph
47
+
48
+ def forward(self, x, batch):
49
+ edge_index = self.knn(x, self.k * self.dilation, batch)
50
+ return self._dilated(edge_index, batch)
51
+
52
+
53
+ def pairwise_distance(x):
54
+ """
55
+ Compute pairwise distance of a point cloud.
56
+ Args:
57
+ x: tensor (batch_size, num_points, num_dims)
58
+ Returns:
59
+ pairwise distance: (batch_size, num_points, num_points)
60
+ """
61
+ x_inner = -2*torch.matmul(x, x.transpose(2, 1))
62
+ x_square = torch.sum(torch.mul(x, x), dim=-1, keepdim=True)
63
+ return x_square + x_inner + x_square.transpose(2, 1)
64
+
65
+
66
+ def knn_matrix(x, k=16, batch=None):
67
+ """Get KNN based on the pairwise distance.
68
+ Args:
69
+ x: (num_points, num_dims)
70
+ k: int
71
+ Returns:
72
+ nearest neighbor indices and center indices, each of shape (1, num_points*k)
73
+ """
74
+ with torch.no_grad():
75
+ if batch is None:
76
+ batch_size = 1
77
+ else:
78
+ batch_size = batch[-1] + 1
79
+ x = x.view(batch_size, -1, x.shape[-1])
80
+
81
+ neg_adj = -pairwise_distance(x.detach())
82
+ _, nn_idx = torch.topk(neg_adj, k=k)
83
+ del neg_adj
84
+
85
+ n_points = x.shape[1]
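+ # Offset the neighbour indices of each sample so they address the flattened (batch_size * n_points) node list.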
86
+ start_idx = torch.arange(0, n_points*batch_size, n_points).long().view(batch_size, 1, 1)
87
+ if x.is_cuda:
88
+ start_idx = start_idx.cuda()
89
+ nn_idx += start_idx
90
+ del start_idx
91
+
92
+ if x.is_cuda:
93
+ torch.cuda.empty_cache()
94
+
95
+ nn_idx = nn_idx.view(1, -1)
96
+ center_idx = torch.arange(0, n_points*batch_size).repeat(k, 1).transpose(1, 0).contiguous().view(1, -1)
97
+ if x.is_cuda:
98
+ center_idx = center_idx.cuda()
99
+ return nn_idx, center_idx
100
+
101
+
102
+ def knn_graph_matrix(x, k=16, batch=None):
103
+ """Construct edge feature for each point
104
+ Args:
105
+ x: (num_points, num_dims)
106
+ batch: (num_points, )
107
+ k: int
108
+ Returns:
109
+ edge_index: (2, num_points*k)
110
+ """
111
+ nn_idx, center_idx = knn_matrix(x, k, batch)
112
+ return torch.cat((nn_idx, center_idx), dim=0)
113
+
gcn_lib/sparse/torch_message.py ADDED
@@ -0,0 +1,98 @@
1
+ import torch
2
+ import torch.nn.functional as F
3
+ from torch_geometric.nn import MessagePassing
4
+ from torch_scatter import scatter, scatter_softmax
5
+ from torch_geometric.utils import degree
6
+
7
+
8
+ class GenMessagePassing(MessagePassing):
9
+ def __init__(self, aggr='softmax',
10
+ t=1.0, learn_t=False,
11
+ p=1.0, learn_p=False,
12
+ y=0.0, learn_y=False):
13
+
14
+ if aggr in ['softmax_sg', 'softmax', 'softmax_sum']:
15
+
16
+ super(GenMessagePassing, self).__init__(aggr=None)
17
+ self.aggr = aggr
18
+
19
+ if learn_t and (aggr == 'softmax' or aggr == 'softmax_sum'):
20
+ self.learn_t = True
21
+ self.t = torch.nn.Parameter(torch.Tensor([t]), requires_grad=True)
22
+ else:
23
+ self.learn_t = False
24
+ self.t = t
25
+
26
+ if aggr == 'softmax_sum':
27
+ self.y = torch.nn.Parameter(torch.Tensor([y]), requires_grad=learn_y)
28
+
29
+ elif aggr in ['power', 'power_sum']:
30
+
31
+ super(GenMessagePassing, self).__init__(aggr=None)
32
+ self.aggr = aggr
33
+
34
+ if learn_p:
35
+ self.p = torch.nn.Parameter(torch.Tensor([p]), requires_grad=True)
36
+ else:
37
+ self.p = p
38
+
39
+ if aggr == 'power_sum':
40
+ self.y = torch.nn.Parameter(torch.Tensor([y]), requires_grad=learn_y)
41
+ else:
42
+ super(GenMessagePassing, self).__init__(aggr=aggr)
43
+
44
+ def aggregate(self, inputs, index, ptr=None, dim_size=None):
45
+
46
+ if self.aggr in ['add', 'mean', 'max', None]:
47
+ return super(GenMessagePassing, self).aggregate(inputs, index, ptr, dim_size)
48
+
49
+ elif self.aggr in ['softmax_sg', 'softmax', 'softmax_sum']:
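+ # SoftMax aggregation (DeeperGCN): neighbour messages are weighted by a temperature-scaled softmax over each node's neighbourhood and then summed.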
50
+
51
+ if self.learn_t:
52
+ out = scatter_softmax(inputs*self.t, index, dim=self.node_dim)
53
+ else:
54
+ with torch.no_grad():
55
+ out = scatter_softmax(inputs*self.t, index, dim=self.node_dim)
56
+
57
+ out = scatter(inputs*out, index, dim=self.node_dim,
58
+ dim_size=dim_size, reduce='sum')
59
+
60
+ if self.aggr == 'softmax_sum':
61
+ self.sigmoid_y = torch.sigmoid(self.y)
62
+ degrees = degree(index, num_nodes=dim_size).unsqueeze(1)
63
+ out = torch.pow(degrees, self.sigmoid_y) * out
64
+
65
+ return out
66
+
67
+
68
+ elif self.aggr in ['power', 'power_sum']:
69
+ min_value, max_value = 1e-7, 1e1
70
+ torch.clamp_(inputs, min_value, max_value)
71
+ out = scatter(torch.pow(inputs, self.p), index, dim=self.node_dim,
72
+ dim_size=dim_size, reduce='mean')
73
+ torch.clamp_(out, min_value, max_value)
74
+ out = torch.pow(out, 1/self.p)
75
+
76
+ if self.aggr == 'power_sum':
77
+ self.sigmoid_y = torch.sigmoid(self.y)
78
+ degrees = degree(index, num_nodes=dim_size).unsqueeze(1)
79
+ out = torch.pow(degrees, self.sigmoid_y) * out
80
+
81
+ return out
82
+
83
+ else:
84
+ raise NotImplementedError('To be implemented')
85
+
86
+
87
+ class MsgNorm(torch.nn.Module):
88
+ def __init__(self, learn_msg_scale=False):
89
+ super(MsgNorm, self).__init__()
90
+
91
+ self.msg_scale = torch.nn.Parameter(torch.Tensor([1.0]),
92
+ requires_grad=learn_msg_scale)
93
+
94
+ def forward(self, x, msg, p=2):
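+ # Message normalisation: rescale the aggregated message to the p-norm of the node features, times an (optionally learnable) scale.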
95
+ msg = F.normalize(msg, p=p, dim=1)
96
+ x_norm = x.norm(p=p, dim=1, keepdim=True)
97
+ msg = msg * x_norm * self.msg_scale
98
+ return msg
gcn_lib/sparse/torch_nn.py ADDED
@@ -0,0 +1,160 @@
1
+ import torch
2
+ from torch import nn
3
+ from torch.nn import Sequential as Seq, Linear as Lin
4
+ from utils.data_util import get_atom_feature_dims, get_bond_feature_dims
5
+
6
+
7
+ ##############################
8
+ # Basic layers
9
+ ##############################
10
+ def act_layer(act_type, inplace=False, neg_slope=0.2, n_prelu=1):
11
+ # activation layer
12
+ act = act_type.lower()
13
+ if act == 'relu':
14
+ layer = nn.ReLU(inplace)
15
+ elif act == 'leakyrelu':
16
+ layer = nn.LeakyReLU(neg_slope, inplace)
17
+ elif act == 'prelu':
18
+ layer = nn.PReLU(num_parameters=n_prelu, init=neg_slope)
19
+ else:
20
+ raise NotImplementedError('activation layer [%s] is not found' % act)
21
+ return layer
22
+
23
+
24
+ def norm_layer(norm_type, nc):
25
+ # normalization layer 1d
26
+ norm = norm_type.lower()
27
+ if norm == 'batch':
28
+ layer = nn.BatchNorm1d(nc, affine=True)
29
+ elif norm == 'layer':
30
+ layer = nn.LayerNorm(nc, elementwise_affine=True)
31
+ elif norm == 'instance':
32
+ layer = nn.InstanceNorm1d(nc, affine=False)
33
+ else:
34
+ raise NotImplementedError('normalization layer [%s] is not found' % norm)
35
+ return layer
36
+
37
+
38
+ class MultiSeq(Seq):
39
+ def __init__(self, *args):
40
+ super(MultiSeq, self).__init__(*args)
41
+
42
+ def forward(self, *inputs):
43
+ for module in self._modules.values():
44
+ if type(inputs) == tuple:
45
+ inputs = module(*inputs)
46
+ else:
47
+ inputs = module(inputs)
48
+ return inputs
49
+
50
+
51
+ class MLP(Seq):
52
+ def __init__(self, channels, act='relu',
53
+ norm=None, bias=True,
54
+ drop=0., last_lin=False):
55
+ m = []
56
+
57
+ for i in range(1, len(channels)):
58
+
59
+ m.append(Lin(channels[i - 1], channels[i], bias))
60
+
61
+ if (i == len(channels) - 1) and last_lin:
62
+ pass
63
+ else:
64
+ if norm is not None and norm.lower() != 'none':
65
+ m.append(norm_layer(norm, channels[i]))
66
+ if act is not None and act.lower() != 'none':
67
+ m.append(act_layer(act))
68
+ if drop > 0:
69
+ m.append(nn.Dropout2d(drop))
70
+
71
+ self.m = m
72
+ super(MLP, self).__init__(*self.m)
73
+
74
+
75
+ class AtomEncoder(nn.Module):
76
+
77
+ def __init__(self, emb_dim):
78
+ super(AtomEncoder, self).__init__()
79
+
80
+ self.atom_embedding_list = nn.ModuleList()
81
+ full_atom_feature_dims = get_atom_feature_dims()
82
+
83
+ for i, dim in enumerate(full_atom_feature_dims):
84
+ emb = nn.Embedding(dim, emb_dim)
85
+ nn.init.xavier_uniform_(emb.weight.data)
86
+ self.atom_embedding_list.append(emb)
87
+
88
+ def forward(self, x):
89
+ x_embedding = 0
90
+ for i in range(x.shape[1]):
91
+ x_embedding += self.atom_embedding_list[i](x[:, i])
92
+
93
+ return x_embedding
94
+
95
+
96
+ class BondEncoder(nn.Module):
97
+
98
+ def __init__(self, emb_dim):
99
+ super(BondEncoder, self).__init__()
100
+
101
+ self.bond_embedding_list = nn.ModuleList()
102
+ full_bond_feature_dims = get_bond_feature_dims()
103
+
104
+ for i, dim in enumerate(full_bond_feature_dims):
105
+ emb = nn.Embedding(dim, emb_dim)
106
+ nn.init.xavier_uniform_(emb.weight.data)
107
+ self.bond_embedding_list.append(emb)
108
+
109
+ def forward(self, edge_attr):
110
+ bond_embedding = 0
111
+ for i in range(edge_attr.shape[1]):
112
+ bond_embedding += self.bond_embedding_list[i](edge_attr[:, i])
113
+
114
+ return bond_embedding
115
+
116
+ class MM_BondEncoder(nn.Module):
117
+ # Replaces the lookup in the embedding module by one-hot encoding
118
+ # followed by matrix multiplication to allow Float type input
119
+ # instead of Long type input (backpropagate through layer)
120
+
121
+ def __init__(self, emb_dim):
122
+ super(MM_BondEncoder, self).__init__()
123
+
124
+ self.bond_embedding_list = nn.ModuleList()
125
+ self.full_bond_feature_dims = get_bond_feature_dims()
126
+
127
+ for i, dim in enumerate(self.full_bond_feature_dims):
128
+ emb = nn.Linear(dim, emb_dim, bias=False)
129
+ nn.init.xavier_uniform_(emb.weight.data)
130
+ self.bond_embedding_list.append(emb)
131
+
132
+ def forward(self, edge_attr):
133
+ # Change each feature in edge_attr to a one-hot vector and embed it
134
+ edge_attr1, edge_attr2, edge_attr3 = torch.split(edge_attr, self.full_bond_feature_dims, dim=1)
135
+ bond_embedding = self.bond_embedding_list[0](edge_attr1) + self.bond_embedding_list[1](edge_attr2) + self.bond_embedding_list[2](edge_attr3)
136
+ return bond_embedding
137
+
138
+ class MM_AtomEncoder(nn.Module):
139
+ # Replaces the lookup in the embedding module by one-hot encoding
140
+ # followed by matrix multiplication to allow Float type input
141
+ # instead of Long type input (backpropagate through layer)
142
+
143
+ def __init__(self, emb_dim):
144
+ super(MM_AtomEncoder, self).__init__()
145
+
146
+ self.atom_embedding_list = nn.ModuleList()
147
+ self.full_atom_feature_dims = get_atom_feature_dims()
148
+
149
+ for i, dim in enumerate(self.full_atom_feature_dims):
150
+ emb = nn.Linear(dim, emb_dim, bias=False)
151
+ nn.init.xavier_uniform_(emb.weight.data)
152
+ self.atom_embedding_list.append(emb)
153
+
154
+ def forward(self, x):
155
+ # Change each feature in x to a one-hot vector and embed it
156
+ split = torch.split(x, self.full_atom_feature_dims, dim=1)
157
+ atom_embedding = 0
158
+ for i in range(len(self.full_atom_feature_dims)):
159
+ atom_embedding += self.atom_embedding_list[i](split[i])
160
+ return atom_embedding
gcn_lib/sparse/torch_vertex.py ADDED
@@ -0,0 +1,355 @@
1
+ import torch
2
+ from torch import nn
3
+ import torch.nn.functional as F
4
+ import torch_geometric as tg
5
+ from .torch_nn import MLP, act_layer, norm_layer, BondEncoder, MM_BondEncoder
6
+ from .torch_edge import DilatedKnnGraph
7
+ from .torch_message import GenMessagePassing, MsgNorm
8
+ from torch_geometric.utils import remove_self_loops, add_self_loops
9
+
10
+
11
+ class GENConv(GenMessagePassing):
12
+ """
13
+ GENeralized Graph Convolution (GENConv): https://arxiv.org/pdf/2006.07739.pdf
14
+ SoftMax & PowerMean Aggregation
15
+ """
16
+ def __init__(self, in_dim, emb_dim, args,
17
+ aggr='softmax',
18
+ t=1.0, learn_t=False,
19
+ p=1.0, learn_p=False,
20
+ y=0.0, learn_y=False,
21
+ msg_norm=False, learn_msg_scale=True,
22
+ encode_edge=False, bond_encoder=False,
23
+ edge_feat_dim=None,
24
+ norm='batch', mlp_layers=2,
25
+ eps=1e-7):
26
+
27
+ super(GENConv, self).__init__(aggr=aggr,
28
+ t=t, learn_t=learn_t,
29
+ p=p, learn_p=learn_p,
30
+ y=y, learn_y=learn_y)
31
+
32
+ channels_list = [in_dim]
33
+
34
+ for i in range(mlp_layers-1):
35
+ channels_list.append(in_dim*2)
36
+
37
+ channels_list.append(emb_dim)
38
+
39
+ self.mlp = MLP(channels=channels_list,
40
+ norm=norm,
41
+ last_lin=True)
42
+
43
+ self.msg_encoder = torch.nn.ReLU()
44
+ self.eps = eps
45
+
46
+ self.msg_norm = msg_norm
47
+ self.encode_edge = encode_edge
48
+ self.bond_encoder = bond_encoder
49
+ self.advs = args.advs
50
+ if msg_norm:
51
+ self.msg_norm = MsgNorm(learn_msg_scale=learn_msg_scale)
52
+ else:
53
+ self.msg_norm = None
54
+
55
+ if self.encode_edge:
56
+ if self.bond_encoder:
57
+ if self.advs:
58
+ self.edge_encoder = MM_BondEncoder(emb_dim=in_dim)
59
+ else:
60
+ self.edge_encoder = BondEncoder(emb_dim=in_dim)
61
+ else:
62
+ self.edge_encoder = torch.nn.Linear(edge_feat_dim, in_dim)
63
+
64
+ def forward(self, x, edge_index, edge_attr=None):
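+ # Encode edge features if requested, aggregate neighbour messages, add them residually to the node features, then transform with the MLP.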
65
+ x = x
66
+
67
+ if self.encode_edge and edge_attr is not None:
68
+ edge_emb = self.edge_encoder(edge_attr)
69
+ else:
70
+ edge_emb = edge_attr
71
+
72
+ m = self.propagate(edge_index, x=x, edge_attr=edge_emb)
73
+
74
+ if self.msg_norm is not None:
75
+ m = self.msg_norm(x, m)
76
+
77
+ h = x + m
78
+ out = self.mlp(h)
79
+
80
+ return out
81
+
82
+ def message(self, x_j, edge_attr=None):
83
+
84
+ if edge_attr is not None:
85
+ msg = x_j + edge_attr
86
+ else:
87
+ msg = x_j
88
+
89
+ return self.msg_encoder(msg) + self.eps
90
+
91
+ def update(self, aggr_out):
92
+ return aggr_out
93
+
94
+
95
+ class MRConv(nn.Module):
96
+ """
97
+ Max-Relative Graph Convolution (Paper: https://arxiv.org/abs/1904.03751)
98
+ """
99
+ def __init__(self, in_channels, out_channels, act='relu', norm=None, bias=True, aggr='max'):
100
+ super(MRConv, self).__init__()
101
+ self.nn = MLP([in_channels*2, out_channels], act, norm, bias)
102
+ self.aggr = aggr
103
+
104
+ def forward(self, x, edge_index):
105
+ """"""
106
+ x_j = tg.utils.scatter_(self.aggr, torch.index_select(x, 0, edge_index[0]) - torch.index_select(x, 0, edge_index[1]), edge_index[1], dim_size=x.shape[0])
107
+ return self.nn(torch.cat([x, x_j], dim=1))
108
+
109
+
110
+ class EdgConv(tg.nn.EdgeConv):
111
+ """
112
+ Edge convolution layer (with activation, batch normalization)
113
+ """
114
+ def __init__(self, in_channels, out_channels, act='relu', norm=None, bias=True, aggr='max'):
115
+ super(EdgConv, self).__init__(MLP([in_channels*2, out_channels], act, norm, bias), aggr)
116
+
117
+ def forward(self, x, edge_index):
118
+ return super(EdgConv, self).forward(x, edge_index)
119
+
120
+
121
+ class GATConv(nn.Module):
122
+ """
123
+ Graph Attention Convolution layer (with activation, batch normalization)
124
+ """
125
+ def __init__(self, in_channels, out_channels, act='relu', norm=None, bias=True, heads=8):
126
+ super(GATConv, self).__init__()
127
+ self.gconv = tg.nn.GATConv(in_channels, out_channels, heads, bias=bias)
128
+ m =[]
129
+ if act:
130
+ m.append(act_layer(act))
131
+ if norm:
132
+ m.append(norm_layer(norm, out_channels))
133
+ self.unlinear = nn.Sequential(*m)
134
+
135
+ def forward(self, x, edge_index):
136
+ out = self.unlinear(self.gconv(x, edge_index))
137
+ return out
138
+
139
+
140
+ class SAGEConv(tg.nn.SAGEConv):
141
+ r"""The GraphSAGE operator from the `"Inductive Representation Learning on
142
+ Large Graphs" <https://arxiv.org/abs/1706.02216>`_ paper
143
+
144
+ .. math::
145
+ \mathbf{\hat{x}}_i &= \mathbf{\Theta} \cdot
146
+ \mathrm{mean}_{j \in \mathcal{N(i) \cup \{ i \}}}(\mathbf{x}_j)
147
+
148
+ \mathbf{x}^{\prime}_i &= \frac{\mathbf{\hat{x}}_i}
149
+ {\| \mathbf{\hat{x}}_i \|_2}.
150
+
151
+ Args:
152
+ in_channels (int): Size of each input sample.
153
+ out_channels (int): Size of each output sample.
154
+ normalize (bool, optional): If set to :obj:`False`, output features
155
+ will not be :math:`\ell_2`-normalized. (default: :obj:`True`)
156
+ bias (bool, optional): If set to :obj:`False`, the layer will not learn
157
+ an additive bias. (default: :obj:`True`)
158
+ **kwargs (optional): Additional arguments of
159
+ :class:`torch_geometric.nn.conv.MessagePassing`.
160
+ """
161
+
162
+ def __init__(self,
163
+ in_channels,
164
+ out_channels,
165
+ nn,
166
+ norm=True,
167
+ bias=True,
168
+ relative=False,
169
+ **kwargs):
170
+ self.relative = relative
171
+ if norm is not None:
172
+ super(SAGEConv, self).__init__(in_channels, out_channels, True, bias, **kwargs)
173
+ else:
174
+ super(SAGEConv, self).__init__(in_channels, out_channels, False, bias, **kwargs)
175
+ self.nn = nn
176
+
177
+ def forward(self, x, edge_index, size=None):
178
+ """"""
179
+ if size is None:
180
+ edge_index, _ = remove_self_loops(edge_index)
181
+ edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))
182
+
183
+ x = x.unsqueeze(-1) if x.dim() == 1 else x
184
+ return self.propagate(edge_index, size=size, x=x)
185
+
186
+ def message(self, x_i, x_j):
187
+ if self.relative:
188
+ x = torch.matmul(x_j - x_i, self.weight)
189
+ else:
190
+ x = torch.matmul(x_j, self.weight)
191
+ return x
192
+
193
+ def update(self, aggr_out, x):
194
+ out = self.nn(torch.cat((x, aggr_out), dim=1))
195
+ if self.bias is not None:
196
+ out = out + self.bias
197
+ if self.normalize:
198
+ out = F.normalize(out, p=2, dim=-1)
199
+ return out
200
+
201
+
202
+ class RSAGEConv(SAGEConv):
203
+ """
204
+ Residual SAGE convolution layer (with activation, batch normalization)
205
+ """
206
+
207
+ def __init__(self, in_channels, out_channels, act='relu', norm=None, bias=True, relative=False):
208
+ nn = MLP([out_channels + in_channels, out_channels], act, norm, bias)
209
+ super(RSAGEConv, self).__init__(in_channels, out_channels, nn, norm, bias, relative)
210
+
211
+
212
+ class SemiGCNConv(nn.Module):
213
+ """
214
+ SemiGCN convolution layer (with activation, batch normalization)
215
+ """
216
+
217
+ def __init__(self, in_channels, out_channels, act='relu', norm=None, bias=True):
218
+ super(SemiGCNConv, self).__init__()
219
+ self.gconv = tg.nn.GCNConv(in_channels, out_channels, bias=bias)
220
+ m = []
221
+ if act:
222
+ m.append(act_layer(act))
223
+ if norm:
224
+ m.append(norm_layer(norm, out_channels))
225
+ self.unlinear = nn.Sequential(*m)
226
+
227
+ def forward(self, x, edge_index):
228
+ out = self.unlinear(self.gconv(x, edge_index))
229
+ return out
230
+
231
+
232
+ class GinConv(tg.nn.GINConv):
233
+ """
234
+ GINConv layer (with activation, batch normalization)
235
+ """
236
+ def __init__(self, in_channels, out_channels, act='relu', norm=None, bias=True, aggr='add'):
237
+ super(GinConv, self).__init__(MLP([in_channels, out_channels], act, norm, bias))
238
+
239
+ def forward(self, x, edge_index):
240
+ return super(GinConv, self).forward(x, edge_index)
241
+
242
+
243
+ class GraphConv(nn.Module):
244
+ """
245
+ Static graph convolution layer
246
+ """
247
+ def __init__(self, in_channels, out_channels, conv='edge',
248
+ act='relu', norm=None, bias=True, heads=8):
249
+ super(GraphConv, self).__init__()
250
+ if conv.lower() == 'edge':
251
+ self.gconv = EdgConv(in_channels, out_channels, act, norm, bias)
252
+ elif conv.lower() == 'mr':
253
+ self.gconv = MRConv(in_channels, out_channels, act, norm, bias)
254
+ elif conv.lower() == 'gat':
255
+ self.gconv = GATConv(in_channels, out_channels//heads, act, norm, bias, heads)
256
+ elif conv.lower() == 'gcn':
257
+ self.gconv = SemiGCNConv(in_channels, out_channels, act, norm, bias)
258
+ elif conv.lower() == 'gin':
259
+ self.gconv = GinConv(in_channels, out_channels, act, norm, bias)
260
+ elif conv.lower() == 'sage':
261
+ self.gconv = RSAGEConv(in_channels, out_channels, act, norm, bias, False)
262
+ elif conv.lower() == 'rsage':
263
+ self.gconv = RSAGEConv(in_channels, out_channels, act, norm, bias, True)
264
+ else:
265
+ raise NotImplementedError('conv {} is not implemented'.format(conv))
266
+
267
+ def forward(self, x, edge_index):
268
+ return self.gconv(x, edge_index)
269
+
270
+
271
+ class DynConv(GraphConv):
272
+ """
273
+ Dynamic graph convolution layer
274
+ """
275
+ def __init__(self, in_channels, out_channels, kernel_size=9, dilation=1, conv='edge', act='relu',
276
+ norm=None, bias=True, heads=8, **kwargs):
277
+ super(DynConv, self).__init__(in_channels, out_channels, conv, act, norm, bias, heads)
278
+ self.k = kernel_size
279
+ self.d = dilation
280
+ self.dilated_knn_graph = DilatedKnnGraph(kernel_size, dilation, **kwargs)
281
+
282
+ def forward(self, x, batch=None):
283
+ edge_index = self.dilated_knn_graph(x, batch)
284
+ return super(DynConv, self).forward(x, edge_index)
285
+
286
+
287
+ class PlainDynBlock(nn.Module):
288
+ """
289
+ Plain Dynamic graph convolution block
290
+ """
291
+ def __init__(self, channels, kernel_size=9, dilation=1, conv='edge', act='relu', norm=None,
292
+ bias=True, res_scale=1, **kwargs):
293
+ super(PlainDynBlock, self).__init__()
294
+ self.body = DynConv(channels, channels, kernel_size, dilation, conv,
295
+ act, norm, bias, **kwargs)
296
+ self.res_scale = res_scale
297
+
298
+ def forward(self, x, batch=None):
299
+ return self.body(x, batch), batch
300
+
301
+
302
+ class ResDynBlock(nn.Module):
303
+ """
304
+ Residual Dynamic graph convolution block
305
+ """
306
+ def __init__(self, channels, kernel_size=9, dilation=1, conv='edge', act='relu', norm=None,
307
+ bias=True, res_scale=1, **kwargs):
308
+ super(ResDynBlock, self).__init__()
309
+ self.body = DynConv(channels, channels, kernel_size, dilation, conv,
310
+ act, norm, bias, **kwargs)
311
+ self.res_scale = res_scale
312
+
313
+ def forward(self, x, batch=None):
314
+ return self.body(x, batch) + x*self.res_scale, batch
315
+
316
+
317
+ class DenseDynBlock(nn.Module):
318
+ """
319
+ Dense Dynamic graph convolution block
320
+ """
321
+ def __init__(self, in_channels, out_channels=64, kernel_size=9, dilation=1, conv='edge', act='relu', norm=None, bias=True, **kwargs):
322
+ super(DenseDynBlock, self).__init__()
323
+ self.body = DynConv(in_channels, out_channels, kernel_size, dilation, conv,
324
+ act, norm, bias, **kwargs)
325
+
326
+ def forward(self, x, batch=None):
327
+ dense = self.body(x, batch)
328
+ return torch.cat((x, dense), 1), batch
329
+
330
+
331
+ class ResGraphBlock(nn.Module):
332
+ """
333
+ Residual Static graph convolution block
334
+ """
335
+ def __init__(self, channels, conv='edge', act='relu', norm=None, bias=True, heads=8, res_scale=1):
336
+ super(ResGraphBlock, self).__init__()
337
+ self.body = GraphConv(channels, channels, conv, act, norm, bias, heads)
338
+ self.res_scale = res_scale
339
+
340
+ def forward(self, x, edge_index):
341
+ return self.body(x, edge_index) + x*self.res_scale, edge_index
342
+
343
+
344
+ class DenseGraphBlock(nn.Module):
345
+ """
346
+ Dense Static graph convolution block
347
+ """
348
+ def __init__(self, in_channels, out_channels, conv='edge', act='relu', norm=None, bias=True, heads=8):
349
+ super(DenseGraphBlock, self).__init__()
350
+ self.body = GraphConv(in_channels, out_channels, conv, act, norm, bias, heads)
351
+
352
+ def forward(self, x, edge_index):
353
+ dense = self.body(x, edge_index)
354
+ return torch.cat((x, dense), 1), edge_index
355
+
gradio/title.md ADDED
@@ -0,0 +1,19 @@
1
+ <div>
2
+ <div>
3
+ <div style="display: flex; justify-content: center; align-items: center; text-align: center; font-size: 40px;">
4
+ <b>⚛️ PLA-Net</b>
5
+ </div>
6
+ <br>
7
+ <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
8
+ <a href="#"><img src="https://img.shields.io/static/v1?label=11.6&message=CUDA&color=green"></a> &ensp;
9
+ <a href="#"><img src="https://img.shields.io/static/v1?label=1.12.0&message=Pytorch&color=red"></a> &ensp;
10
+ <a href="https://github.com/juliocesar-io/PLA-Net"><img src="https://img.shields.io/static/v1?logo=github&label=Github&message=Fork"></a>
11
+ </div>
12
+ <br>
13
+ <div style="justify-content: center; align-items: center; text-align: center; font-size: 14px;">
14
+ <p>
15
+ Run inference with PLA-Net for a single protein and multiple ligands to predict their binding affinities using Graph Neural Networks.
16
+ </p>
17
+ </div>
18
+ </div>
19
+ </div>
model/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ import sys
2
+ import os
3
+ ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
4
+ sys.path.append(ROOT_DIR)
model/model.py ADDED
@@ -0,0 +1,246 @@
1
+ import torch
2
+ import torch.nn.functional as F
3
+ from torch_geometric.nn import global_add_pool, global_mean_pool, global_max_pool
4
+
5
+ from gcn_lib.sparse.torch_vertex import GENConv
6
+ from gcn_lib.sparse.torch_nn import norm_layer, MLP, MM_AtomEncoder
7
+
8
+ from model.model_encoder import AtomEncoder, BondEncoder
9
+
10
+ import logging
11
+
12
+
13
+ class DeeperGCN(torch.nn.Module):
14
+ def __init__(self, args, is_prot=False, saliency=False):
15
+ super(DeeperGCN, self).__init__()
16
+
17
+ # Set PM configuration
18
+ if is_prot:
19
+ self.num_layers = args.num_layers_prot
20
+ mlp_layers = args.mlp_layers_prot
21
+ hidden_channels = args.hidden_channels_prot
22
+ self.msg_norm = args.msg_norm_prot
23
+ learn_msg_scale = args.learn_msg_scale_prot
24
+ self.conv_encode_edge = args.conv_encode_edge_prot
25
+
26
+ # Set LM configuration
27
+ else:
28
+ self.num_layers = args.num_layers
29
+ mlp_layers = args.mlp_layers
30
+ hidden_channels = args.hidden_channels
31
+ self.msg_norm = args.msg_norm
32
+ learn_msg_scale = args.learn_msg_scale
33
+ self.conv_encode_edge = args.conv_encode_edge
34
+
35
+ # Set overall model configuration
36
+ self.dropout = args.dropout
37
+ self.block = args.block
38
+ self.add_virtual_node = args.add_virtual_node
39
+ self.training = True
40
+ self.args = args
41
+
42
+ num_classes = args.nclasses
43
+ conv = args.conv
44
+ aggr = args.gcn_aggr
45
+ t = args.t
46
+ self.learn_t = args.learn_t
47
+ p = args.p
48
+ self.learn_p = args.learn_p
49
+
50
+ norm = args.norm
51
+
52
+ graph_pooling = args.graph_pooling
53
+
54
+ # Print model parameters
55
+ print(
56
+ "The number of layers {}".format(self.num_layers),
57
+ "Aggr aggregation method {}".format(aggr),
58
+ "block: {}".format(self.block),
59
+ )
60
+ if self.block == "res+":
61
+ print("LN/BN->ReLU->GraphConv->Res")
62
+ elif self.block == "res":
63
+ print("GraphConv->LN/BN->ReLU->Res")
64
+ elif self.block == "dense":
65
+ raise NotImplementedError("To be implemented")
66
+ elif self.block == "plain":
67
+ print("GraphConv->LN/BN->ReLU")
68
+ else:
69
+ raise Exception("Unknown block Type")
70
+
71
+ self.gcns = torch.nn.ModuleList()
72
+ self.norms = torch.nn.ModuleList()
73
+
74
+ if self.add_virtual_node:
75
+ self.virtualnode_embedding = torch.nn.Embedding(1, hidden_channels)
76
+ torch.nn.init.constant_(self.virtualnode_embedding.weight.data, 0)
77
+
78
+ self.mlp_virtualnode_list = torch.nn.ModuleList()
79
+
80
+ for layer in range(self.num_layers - 1):
81
+ self.mlp_virtualnode_list.append(MLP([hidden_channels] * 3, norm=norm))
82
+
83
+ # Set GCN layer configuration
84
+ for layer in range(self.num_layers):
85
+ if conv == "gen":
86
+ gcn = GENConv(
87
+ hidden_channels,
88
+ hidden_channels,
89
+ args,
90
+ aggr=aggr,
91
+ t=t,
92
+ learn_t=self.learn_t,
93
+ p=p,
94
+ learn_p=self.learn_p,
95
+ msg_norm=self.msg_norm,
96
+ learn_msg_scale=learn_msg_scale,
97
+ encode_edge=self.conv_encode_edge,
98
+ bond_encoder=True,
99
+ norm=norm,
100
+ mlp_layers=mlp_layers,
101
+ )
102
+ else:
103
+ raise Exception("Unknown Conv Type")
104
+ self.gcns.append(gcn)
105
+ self.norms.append(norm_layer(norm, hidden_channels))
106
+
107
+ # Set embedding layers
108
+ self.atom_encoder = AtomEncoder(emb_dim=hidden_channels)
109
+
110
+ if saliency:
111
+ self.atom_encoder = MM_AtomEncoder(emb_dim=hidden_channels)
112
+ else:
113
+ self.atom_encoder = AtomEncoder(emb_dim=hidden_channels)
114
+
115
+ if not self.conv_encode_edge:
116
+ self.bond_encoder = BondEncoder(emb_dim=hidden_channels)
117
+
118
+ # Set type of pooling
119
+ if graph_pooling == "sum":
120
+ self.pool = global_add_pool
121
+ elif graph_pooling == "mean":
122
+ self.pool = global_mean_pool
123
+ elif graph_pooling == "max":
124
+ self.pool = global_max_pool
125
+ else:
126
+ raise Exception("Unknown Pool Type")
127
+
128
+ # Set classification layer
129
+ self.graph_pred_linear = torch.nn.Linear(hidden_channels, num_classes)
130
+
131
+ def forward(self, input_batch, dropout=True, embeddings=False):
132
+
133
+ x = input_batch.x
134
+ edge_index = input_batch.edge_index
135
+ edge_attr = input_batch.edge_attr
136
+ batch = input_batch.batch
137
+
138
+ h = self.atom_encoder(x)
139
+
140
+ if self.add_virtual_node:
141
+ virtualnode_embedding = self.virtualnode_embedding(
142
+ torch.zeros(batch[-1].item() + 1)
143
+ .to(edge_index.dtype)
144
+ .to(edge_index.device)
145
+ )
146
+ h = h + virtualnode_embedding[batch]
147
+
148
+ if self.conv_encode_edge:
149
+ edge_emb = edge_attr
150
+ else:
151
+ edge_emb = self.bond_encoder(edge_attr)
152
+
153
+ if self.block == "res+":
154
+
155
+ h = self.gcns[0](h, edge_index, edge_emb)
156
+
157
+ for layer in range(1, self.num_layers):
158
+ h1 = self.norms[layer - 1](h)
159
+ h2 = F.relu(h1)
160
+ if dropout:
161
+ h2 = F.dropout(h2, p=self.dropout, training=self.training)
162
+
163
+ if self.add_virtual_node:
164
+ virtualnode_embedding_temp = (
165
+ global_add_pool(h2, batch) + virtualnode_embedding
166
+ )
167
+ if dropout:
168
+ virtualnode_embedding = F.dropout(
169
+ self.mlp_virtualnode_list[layer - 1](
170
+ virtualnode_embedding_temp
171
+ ),
172
+ self.dropout,
173
+ training=self.training,
174
+ )
175
+
176
+ h2 = h2 + virtualnode_embedding[batch]
177
+
178
+ h = self.gcns[layer](h2, edge_index, edge_emb) + h
179
+
180
+ h = self.norms[self.num_layers - 1](h)
181
+ if dropout:
182
+ h = F.dropout(h, p=self.dropout, training=self.training)
183
+
184
+ elif self.block == "res":
185
+
186
+ h = F.relu(self.norms[0](self.gcns[0](h, edge_index, edge_emb)))
187
+ h = F.dropout(h, p=self.dropout, training=self.training)
188
+
189
+ for layer in range(1, self.num_layers):
190
+ h1 = self.gcns[layer](h, edge_index, edge_emb)
191
+ h2 = self.norms[layer](h1)
192
+ h = F.relu(h2) + h
193
+ h = F.dropout(h, p=self.dropout, training=self.training)
194
+
195
+ elif self.block == "dense":
196
+ raise NotImplementedError("To be implemented")
197
+
198
+ elif self.block == "plain":
199
+
200
+ h = F.relu(self.norms[0](self.gcns[0](h, edge_index, edge_emb)))
201
+ h = F.dropout(h, p=self.dropout, training=self.training)
202
+
203
+ for layer in range(1, self.num_layers):
204
+ h1 = self.gcns[layer](h, edge_index, edge_emb)
205
+ h2 = self.norms[layer](h1)
206
+ if layer != (self.num_layers - 1):
207
+ h = F.relu(h2)
208
+ else:
209
+ h = h2
210
+ h = F.dropout(h, p=self.dropout, training=self.training)
211
+ else:
212
+ raise Exception("Unknown block Type")
213
+
214
+ h_graph = self.pool(h, batch)
215
+
216
+ if self.args.use_prot or embeddings:
217
+ return h_graph
218
+ else:
219
+ return self.graph_pred_linear(h_graph)
220
+
221
+ def print_params(self, epoch=None, final=False):
222
+
223
+ if self.learn_t:
224
+ ts = []
225
+ for gcn in self.gcns:
226
+ ts.append(gcn.t.item())
227
+ if final:
228
+ print("Final t {}".format(ts))
229
+ else:
230
+ logging.info("Epoch {}, t {}".format(epoch, ts))
231
+ if self.learn_p:
232
+ ps = []
233
+ for gcn in self.gcns:
234
+ ps.append(gcn.p.item())
235
+ if final:
236
+ print("Final p {}".format(ps))
237
+ else:
238
+ logging.info("Epoch {}, p {}".format(epoch, ps))
239
+ if self.msg_norm:
240
+ ss = []
241
+ for gcn in self.gcns:
242
+ ss.append(gcn.msg_norm.msg_scale.item())
243
+ if final:
244
+ print("Final s {}".format(ss))
245
+ else:
246
+ logging.info("Epoch {}, s {}".format(epoch, ss))
model/model_concatenation.py ADDED
@@ -0,0 +1,92 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+ from gcn_lib.sparse.torch_nn import MLP
6
+
7
+ from model.model import DeeperGCN
8
+
9
+ import numpy as np
10
+ import logging
11
+
12
+
13
+ class PLANet(torch.nn.Module):
14
+ def __init__(self, args,saliency=False):
15
+ super(PLANet, self).__init__()
16
+
17
+ # Args
18
+ self.args = args
19
+ # Molecule and protein networks
20
+ self.molecule_gcn = DeeperGCN(args, saliency=saliency)
21
+ self.target_gcn = DeeperGCN(args, is_prot=True)
22
+
23
+ # Individual modules' final embedding size
24
+ output_molecule = args.hidden_channels
25
+ output_protein = args.hidden_channels_prot
26
+ # Concatenated embedding size
27
+ Final_output = output_molecule + output_protein
28
+ # Overall model's final embedding size
29
+ hidden_channels = args.hidden_channels
30
+
31
+ # Multiplier
32
+ if args.multi_concat:
33
+ self.multiplier_prot = torch.nn.Parameter(torch.zeros(hidden_channels))
34
+ self.multiplier_ligand = torch.nn.Parameter(torch.ones(hidden_channels))
35
+ elif self.args.MLP:
36
+ # MLP
37
+ hidden_channel = 64
38
+ channels_concat = [256, hidden_channel, hidden_channel, 128]
39
+ self.concatenation_gcn = MLP(channels_concat, norm=args.norm, last_lin=True)
40
+ # breakpoint()
41
+ indices = np.diag_indices(hidden_channel)
42
+ tensor_linear_layer = torch.zeros(hidden_channel, Final_output)
43
+ tensor_linear_layer[indices[0], indices[1]] = 1
44
+ self.concatenation_gcn[0].weight = torch.nn.Parameter(tensor_linear_layer)
45
+ self.concatenation_gcn[0].bias = torch.nn.Parameter(
46
+ torch.zeros(hidden_channel)
47
+ )
48
+ else:
49
+ # Concatenation Layer
50
+ self.concatenation_gcn = nn.Linear(Final_output, hidden_channels)
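+ # Weights are initialised as [I | 0], so at the start the ligand embedding passes through unchanged while the protein contribution is zero and is learned during training.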
51
+ indices = np.diag_indices(output_molecule)
52
+ tensor_linear_layer = torch.zeros(hidden_channels, Final_output)
53
+ tensor_linear_layer[indices[0], indices[1]] = 1
54
+ self.concatenation_gcn.weight = torch.nn.Parameter(tensor_linear_layer)
55
+ self.concatenation_gcn.bias = torch.nn.Parameter(
56
+ torch.zeros(hidden_channels)
57
+ )
58
+
59
+ # Classification Layer
60
+ num_classes = args.nclasses
61
+ self.classification = nn.Linear(hidden_channels, num_classes)
62
+
63
+ def forward(self, molecule, target):
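+ # Encode ligand and protein with their respective DeeperGCN modules, fuse the embeddings (weighted sum or concatenation + linear/MLP), then classify.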
64
+
65
+ molecule_features = self.molecule_gcn(molecule)
66
+ target_features = self.target_gcn(target)
67
+ # Multiplier
68
+ if self.args.multi_concat:
69
+ All_features = (
70
+ target_features * self.multiplier_prot
71
+ + molecule_features * self.multiplier_ligand
72
+ )
73
+ else:
74
+ # Concatenation of LM and PM modules
75
+ All_features = torch.cat((molecule_features, target_features), dim=1)
76
+ All_features = self.concatenation_gcn(All_features)
77
+ # Classification
78
+ classification = self.classification(All_features)
79
+
80
+ return classification
81
+
82
+ def print_params(self, epoch=None, final=False):
83
+
84
+ logging.info("======= Molecule GCN ========")
85
+ self.molecule_gcn.print_params(epoch)
86
+ logging.info("======= Protein GCN ========")
87
+ self.target_gcn.print_params(epoch)
88
+ if self.args.multi_concat:
89
+ sum_prot_multi = sum(self.multiplier_prot)
90
+ sum_lig_multi = sum(self.multiplier_ligand)
91
+ logging.info("Sumed prot multi: {}".format(sum_prot_multi))
92
+ logging.info("Sumed lig multi: {}".format(sum_lig_multi))
model/model_encoder.py ADDED
@@ -0,0 +1,54 @@
1
+ import torch
2
+ from data.features import get_atom_feature_dims, get_bond_feature_dims
3
+
4
+ full_atom_feature_dims = get_atom_feature_dims()
5
+ full_bond_feature_dims = get_bond_feature_dims()
6
+
7
+ class AtomEncoder(torch.nn.Module):
8
+
9
+ def __init__(self, emb_dim):
10
+ super(AtomEncoder, self).__init__()
11
+
12
+ self.atom_embedding_list = torch.nn.ModuleList()
13
+
14
+ for i, dim in enumerate(full_atom_feature_dims):
15
+ emb = torch.nn.Embedding(dim, emb_dim)
16
+ torch.nn.init.xavier_uniform_(emb.weight.data)
17
+ self.atom_embedding_list.append(emb)
18
+
19
+ def forward(self, x):
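+ # Sum one learned embedding per categorical atom feature column to obtain the initial node representation.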
20
+ x_embedding = 0
21
+ for i in range(x.shape[1]):
22
+ x_embedding += self.atom_embedding_list[i](x[:,i])
23
+
24
+ return x_embedding
25
+
26
+
27
+ class BondEncoder(torch.nn.Module):
28
+
29
+ def __init__(self, emb_dim):
30
+ super(BondEncoder, self).__init__()
31
+
32
+ self.bond_embedding_list = torch.nn.ModuleList()
33
+
34
+ for i, dim in enumerate(full_bond_feature_dims):
35
+ emb = torch.nn.Embedding(dim, emb_dim)
36
+ torch.nn.init.xavier_uniform_(emb.weight.data)
37
+ self.bond_embedding_list.append(emb)
38
+
39
+ def forward(self, edge_attr):
40
+ bond_embedding = 0
41
+ for i in range(edge_attr.shape[1]):
42
+ bond_embedding += self.bond_embedding_list[i](edge_attr[:,i])
43
+
44
+ return bond_embedding
45
+
46
+
47
+ if __name__ == '__main__':
48
+ from loader import GraphClassificationPygDataset
49
+ dataset = GraphClassificationPygDataset(name = 'tox21')
50
+ atom_enc = AtomEncoder(100)
51
+ bond_enc = BondEncoder(100)
52
+
53
+ print(atom_enc(dataset[0].x))
54
+ print(bond_enc(dataset[0].edge_attr))
pretrained-models/BINARY_ada/Fold1/Best_Model.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d20694b2317f52610baf8126b11587b0732f85724a9322a7f10aaac44d1d5ca0
3
+ size 22503711
pretrained-models/BINARY_ada/Fold2/Best_Model.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70bd05db3d218f477792f56f681f09e571f817298c1d2f94c4b8f5c850725a88
3
+ size 22506847
pretrained-models/BINARY_ada/Fold3/Best_Model.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dbfbe976cb712448f80eb354d45670c37c4d67c6949f80cfd9ce8c2716fdc9b
3
+ size 22505567
pretrained-models/BINARY_ada/Fold4/Best_Model.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f9d16bc2669153e692680c8e0db26c81029ef47f65755e34e1ae71f4c359526
3
+ size 22506783
requirements.txt ADDED
@@ -0,0 +1,9 @@
1
+ rdkit-pypi==2021.9.3
2
+ modlamp==4.3.0
3
+ ogb==1.3.6
4
+ tqdm==4.63.0
5
+ h5py==3.11.0
6
+ scipy==1.9.0
7
+ numpy==1.24.4
8
+ gradio==4.43.0
9
+ fastapi==0.112.4
scripts/__init__.py ADDED
File without changes
scripts/model/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ import sys
2
+ import os
3
+ ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
4
+ sys.path.append(ROOT_DIR)
scripts/model/model.py ADDED
@@ -0,0 +1,246 @@
1
+ import torch
2
+ import torch.nn.functional as F
3
+ from torch_geometric.nn import global_add_pool, global_mean_pool, global_max_pool
4
+
5
+ from gcn_lib.sparse.torch_vertex import GENConv
6
+ from gcn_lib.sparse.torch_nn import norm_layer, MLP, MM_AtomEncoder
7
+
8
+ from model.model_encoder import AtomEncoder, BondEncoder
9
+
10
+ import logging
11
+
12
+
13
+ class DeeperGCN(torch.nn.Module):
14
+ def __init__(self, args, is_prot=False, saliency=False):
15
+ super(DeeperGCN, self).__init__()
16
+
17
+ # Set PM configuration
18
+ if is_prot:
19
+ self.num_layers = args.num_layers_prot
20
+ mlp_layers = args.mlp_layers_prot
21
+ hidden_channels = args.hidden_channels_prot
22
+ self.msg_norm = args.msg_norm_prot
23
+ learn_msg_scale = args.learn_msg_scale_prot
24
+ self.conv_encode_edge = args.conv_encode_edge_prot
25
+
26
+ # Set LM configuration
27
+ else:
28
+ self.num_layers = args.num_layers
29
+ mlp_layers = args.mlp_layers
30
+ hidden_channels = args.hidden_channels
31
+ self.msg_norm = args.msg_norm
32
+ learn_msg_scale = args.learn_msg_scale
33
+ self.conv_encode_edge = args.conv_encode_edge
34
+
35
+ # Set overall model configuration
36
+ self.dropout = args.dropout
37
+ self.block = args.block
38
+ self.add_virtual_node = args.add_virtual_node
39
+ self.training = True
40
+ self.args = args
41
+
42
+ num_classes = args.nclasses
43
+ conv = args.conv
44
+ aggr = args.gcn_aggr
45
+ t = args.t
46
+ self.learn_t = args.learn_t
47
+ p = args.p
48
+ self.learn_p = args.learn_p
49
+
50
+ norm = args.norm
51
+
52
+ graph_pooling = args.graph_pooling
53
+
54
+ # Print model parameters
55
+ print(
56
+ "The number of layers {}".format(self.num_layers),
57
+ "Aggr aggregation method {}".format(aggr),
58
+ "block: {}".format(self.block),
59
+ )
60
+ if self.block == "res+":
61
+ print("LN/BN->ReLU->GraphConv->Res")
62
+ elif self.block == "res":
63
+ print("GraphConv->LN/BN->ReLU->Res")
64
+ elif self.block == "dense":
65
+ raise NotImplementedError("To be implemented")
66
+ elif self.block == "plain":
67
+ print("GraphConv->LN/BN->ReLU")
68
+ else:
69
+ raise Exception("Unknown block Type")
70
+
71
+ self.gcns = torch.nn.ModuleList()
72
+ self.norms = torch.nn.ModuleList()
73
+
74
+ if self.add_virtual_node:
75
+ self.virtualnode_embedding = torch.nn.Embedding(1, hidden_channels)
76
+ torch.nn.init.constant_(self.virtualnode_embedding.weight.data, 0)
77
+
78
+ self.mlp_virtualnode_list = torch.nn.ModuleList()
79
+
80
+ for layer in range(self.num_layers - 1):
81
+ self.mlp_virtualnode_list.append(MLP([hidden_channels] * 3, norm=norm))
82
+
83
+ # Set GCN layer configuration
84
+ for layer in range(self.num_layers):
85
+ if conv == "gen":
86
+ gcn = GENConv(
87
+ hidden_channels,
88
+ hidden_channels,
89
+ args,
90
+ aggr=aggr,
91
+ t=t,
92
+ learn_t=self.learn_t,
93
+ p=p,
94
+ learn_p=self.learn_p,
95
+ msg_norm=self.msg_norm,
96
+ learn_msg_scale=learn_msg_scale,
97
+ encode_edge=self.conv_encode_edge,
98
+ bond_encoder=True,
99
+ norm=norm,
100
+ mlp_layers=mlp_layers,
101
+ )
102
+ else:
103
+ raise Exception("Unknown Conv Type")
104
+ self.gcns.append(gcn)
105
+ self.norms.append(norm_layer(norm, hidden_channels))
106
+
107
+ # Set embedding layers
108
+ self.atom_encoder = AtomEncoder(emb_dim=hidden_channels)
109
+
110
+ if saliency:
111
+ self.atom_encoder = MM_AtomEncoder(emb_dim=hidden_channels)
112
+ else:
113
+ self.atom_encoder = AtomEncoder(emb_dim=hidden_channels)
114
+
115
+ if not self.conv_encode_edge:
116
+ self.bond_encoder = BondEncoder(emb_dim=hidden_channels)
117
+
118
+ # Set type of pooling
119
+ if graph_pooling == "sum":
120
+ self.pool = global_add_pool
121
+ elif graph_pooling == "mean":
122
+ self.pool = global_mean_pool
123
+ elif graph_pooling == "max":
124
+ self.pool = global_max_pool
125
+ else:
126
+ raise Exception("Unknown Pool Type")
127
+
128
+ # Set classification layer
129
+ self.graph_pred_linear = torch.nn.Linear(hidden_channels, num_classes)
130
+
131
+ def forward(self, input_batch, dropout=True, embeddings=False):
132
+
133
+ x = input_batch.x
134
+ edge_index = input_batch.edge_index
135
+ edge_attr = input_batch.edge_attr
136
+ batch = input_batch.batch
137
+
138
+ h = self.atom_encoder(x)
139
+
140
+ if self.add_virtual_node:
141
+ virtualnode_embedding = self.virtualnode_embedding(
142
+ torch.zeros(batch[-1].item() + 1)
143
+ .to(edge_index.dtype)
144
+ .to(edge_index.device)
145
+ )
146
+ h = h + virtualnode_embedding[batch]
147
+
148
+ if self.conv_encode_edge:
149
+ edge_emb = edge_attr
150
+ else:
151
+ edge_emb = self.bond_encoder(edge_attr)
152
+
153
+ if self.block == "res+":
154
+
155
+ h = self.gcns[0](h, edge_index, edge_emb)
156
+
157
+ for layer in range(1, self.num_layers):
158
+ h1 = self.norms[layer - 1](h)
159
+ h2 = F.relu(h1)
160
+ if dropout:
161
+ h2 = F.dropout(h2, p=self.dropout, training=self.training)
162
+
163
+ if self.add_virtual_node:
164
+ virtualnode_embedding_temp = (
165
+ global_add_pool(h2, batch) + virtualnode_embedding
166
+ )
167
+ if dropout:
168
+ virtualnode_embedding = F.dropout(
169
+ self.mlp_virtualnode_list[layer - 1](
170
+ virtualnode_embedding_temp
171
+ ),
172
+ self.dropout,
173
+ training=self.training,
174
+ )
175
+
176
+ h2 = h2 + virtualnode_embedding[batch]
177
+
178
+ h = self.gcns[layer](h2, edge_index, edge_emb) + h
179
+
180
+ h = self.norms[self.num_layers - 1](h)
181
+ if dropout:
182
+ h = F.dropout(h, p=self.dropout, training=self.training)
183
+
184
+ elif self.block == "res":
185
+
186
+ h = F.relu(self.norms[0](self.gcns[0](h, edge_index, edge_emb)))
187
+ h = F.dropout(h, p=self.dropout, training=self.training)
188
+
189
+ for layer in range(1, self.num_layers):
190
+ h1 = self.gcns[layer](h, edge_index, edge_emb)
191
+ h2 = self.norms[layer](h1)
192
+ h = F.relu(h2) + h
193
+ h = F.dropout(h, p=self.dropout, training=self.training)
194
+
195
+ elif self.block == "dense":
196
+ raise NotImplementedError("To be implemented")
197
+
198
+ elif self.block == "plain":
199
+
200
+ h = F.relu(self.norms[0](self.gcns[0](h, edge_index, edge_emb)))
201
+ h = F.dropout(h, p=self.dropout, training=self.training)
202
+
203
+ for layer in range(1, self.num_layers):
204
+ h1 = self.gcns[layer](h, edge_index, edge_emb)
205
+ h2 = self.norms[layer](h1)
206
+ if layer != (self.num_layers - 1):
207
+ h = F.relu(h2)
208
+ else:
209
+ h = h2
210
+ h = F.dropout(h, p=self.dropout, training=self.training)
211
+ else:
212
+ raise Exception("Unknown block Type")
213
+
214
+ h_graph = self.pool(h, batch)
215
+
216
+ if self.args.use_prot or embeddings:
217
+ return h_graph
218
+ else:
219
+ return self.graph_pred_linear(h_graph)
220
+
221
+ def print_params(self, epoch=None, final=False):
222
+
223
+ if self.learn_t:
224
+ ts = []
225
+ for gcn in self.gcns:
226
+ ts.append(gcn.t.item())
227
+ if final:
228
+ print("Final t {}".format(ts))
229
+ else:
230
+ logging.info("Epoch {}, t {}".format(epoch, ts))
231
+ if self.learn_p:
232
+ ps = []
233
+ for gcn in self.gcns:
234
+ ps.append(gcn.p.item())
235
+ if final:
236
+ print("Final p {}".format(ps))
237
+ else:
238
+ logging.info("Epoch {}, p {}".format(epoch, ps))
239
+ if self.msg_norm:
240
+ ss = []
241
+ for gcn in self.gcns:
242
+ ss.append(gcn.msg_norm.msg_scale.item())
243
+ if final:
244
+ print("Final s {}".format(ss))
245
+ else:
246
+ logging.info("Epoch {}, s {}".format(epoch, ss))
scripts/model/model_concatenation.py ADDED
@@ -0,0 +1,92 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+ from gcn_lib.sparse.torch_nn import MLP
6
+
7
+ from model.model import DeeperGCN
8
+
9
+ import numpy as np
10
+ import logging
11
+
12
+
13
+ class PLANet(torch.nn.Module):
14
+ def __init__(self, args, saliency=False):
15
+ super(PLANet, self).__init__()
16
+
17
+ # Args
18
+ self.args = args
19
+ # Molecule and protein networks
20
+ self.molecule_gcn = DeeperGCN(args, saliency=saliency)
21
+ self.target_gcn = DeeperGCN(args, is_prot=True)
22
+
23
+ # Individual modules' final embedding size
24
+ output_molecule = args.hidden_channels
25
+ output_protein = args.hidden_channels_prot
26
+ # Concatenated embedding size
27
+ Final_output = output_molecule + output_protein
28
+ # Overall model's final embedding size
29
+ hidden_channels = args.hidden_channels
30
+
31
+ # Multiplier
32
+ if args.multi_concat:
33
+ self.multiplier_prot = torch.nn.Parameter(torch.zeros(hidden_channels))
34
+ self.multiplier_ligand = torch.nn.Parameter(torch.ones(hidden_channels))
35
+ elif self.args.MLP:
36
+ # MLP
37
+ hidden_channel = 64
38
+ channels_concat = [256, hidden_channel, hidden_channel, 128]
39
+ self.concatenation_gcn = MLP(channels_concat, norm=args.norm, last_lin=True)
40
+ # breakpoint()
41
+ indices = np.diag_indices(hidden_channel)
42
+ tensor_linear_layer = torch.zeros(hidden_channel, Final_output)
43
+ tensor_linear_layer[indices[0], indices[1]] = 1
44
+ self.concatenation_gcn[0].weight = torch.nn.Parameter(tensor_linear_layer)
45
+ self.concatenation_gcn[0].bias = torch.nn.Parameter(
46
+ torch.zeros(hidden_channel)
47
+ )
48
+ else:
49
+ # Concatenation Layer
50
+ self.concatenation_gcn = nn.Linear(Final_output, hidden_channels)
51
+ indices = np.diag_indices(output_molecule)
52
+ tensor_linear_layer = torch.zeros(hidden_channels, Final_output)
53
+ tensor_linear_layer[indices[0], indices[1]] = 1
54
+ self.concatenation_gcn.weight = torch.nn.Parameter(tensor_linear_layer)
55
+ self.concatenation_gcn.bias = torch.nn.Parameter(
56
+ torch.zeros(hidden_channels)
57
+ )
58
+
59
+ # Classification Layer
60
+ num_classes = args.nclasses
61
+ self.classification = nn.Linear(hidden_channels, num_classes)
62
+
63
+ def forward(self, molecule, target):
64
+
65
+ molecule_features = self.molecule_gcn(molecule)
66
+ target_features = self.target_gcn(target)
67
+ # Multiplier
68
+ if self.args.multi_concat:
69
+ All_features = (
70
+ target_features * self.multiplier_prot
71
+ + molecule_features * self.multiplier_ligand
72
+ )
73
+ else:
74
+ # Concatenation of LM and PM modules
75
+ All_features = torch.cat((molecule_features, target_features), dim=1)
76
+ All_features = self.concatenation_gcn(All_features)
77
+ # Classification
78
+ classification = self.classification(All_features)
79
+
80
+ return classification
81
+
82
+ def print_params(self, epoch=None, final=False):
83
+
84
+ logging.info("======= Molecule GCN ========")
85
+ self.molecule_gcn.print_params(epoch)
86
+ logging.info("======= Protein GCN ========")
87
+ self.target_gcn.print_params(epoch)
88
+ if self.args.multi_concat:
89
+ sum_prot_multi = sum(self.multiplier_prot)
90
+ sum_lig_multi = sum(self.multiplier_ligand)
91
+ logging.info("Sumed prot multi: {}".format(sum_prot_multi))
92
+ logging.info("Sumed lig multi: {}".format(sum_lig_multi))
scripts/model/model_encoder.py ADDED
@@ -0,0 +1,54 @@
1
+ import torch
2
+ from data.features import get_atom_feature_dims, get_bond_feature_dims
3
+
4
+ full_atom_feature_dims = get_atom_feature_dims()
5
+ full_bond_feature_dims = get_bond_feature_dims()
6
+
7
+ class AtomEncoder(torch.nn.Module):
8
+
9
+ def __init__(self, emb_dim):
10
+ super(AtomEncoder, self).__init__()
11
+
12
+ self.atom_embedding_list = torch.nn.ModuleList()
13
+
14
+ for i, dim in enumerate(full_atom_feature_dims):
15
+ emb = torch.nn.Embedding(dim, emb_dim)
16
+ torch.nn.init.xavier_uniform_(emb.weight.data)
17
+ self.atom_embedding_list.append(emb)
18
+
19
+ def forward(self, x):
20
+ x_embedding = 0
21
+ for i in range(x.shape[1]):
22
+ x_embedding += self.atom_embedding_list[i](x[:,i])
23
+
24
+ return x_embedding
25
+
26
+
27
+ class BondEncoder(torch.nn.Module):
28
+
29
+ def __init__(self, emb_dim):
30
+ super(BondEncoder, self).__init__()
31
+
32
+ self.bond_embedding_list = torch.nn.ModuleList()
33
+
34
+ for i, dim in enumerate(full_bond_feature_dims):
35
+ emb = torch.nn.Embedding(dim, emb_dim)
36
+ torch.nn.init.xavier_uniform_(emb.weight.data)
37
+ self.bond_embedding_list.append(emb)
38
+
39
+ def forward(self, edge_attr):
40
+ bond_embedding = 0
41
+ for i in range(edge_attr.shape[1]):
42
+ bond_embedding += self.bond_embedding_list[i](edge_attr[:,i])
43
+
44
+ return bond_embedding
45
+
46
+
47
+ if __name__ == '__main__':
48
+ from loader import GraphClassificationPygDataset
49
+ dataset = GraphClassificationPygDataset(name = 'tox21')
50
+ atom_enc = AtomEncoder(100)
51
+ bond_enc = BondEncoder(100)
52
+
53
+ print(atom_enc(dataset[0].x))
54
+ print(bond_enc(dataset[0].edge_attr))
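Both encoders above sum one embedding table per categorical feature column. A self-contained illustration of that pattern with made-up feature dimensions (3 and 5; the real ones come from data.features):

    import torch

    tables = torch.nn.ModuleList([torch.nn.Embedding(3, 8), torch.nn.Embedding(5, 8)])
    x = torch.tensor([[0, 4], [2, 1]])            # two atoms, two categorical features
    h = sum(tables[i](x[:, i]) for i in range(x.shape[1]))
    print(h.shape)                                # torch.Size([2, 8])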
scripts/pla_net_inference.py ADDED
@@ -0,0 +1,82 @@
1
+ import torch
2
+ import numpy as np
3
+ import pandas as pd
4
+ import time
5
+ from torch_geometric.data import DataLoader
6
+ from model.model_concatenation import PLANet
7
+ from utils.args import ArgsInit
8
+ from utils.model import get_dataset_inference, test_gcn
9
+
10
+
11
+ def main(args):
12
+
13
+ if args.use_gpu:
14
+ device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
15
+ else:
16
+ device = torch.device('cpu')
17
+
18
+ # NumPy and torch seeds
19
+ torch.manual_seed(args.seed)
20
+ np.random.seed(args.seed)
21
+ if device.type == 'cuda':
22
+ torch.cuda.manual_seed(args.seed)
23
+ print('%s' % args)
24
+
25
+
26
+ data_inference = pd.read_csv(
27
+ args.input_file_smiles,
28
+ names=["Smiles"],
29
+ header=0
30
+ )
31
+
32
+ print("Data Inference: ", data_inference)
33
+
34
+ data_target = pd.read_csv(
35
+ args.target_list, names=["Fasta", "Target", "Label"]
36
+ )
37
+ data_target = data_target[data_target.Target == args.target]
38
+
39
+ print("Data Target: ", data_target)
40
+
41
+ test = get_dataset_inference(
42
+ data_inference,
43
+ use_prot=args.use_prot,
44
+ target=data_target,
45
+ args=args,
46
+ advs=False,
47
+ saliency=False,
48
+ )
49
+
50
+ test_loader = DataLoader(test, batch_size=args.batch_size, shuffle=False,
51
+ num_workers=args.num_workers)
52
+
53
+ model = PLANet(args).to(device)
54
+
55
+
56
+ print('Model inference in: {}'.format(args.inference_path))
57
+ start_time = time.time()
58
+
59
+ # Load pre-trained molecule model
60
+
61
+ print('Evaluating...')
62
+ test_gcn(model, device, test_loader, args)
63
+
64
+
65
+ end_time = time.time()
66
+ total_time = end_time - start_time
67
+ print('Total time: {}'.format(time.strftime('%H:%M:%S', time.gmtime(total_time))))
68
+
69
+
70
+ if __name__ == "__main__":
71
+ args = ArgsInit().args
72
+ # Default args for inference
73
+
74
+ args.nclasses = 2
75
+ args.batch_size = 10
76
+ args.use_prot = True
77
+ args.freeze_molecule = True
78
+ args.conv_encode_edge = True
79
+ args.learn_t = True
80
+ args.binary = True
81
+
82
+ main(args)
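A hedged invocation sketch for the script above: the attribute names are taken from what main() reads, but the concrete values (paths and target id) are assumptions, and the real CLI flags are defined in utils.args.ArgsInit.

    args = ArgsInit().args
    args.input_file_smiles = "example/input_smiles.csv"        # assumed: sample file shipped in the repo
    args.target_list = "data/datasets/AD/Targets_Fasta.csv"    # assumed: Fasta/Target/Label table
    args.target = "ada"                                        # hypothetical target id (cf. pretrained-models/BINARY_ada)
    main(args)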
setup.py ADDED
@@ -0,0 +1,14 @@
1
+ # setup.py
2
+ from setuptools import setup, find_packages
3
+
4
+ with open('requirements.txt') as f:
5
+ requirements = f.read().splitlines()
6
+
7
+ setup(
8
+ name='pla_net',
9
+ version='0.0.0',
10
+ packages=find_packages(),
11
+ install_requires=requirements,
12
+ classifiers=[],
13
+ python_requires='>=3.8',
14
+ )
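Since install_requires reads requirements.txt at build time, an editable install (for example, `pip install -e .`) is the usual way to install the package together with these dependencies during development.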
utils/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ import sys
2
+ import os
3
+ ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
4
+ sys.path.append(ROOT_DIR)