juliocesar-io committed
Commit b6f1234
Parent(s): 5b85ed1

Added initial app
This view is limited to 50 files because it contains too many changes. See raw diff.
- .gitignore +4 -0
- Dockerfile +44 -0
- README.md +121 -11
- app.py +154 -0
- data/__init__.py +4 -0
- data/dataset.py +418 -0
- data/dataset_saliency.py +378 -0
- data/datasets/AD/Inference.csv +1 -0
- data/datasets/AD/Smiles_AD_1.csv +0 -0
- data/datasets/AD/Smiles_AD_2.csv +0 -0
- data/datasets/AD/Smiles_AD_3.csv +0 -0
- data/datasets/AD/Smiles_AD_4.csv +0 -0
- data/datasets/AD/Smiles_AD_Test.csv +0 -0
- data/datasets/AD/Targets_Fasta.csv +102 -0
- data/datasets/AD/saliency.csv +1 -0
- data/datasets/DUDE/Smiles_1.csv +0 -0
- data/datasets/DUDE/Smiles_2.csv +0 -0
- data/datasets/DUDE/Smiles_3.csv +0 -0
- data/datasets/DUDE/Smiles_4.csv +0 -0
- data/datasets/DUDE/Smiles_Test.csv +0 -0
- data/features.py +136 -0
- example/input_smiles.csv +4 -0
- gcn_lib/__init__.py +0 -0
- gcn_lib/dense/__init__.py +4 -0
- gcn_lib/dense/torch_edge.py +101 -0
- gcn_lib/dense/torch_nn.py +93 -0
- gcn_lib/dense/torch_vertex.py +115 -0
- gcn_lib/sparse/__init__.py +4 -0
- gcn_lib/sparse/torch_edge.py +113 -0
- gcn_lib/sparse/torch_message.py +98 -0
- gcn_lib/sparse/torch_nn.py +160 -0
- gcn_lib/sparse/torch_vertex.py +355 -0
- gradio/title.md +19 -0
- model/__init__.py +4 -0
- model/model.py +246 -0
- model/model_concatenation.py +92 -0
- model/model_encoder.py +54 -0
- pretrained-models/BINARY_ada/Fold1/Best_Model.pth +3 -0
- pretrained-models/BINARY_ada/Fold2/Best_Model.pth +3 -0
- pretrained-models/BINARY_ada/Fold3/Best_Model.pth +3 -0
- pretrained-models/BINARY_ada/Fold4/Best_Model.pth +3 -0
- requirements.txt +9 -0
- scripts/__init__.py +0 -0
- scripts/model/__init__.py +4 -0
- scripts/model/model.py +246 -0
- scripts/model/model_concatenation.py +92 -0
- scripts/model/model_encoder.py +54 -0
- scripts/pla_net_inference.py +82 -0
- setup.py +14 -0
- utils/__init__.py +4 -0
.gitignore
ADDED
```
__pycache__
log
tmp
output_predictions.csv
```
Dockerfile
ADDED
```dockerfile
# Use NVIDIA PyTorch image as the base
FROM nvcr.io/nvidia/pytorch:22.03-py3

RUN apt-get update && apt-get install -y libxrender1

# Base pytorch
RUN conda install pytorch==1.12.0 torchvision==0.13.0 torchaudio==0.12.0 cudatoolkit=11.6 -c pytorch -c conda-forge

# Set required versions for each core dependency using cu116
RUN pip install torch-scatter==2.0.9 torch-sparse==0.6.14 torch-cluster==1.6.0 torch-spline-conv==1.2.1 torch-geometric==2.1.0 -f https://data.pyg.org/whl/torch-1.12.0+cu116.html

# Create a new user named "user" with UID 1000
RUN useradd -m -u 1000 user

# Set environment variables
ENV PYTHONUNBUFFERED=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    SYSTEM=spaces \
    HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set the working directory to the user's app directory as root
WORKDIR $HOME/app

# Copy the current directory contents into the container at $HOME/app
COPY . $HOME/app

# Change ownership of the app directory to "user"
RUN chown -R user:user $HOME/app

# Switch to the "user" user
USER user

# Upgrade pip as the user
RUN pip install --no-cache-dir --upgrade pip

# Install the local package as the user
RUN pip install --user .

# Set the default command to bash
CMD ["/bin/bash"]
```
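The image pins a specific torch 1.12.0 / cu116 / PyG 2.1.0 stack, which is easy to break when the base image changes. Below is a minimal sketch of a sanity check that could be run inside the built container (the `pla-net:latest` tag comes from the README build step further down); the expected versions in the comments are simply the ones pinned above.

```python
# sanity_check.py -- a minimal sketch; run inside the pla-net:latest container.
import torch
import torch_geometric
import torch_scatter

print("torch:", torch.__version__)                       # pinned to 1.12.0 above
print("torch_geometric:", torch_geometric.__version__)   # pinned to 2.1.0 above
print("torch_scatter:", torch_scatter.__version__)       # pinned to 2.0.9 above
print("CUDA available:", torch.cuda.is_available())
```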
README.md
CHANGED
# PLA-Net: Predicting Protein-Ligand Interactions with Deep Graph Networks

Forked version of [PLA-Net](https://github.com/BCV-Uniandes/PLA-Net)

## Background

**PLA-Net** is a deep learning model designed to predict interactions between small organic molecules (ligands) and any of the 102 target proteins in the Alzheimer's Disease (AD) dataset. By transforming molecular and protein sequences into graph representations, PLA-Net leverages Graph Convolutional Networks (GCNs) to analyze and predict target-ligand interaction probabilities. Developed by [BCV-Uniandes](https://github.com/BCV-Uniandes/PLA-Net).

## Key Features

- **Graph-Based Input Representation**
  - **Ligand Module (LM):** Converts SMILES sequences of molecules into graph representations.
  - **Protein Module (PM):** Transforms FASTA sequences of proteins into graph structures.

- **Deep Graph Convolutional Networks**
  - Each module employs a deep GCN followed by an average pooling layer to extract meaningful features from the input graphs.

- **Interaction Prediction**
  - The feature representations from the LM and PM are concatenated.
  - A fully connected layer processes the combined features to predict the interaction probability between the ligand and the target protein.

## Quick Start

If you want to run PLA-Net without installing it, you can run it freely on this [Hugging Face Space](https://huggingface.co/spaces/juliocesar-io/PLA-Net).

## Docker Install

To prevent conflicts with the host machine, it is recommended to run PLA-Net in a Docker container.

First, make sure you have an NVIDIA GPU and the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) installed. Then build the image with the following command:

```bash
docker build -t pla-net:latest .
```

### Inference

To run inference, use the Docker command shown below. It runs inference for the target protein `ada` with the SMILES in the `input_smiles.csv` file and saves the predictions to the `output_predictions.csv` file.

The prediction file has the following format:

```bash
target,smiles,interaction_probability,interaction_class
ada,Cn4c(CCC(=O)Nc3ccc2ccn(CC[C@H](CO)n1cnc(C(N)=O)c1)c2c3)nc5ccccc45,0.9994347542524338,1
```

Where `interaction_class` is 1 if the interaction probability is greater than 0.5, and 0 otherwise.

```bash
docker run \
    -it --rm --gpus all \
    -v "$(pwd)":/home/user/output \
    pla-net:latest \
    python /home/user/app/scripts/pla_net_inference.py \
    --use_gpu \
    --target ada \
    --target_list /home/user/app/data/datasets/AD/Targets_Fasta.csv \
    --target_checkpoint_path /home/user/app/pretrained-models/BINARY_ada \
    --input_file_smiles /home/user/app/example/input_smiles.csv \
    --output_file /home/user/output/output_predictions.csv
```

Args:

- `use_gpu`: Use GPU for inference.
- `target`: Target protein ID from the list of targets. Check the list of available targets in the [data](https://github.com/juliocesar-io/PLA-Net/blob/main/data/datasets/AD/Targets_Fasta.csv) folder.
- `target_list`: Path to the target list CSV file.
- `target_checkpoint_path`: Path to the target checkpoint (e.g. `/workspace/pretrained-models/BINARY_ada`); there is one checkpoint per target.
- `input_file_smiles`: Path to the input SMILES file.
- `output_file`: Path to the output predictions file.

### Gradio Server

We provide a simple graphical user interface to run PLA-Net with Gradio. To use it, run the following command:

```bash
docker run \
    -it --rm --gpus all \
    -p 7860:7860 \
    pla-net:latest \
    python app.py
```

Then open your browser and go to `http://localhost:7860/` to access the web interface.

## Local Install

To do inference with PLA-Net, you need to install the dependencies and activate the environment. You can do this by running the following commands:

```bash
conda env create -f environment.yml
conda activate pla-net
```

Now you can run inference with PLA-Net locally. In the project folder, run the following command:

```bash
python scripts/pla_net_inference.py \
    --use_gpu \
    --target ada \
    --target_list data/datasets/AD/Targets_Fasta.csv \
    --target_checkpoint_path pretrained-models/BINARY_ada \
    --input_file_smiles example/input_smiles.csv \
    --output_file example/output_predictions.csv
```

## Models

You can download the pre-trained models from [Hugging Face](https://huggingface.co/juliocesar-io/PLA-Net).

## Training

To train each of the components of our method (LM, LM+Advs, LMPM, and PLA-Net), please refer to the `planet.sh` file and run the desired models.

To evaluate each of the components of our method (LM, LM+Advs, LMPM, and PLA-Net), please run the corresponding bash file in the `inference` folder.

## Citation

Ruiz Puentes, P., Rueda-Gensini, L., Valderrama, N. et al. Predicting target–ligand interactions with graph convolutional networks for interpretable pharmaceutical discovery. Sci Rep 12, 8434 (2022). https://doi.org/10.1038/s41598-022-12180-x
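The "Interaction Prediction" bullet above is the only place the fusion step is described in prose. Here is a minimal PyTorch sketch of that step, concatenating a pooled ligand embedding and a pooled protein embedding and scoring them with a fully connected layer; the dimensions and the two-class softmax are illustrative assumptions, not the configuration of the released checkpoints.

```python
import torch
import torch.nn as nn

class InteractionHead(nn.Module):
    """Sketch of the LM/PM fusion: concatenate pooled embeddings, then a fully connected layer."""

    def __init__(self, ligand_dim=256, protein_dim=256, n_classes=2):  # hypothetical sizes
        super().__init__()
        self.fc = nn.Linear(ligand_dim + protein_dim, n_classes)

    def forward(self, ligand_emb, protein_emb):
        fused = torch.cat([ligand_emb, protein_emb], dim=-1)
        return torch.softmax(self.fc(fused), dim=-1)  # index 1 read as the interaction probability

head = InteractionHead()
probs = head(torch.randn(1, 256), torch.randn(1, 256))
print(probs.shape)  # torch.Size([1, 2])
```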
app.py
ADDED
```python
import uuid
import gradio as gr
import torch
import os
import pandas as pd
from rdkit import Chem
from scripts.pla_net_inference import main
from utils.args import ArgsInit

os.system("nvidia-smi")
print("TORCH_CUDA", torch.cuda.is_available())

PROJECT_URL = "https://www.nature.com/articles/s41598-022-12180-x"

DEFAULT_PATH_DOCKER = "/home/user/app"

def load_and_filter_data(protein_id, ligand_smiles):

    # generate random short id, make short
    random_id = str(uuid.uuid4())[:8]

    print("Inference ID: ", random_id)

    # check that ligand_smiles is not empty
    if not ligand_smiles or ligand_smiles.strip() == "":
        error_msg = f"!SMILES string is required"
        raise gr.Error(error_msg, duration=5)

    # Split the input SMILES string by ':' to get a list
    smiles_list = ligand_smiles.split(':')

    print("Smiles to predict: ", smiles_list)
    print("Target Protein ID: ", protein_id)

    # Validate SMILES
    invalid_smiles = []
    for smiles in smiles_list:
        mol = Chem.MolFromSmiles(smiles.strip())
        if mol is None:
            invalid_smiles.append(smiles.strip())

    if invalid_smiles:
        error_msg = f"!Invalid 💥 SMILES string(s) : {', '.join(invalid_smiles)}"
        raise gr.Error(error_msg, duration=5)

    # Create tmp folder
    os.makedirs(f"{DEFAULT_PATH_DOCKER}/example/tmp", exist_ok=True)

    # Save SMILES to CSV
    df = pd.DataFrame({"smiles": [s.strip() for s in smiles_list if s.strip()]})
    df.to_csv(f"{DEFAULT_PATH_DOCKER}/example/tmp/{random_id}_input_smiles.csv", index=False)

    # Run inference
    args = ArgsInit().args
    args.nclasses = 2
    args.batch_size = 10
    args.use_prot = True
    args.freeze_molecule = True
    args.conv_encode_edge = True
    args.learn_t = True
    args.binary = True

    args.use_gpu = True
    args.target = protein_id
    args.target_list = f"{DEFAULT_PATH_DOCKER}/data/datasets/AD/Targets_Fasta.csv"
    args.target_checkpoint_path = f"{DEFAULT_PATH_DOCKER}/pretrained-models/BINARY_{protein_id}"
    args.input_file_smiles = f"{DEFAULT_PATH_DOCKER}/example/tmp/{random_id}_input_smiles.csv"
    args.output_file = f"{DEFAULT_PATH_DOCKER}/example/tmp/{random_id}_output_predictions.csv"

    print("Args: ", args)
    main(args)

    # Load the CSV file
    df = pd.read_csv(f'{DEFAULT_PATH_DOCKER}/example/tmp/{random_id}_output_predictions.csv')

    print("Prediction Results output: ", df)
    return df

def load_description(fp):
    with open(fp, 'r', encoding='utf-8') as f:
        content = f.read()
    return content

def run_inference(protein_id, ligand_smile):
    result_df = load_and_filter_data(protein_id, ligand_smile)
    return result_df

def create_interface():
    with gr.Blocks(title="PLA-Net Web Inference") as inference:
        gr.HTML(load_description("gradio/title.md"))

        gr.Markdown("### Input")
        with gr.Row():
            with gr.Column():
                gr.Markdown("#### Target Protein")
                protein_id = gr.Dropdown(
                    choices=["ada"],
                    label="Target Protein ID",
                    info="Select the target protein from the dropdown menu.",
                    value="ada"
                )
            with gr.Column():
                gr.Markdown("#### Ligand")
                ligand_smile = gr.Textbox(
                    info="Provide SMILES input (separate multiple SMILES with ':' )",
                    placeholder="SMILES input",
                    label="SMILES string(s)",
                )
                gr.Examples(
                    examples=[
                        "Cn4c(CCC(=O)Nc3ccc2ccn(CC[C@H](CO)n1cnc(C(N)=O)c1)c2c3)nc5ccccc45",
                        "OCCCCCn1cnc2C(O)CN=CNc12",
                        "Nc4nc(c1ccco1)c3ncn(C(=O)NCCc2ccccc2)c3n4"
                    ],
                    inputs=ligand_smile,
                    label="Example SMILES"
                )
        btn = gr.Button("Run")
        gr.Markdown("### Output")
        out = gr.Dataframe(
            headers=["target", "smiles", "interaction_probability", "interaction_class"],
            datatype=["str", "str", "number", "number"],
            label="Prediction Results"
        )

        btn.click(fn=run_inference, inputs=[protein_id, ligand_smile], outputs=out)

        gr.Markdown("""
        PLA-Net model for predicting interactions
        between small organic molecules and one of the 102 target proteins in the AD dataset. Graph representations
        of the molecule and a given target protein are generated from SMILES and FASTA sequences and are used as
        input to the Ligand Module (LM) and Protein Module (PM), respectively. Each module comprises a deep GCN
        followed by an average pooling layer, which extracts relevant features of their corresponding input graph. Both
        representations are finally concatenated and combined through a fully connected layer to predict the target–
        ligand interaction probability.
        """)

        gr.Markdown("""
        Ruiz Puentes, P., Rueda-Gensini, L., Valderrama, N. et al.
        Predicting target–ligand interactions with graph convolutional networks
        for interpretable pharmaceutical discovery. Sci Rep 12, 8434 (2022).
        [https://doi.org/10.1038/s41598-022-12180-x](https://doi.org/10.1038/s41598-022-12180-x)
        """)

    return inference

if __name__ == "__main__":
    interface = create_interface()
    interface.launch()
```
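app.py accepts multiple SMILES separated by ':' and rejects anything RDKit cannot parse before writing the temporary input CSV. Below is a standalone sketch of that validation step; the function name and example input are hypothetical, and only rdkit is required.

```python
from rdkit import Chem

def split_and_validate(smiles_input: str):
    """Mirror of the app's input handling: split on ':' and flag strings RDKit cannot parse."""
    candidates = [s.strip() for s in smiles_input.split(":") if s.strip()]
    invalid = [s for s in candidates if Chem.MolFromSmiles(s) is None]
    return candidates, invalid

smiles, bad = split_and_validate("OCCCCCn1cnc2C(O)CN=CNc12:not_a_smiles")
print(smiles)  # ['OCCCCCn1cnc2C(O)CN=CNc12', 'not_a_smiles']
print(bad)     # ['not_a_smiles']
```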
data/__init__.py
ADDED
```python
import sys
import os
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
sys.path.append(ROOT_DIR)
```
data/dataset.py
ADDED
```python
import pandas as pd
import shutil, os
import os.path as osp
import numpy as np
from tqdm import tqdm

import torch
from torch_geometric.data import Data
from torch.autograd import Variable

from rdkit import Chem

from data.features import (
    allowable_features,
    atom_to_feature_vector,
    bond_to_feature_vector,
    atom_feature_vector_to_dict,
    bond_feature_vector_to_dict,
)

from utils.data_util import one_hot_vector_sm, one_hot_vector_am, get_atom_feature_dims


def load_dataset(
    cross_val, binary_task, target, args, use_prot=False, advs=False, test=False, inference=False, saliency=False
):
    """
    Load data and return data in dataframes format for each split and the loader of each split.
    Args:
        cross_val (int): Data partition being used [1-4].
        binary_task (boolean): Whether to perform binary classification or multiclass classification.
        target (string): Name of the protein target for binary classification.
        args (parser): Complete arguments (configuration) of the model.
        use_prot (boolean): Whether to use the PM module.
        advs (boolean): Whether to train the LM module with adversarial augmentations.
        test (boolean): Whether the model is being tested or trained.
    Return:
        train (loader): Training loader
        valid (loader): Validation loader
        test (loader): Test loader
        data_train (dataframe): Training data dataframe
        data_valid (dataframe): Validation data dataframe
        data_test (dataframe): Test data dataframe

    """
    # Read all data files
    if binary_task:
        path = "data/datasets/AD/"
        add_val = '_AD'
    else:
        path = "data/datasets/DUDE/"
        add_val = ''

    data_test = pd.read_csv(
        path + f"Smiles{add_val}_Test.csv", names=["Smiles", "Target", "Label"]
    )

    data_inference = pd.read_csv(
        path + f"Inference.csv", names=["Smiles", "Target", "Label"]
    )
    if not test and not inference:
        # Verify cross validation partition is defined
        assert cross_val in [1, 2, 3, 4], "{} data partition is not defined".format(
            cross_val
        )
        print("Loading data...")

        A = pd.read_csv(
            path + f"Smiles{add_val}_1.csv", names=["Smiles", "Target", "Label"]
        )
        B = pd.read_csv(
            path + f"Smiles{add_val}_2.csv", names=["Smiles", "Target", "Label"]
        )
        C = pd.read_csv(
            path + f"Smiles{add_val}_3.csv", names=["Smiles", "Target", "Label"]
        )
        D = pd.read_csv(
            path + f"Smiles{add_val}_4.csv", names=["Smiles", "Target", "Label"]
        )

        if use_prot and binary_task:
            data_target = pd.read_csv(
                path + "Targets_Fasta.csv", names=["Fasta", "Target", "Label"]
            )
        else:
            data_target = []

        # Generate train and validation splits according to cross validation number
        if cross_val == 1:
            data_train = pd.concat([A, B, C], ignore_index=True)
            data_val = D
        elif cross_val == 2:
            data_train = pd.concat([A, C, D], ignore_index=True)
            data_val = B
        elif cross_val == 3:
            data_train = pd.concat([A, B, D], ignore_index=True)
            data_val = C
        elif cross_val == 4:
            data_train = pd.concat([B, C, D], ignore_index=True)
            data_val = A

        # If in binary classification select data for the specific target being trained
        if binary_task:
            data_train = data_train[data_train.Target == target]
            data_val = data_val[data_val.Target == target]
            data_test = data_test[data_test.Target == target]
            if use_prot:
                data_target = data_target[data_target.Target == target]

        # Get dataset for each split
        train = get_dataset(data_train, use_prot, data_target, args, advs)
        valid = get_dataset(data_val, use_prot, data_target, args)
        test = get_dataset(data_test, use_prot, data_target, args)

    elif test and not inference:
        # Read test data file
        data_target = None
        if binary_task:
            data_test = data_test[data_test.Target == target]
            if use_prot:
                data_target = pd.read_csv(
                    path + "Targets_Fasta.csv", names=["Fasta", "Target", "Label"]
                )
                data_target = data_target[data_target.Target == target]

        test = get_dataset(
            data_test,
            target=data_target,
            use_prot=use_prot,
            args=args,
            advs=advs,
            saliency=saliency,
        )
        # No need for these sets in test mode
        train = []
        valid = []
        data_train = []
        data_val = []

    elif inference:
        # Read inference data file
        data_target = None
        if binary_task:
            data_inference = data_inference[data_inference.Target == target]
            if use_prot:
                data_target = pd.read_csv(
                    path + "Targets_Fasta.csv", names=["Fasta", "Target", "Label"]
                )
                data_target = data_target[data_target.Target == target]

        test = get_dataset(
            data_inference,
            target=data_target,
            use_prot=use_prot,
            args=args,
            advs=advs,
            saliency=args.saliency,
        )
        # No need for these sets in test mode
        train = []
        valid = []
        data_train = []
        data_val = []

    print("Done.")
    return train, valid, test, data_train, data_val, data_test


def reload_dataset(cross_val, binary_task, target, args, advs=False):
    print("Reloading data")
    args.edge_dict = {}
    if binary_task:
        path = "data/datasets/AD/"
        A = pd.read_csv(path + "Smiles_AD_1.csv", names=["Smiles", "Target", "Label"])
        B = pd.read_csv(path + "Smiles_AD_2.csv", names=["Smiles", "Target", "Label"])
        C = pd.read_csv(path + "Smiles_AD_3.csv", names=["Smiles", "Target", "Label"])
        D = pd.read_csv(path + "Smiles_AD_4.csv", names=["Smiles", "Target", "Label"])
        data_test = pd.read_csv(
            path + "AD_Test.csv", names=["Smiles", "Target", "Label"]
        )

    if cross_val == 1:
        data_train = pd.concat([A, B, C], ignore_index=True)
    elif cross_val == 2:
        data_train = pd.concat([A, C, D], ignore_index=True)
    elif cross_val == 3:
        data_train = pd.concat([A, B, D], ignore_index=True)
    else:
        data_train = pd.concat([B, C, D], ignore_index=True)

    if binary_task:
        data_train = data_train[data_train.Target == target]

    train = get_dataset(data_train, args=args, advs=advs)
    print("Done.")

    return train, data_train


def smiles_to_graph(smiles_string, is_prot=False, received_mol=False):
    """
    Converts SMILES string to graph Data object
    :input: SMILES string (str)
    :return: graph object
    """

    if not is_prot:
        mol = Chem.MolFromSmiles(smiles_string)
    else:
        mol = Chem.MolFromFASTA(smiles_string)
    # atoms
    atom_features_list = []
    for atom in mol.GetAtoms():
        ftrs = atom_to_feature_vector(atom)
        atom_features_list.append(ftrs)

    x = np.array(atom_features_list, dtype=np.int64)

    # bonds
    num_bond_features = 3  # bond type, bond stereo, is_conjugated
    if len(mol.GetBonds()) > 0:  # mol has bonds
        edges_list = []
        edge_features_list = []
        for bond in mol.GetBonds():
            i = bond.GetBeginAtomIdx()
            j = bond.GetEndAtomIdx()

            edge_feature = bond_to_feature_vector(bond)

            # add edges in both directions
            edges_list.append((i, j))
            edge_features_list.append(edge_feature)
            edges_list.append((j, i))
            edge_features_list.append(edge_feature)

        # data.edge_index: Graph connectivity in COO format with shape [2, num_edges]
        edge_index = np.array(edges_list, dtype=np.int64).T

        # data.edge_attr: Edge feature matrix with shape [num_edges, num_edge_features]
        edge_attr = np.array(edge_features_list, dtype=np.int64)

    else:  # mol has no bonds
        edge_index = np.empty((2, 0), dtype=np.int64)
        edge_attr = np.empty((0, num_bond_features), dtype=np.int64)

    return edge_attr, edge_index, x


def smiles_to_graph_advs(
    smiles_string, args, advs=False, received_mol=False, saliency=False
):
    """
    Converts SMILES string to graph Data object
    :input: SMILES string (str)
    :return: graph object
    """
    if not received_mol:
        mol = Chem.MolFromSmiles(smiles_string)
    else:
        mol = smiles_string

    # atoms
    atom_features_list = []
    atom_feat_dims = get_atom_feature_dims()

    for atom in mol.GetAtoms():
        ftrs = atom_to_feature_vector(atom)
        if saliency:
            ftrs_oh = one_hot_vector_am(ftrs, atom_feat_dims)
            atom_features_list.append(torch.unsqueeze(ftrs_oh, 0))
        else:
            atom_features_list.append(ftrs)

    if saliency:
        x = torch.cat(atom_features_list)
    else:
        x = np.array(atom_features_list, dtype=np.int64)

    if advs:
        # bonds
        mol_edge_dict = {}

    num_bond_features = 3  # bond type, bond stereo, is_conjugated
    features_dim1 = torch.eye(5)
    features_dim2 = torch.eye(6)
    features_dim3 = torch.eye(2)
    if len(mol.GetBonds()) > 0:  # mol has bonds
        edges_list = []
        edge_features_list = []
        for bond in mol.GetBonds():
            i = bond.GetBeginAtomIdx()
            j = bond.GetEndAtomIdx()
            edge_feature = bond_to_feature_vector(bond)

            # add edges in both directions
            edges_list.append((i, j))
            edges_list.append((j, i))

            edge_feature_oh = one_hot_vector_sm(
                edge_feature, features_dim1, features_dim2, features_dim3
            )
            if advs:
                mol_edge_dict[(i, j)] = Variable(
                    torch.tensor([1.0]), requires_grad=True
                )

                # add edges in both directions
                edge_features_list.append(
                    torch.unsqueeze(mol_edge_dict[(i, j)] * edge_feature_oh, 0)
                )
                edge_features_list.append(
                    torch.unsqueeze(mol_edge_dict[(i, j)] * edge_feature_oh, 0)
                )
            else:
                # add edges in both directions
                edge_features_list.append(torch.unsqueeze(edge_feature_oh, 0))
                edge_features_list.append(torch.unsqueeze(edge_feature_oh, 0))
        if advs:
            # Update edge dict
            args.edge_dict[smiles_string] = mol_edge_dict

        # data.edge_index: Graph connectivity in COO format with shape [2, num_edges]
        edge_index = np.array(edges_list, dtype=np.int64).T
        # data.edge_attr: Edge feature matrix with shape [num_edges, num_edge_features]

        edge_attr = torch.cat(edge_features_list)

    else:  # mol has no bonds
        edge_index = np.empty((2, 0), dtype=np.int64)
        edge_attr = np.empty((0, num_bond_features), dtype=np.int64)
        args.edge_dict[smiles_string] = {}

    return edge_attr, edge_index, x


def get_dataset(
    dataset, use_prot=False, target=None, args=None, advs=False, saliency=False
):
    total_dataset = []
    if use_prot:
        prot_graph = transform_molecule_pg(
            target["Fasta"].item(), label=None, is_prot=use_prot
        )

    for mol, label in tqdm(
        zip(dataset["Smiles"], dataset["Label"]), total=len(dataset["Smiles"])
    ):
        if use_prot:
            total_dataset.append(
                [
                    transform_molecule_pg(mol, label, args, advs, saliency=saliency),
                    prot_graph,
                ]
            )
        else:
            total_dataset.append(
                transform_molecule_pg(mol, label, args, advs, saliency=saliency)
            )
    return total_dataset


def get_perturbed_dataset(mols, labels, args, valence=False):
    total_dataset = []
    for mol, label in zip(mols, labels):
        total_dataset.append(transform_molecule_pg(mol, label, args, received_mol=True))
    return total_dataset


def transform_molecule_pg(
    smiles,
    label,
    args=None,
    advs=False,
    received_mol=False,
    saliency=False,
    is_prot=False,
):

    if is_prot:
        edge_attr_p, edge_index_p, x_p = smiles_to_graph(smiles, is_prot)
        x_p = torch.tensor(x_p)
        edge_index_p = torch.tensor(edge_index_p)
        edge_attr_p = torch.tensor(edge_attr_p)

        return Data(edge_attr=edge_attr_p, edge_index=edge_index_p, x=x_p)

    else:
        if args.advs or received_mol:
            if advs or received_mol:
                edge_attr, edge_index, x = smiles_to_graph_advs(
                    smiles,
                    args,
                    advs=True,
                    received_mol=received_mol,
                    saliency=saliency,
                )
            else:
                edge_attr, edge_index, x = smiles_to_graph_advs(
                    smiles, args, received_mol=received_mol, saliency=saliency
                )
        else:
            edge_attr, edge_index, x = smiles_to_graph(smiles)

        if not saliency:
            x = torch.tensor(x)
        y = torch.tensor([label])
        edge_index = torch.tensor(edge_index)
        if not args.advs and not received_mol:
            edge_attr = torch.tensor(edge_attr)

        if received_mol:
            mol = smiles
        else:
            mol = Chem.MolFromSmiles(smiles)

        return Data(
            edge_attr=edge_attr, edge_index=edge_index, x=x, y=y, mol=mol, smiles=smiles
        )
```
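For reference, a minimal usage sketch of the non-adversarial, non-saliency path above: `smiles_to_graph` returns NumPy arrays that `transform_molecule_pg` wraps into a `torch_geometric` `Data` object. It assumes the repository root is on `PYTHONPATH` so that `data.dataset` is importable; the label is hypothetical.

```python
import torch
from torch_geometric.data import Data

from data.dataset import smiles_to_graph

# Ethanol: 3 heavy atoms, 2 bonds -> 4 directed edges with 3 features each.
edge_attr, edge_index, x = smiles_to_graph("CCO")

data = Data(
    x=torch.tensor(x),                    # per-atom feature rows
    edge_index=torch.tensor(edge_index),  # shape [2, 4]
    edge_attr=torch.tensor(edge_attr),    # shape [4, 3]
    y=torch.tensor([1]),                  # hypothetical binary label
)
print(data)
```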
data/dataset_saliency.py
ADDED
```python

import pandas as pd
import shutil, os
import os.path as osp
import numpy as np
from tqdm import tqdm

import torch
from torch_geometric.data import Data
from torch.autograd import Variable

from rdkit import Chem

from data.features import (
    allowable_features,
    atom_to_feature_vector,
    bond_to_feature_vector,
    atom_feature_vector_to_dict,
    bond_feature_vector_to_dict,
)

from utils.data_util import one_hot_vector_sm, one_hot_vector_am, get_atom_feature_dims


def load_dataset(
    cross_val, binary_task, target, args, use_prot=False, advs=False, test=False
):
    """
    Load data and return data in dataframes format for each split and the loader of each split.
    Args:
        cross_val (int): Data partition being used [1-4].
        binary_task (boolean): Whether to perform binary classification or multiclass classification.
        target (string): Name of the protein target for binary classification.
        args (parser): Complete arguments (configuration) of the model.
        use_prot (boolean): Whether to use the PM module.
        advs (boolean): Whether to train the LM module with adversarial augmentations.
        test (boolean): Whether the model is being tested or trained.
    Return:
        train (loader): Training loader
        valid (loader): Validation loader
        test (loader): Test loader
        data_train (dataframe): Training data dataframe
        data_valid (dataframe): Validation data dataframe
        data_test (dataframe): Test data dataframe

    """
    # TODO: Don't we want to include the multiclass partition?
    # Read all data files
    if not test:
        # Verify cross validation partition is defined
        assert cross_val in [1, 2, 3, 4], "{} data partition is not defined".format(
            cross_val
        )
        print("Loading data...")
        if binary_task:
            path = "data/datasets/AD/"
            A = pd.read_csv(
                path + "Smiles_AD_1.csv", names=["Smiles", "Target", "Label"]
            )
            B = pd.read_csv(
                path + "Smiles_AD_2.csv", names=["Smiles", "Target", "Label"]
            )
            C = pd.read_csv(
                path + "Smiles_AD_3.csv", names=["Smiles", "Target", "Label"]
            )
            D = pd.read_csv(
                path + "Smiles_AD_4.csv", names=["Smiles", "Target", "Label"]
            )
            data_test = pd.read_csv(
                path + "AD_Test.csv", names=["Smiles", "Target", "Label"]
            )
            if use_prot:
                data_target = pd.read_csv(
                    path + "Targets_Fasta.csv", names=["Fasta", "Target", "Label"]
                )
            else:
                data_target = []
        # Generate train and validation splits according to cross validation number
        if cross_val == 1:
            data_train = pd.concat([A, B, C], ignore_index=True)
            data_val = D
        elif cross_val == 2:
            data_train = pd.concat([A, C, D], ignore_index=True)
            data_val = B
        elif cross_val == 3:
            data_train = pd.concat([A, B, D], ignore_index=True)
            data_val = C
        elif cross_val == 4:
            data_train = pd.concat([B, C, D], ignore_index=True)
            data_val = A
        # If in binary classification select data for the specific target being trained
        if binary_task:
            data_train = data_train[data_train.Target == target]
            data_val = data_val[data_val.Target == target]
            data_test = data_test[data_test.Target == target]
            if use_prot:
                data_target = data_target[data_target.Target == target]
        # Get dataset for each split
        train = get_dataset(data_train, use_prot, data_target, args, advs)
        valid = get_dataset(data_val, use_prot, data_target, args)
        test = get_dataset(data_test, use_prot, data_target, args)
    else:
        # Read test data file
        if binary_task:
            path = "data/datasets/AD/"
            data_test = pd.read_csv(
                path + "Smiles_AD_Test.csv", names=["Smiles", "Target", "Label"]
            )
            data_test = data_test[data_test.Target == target]
            if use_prot:
                data_target = pd.read_csv(
                    path + "Targets_Fasta.csv", names=["Fasta", "Target", "Label"]
                )
                data_target = data_target[data_target.Target == target]
            else:
                data_target = []
        test = get_dataset(data_test, target=data_target, use_prot=use_prot, args=args, advs=advs, saliency=args.saliency)
        train = []
        valid = []
        data_train = []
        data_val = []
    print("Done.")
    return train, valid, test, data_train, data_val, data_test


def reload_dataset(cross_val, binary_task, target, args, advs=False):
    print("Reloading data")
    args.edge_dict = {}
    if binary_task:
        path = "data/datasets/AD/"
        A = pd.read_csv(path + "Smiles_AD_1.csv", names=["Smiles", "Target", "Label"])
        B = pd.read_csv(path + "Smiles_AD_2.csv", names=["Smiles", "Target", "Label"])
        C = pd.read_csv(path + "Smiles_AD_3.csv", names=["Smiles", "Target", "Label"])
        D = pd.read_csv(path + "Smiles_AD_4.csv", names=["Smiles", "Target", "Label"])
        data_test = pd.read_csv(
            path + "AD_Test.csv", names=["Smiles", "Target", "Label"]
        )

    if cross_val == 1:
        data_train = pd.concat([A, B, C], ignore_index=True)
    elif cross_val == 2:
        data_train = pd.concat([A, C, D], ignore_index=True)
    elif cross_val == 3:
        data_train = pd.concat([A, B, D], ignore_index=True)
    else:
        data_train = pd.concat([B, C, D], ignore_index=True)

    if binary_task:
        data_train = data_train[data_train.Target == target]

    train = get_dataset(data_train, args=args, advs=advs)
    print("Done.")

    return train, data_train


def smiles_to_graph(smiles_string, is_prot=False, received_mol=False, saliency=False):
    """
    Converts SMILES string to graph Data object
    :input: SMILES string (str)
    :return: graph object
    """

    if not is_prot:
        mol = Chem.MolFromSmiles(smiles_string)
    else:
        mol = Chem.MolFromFASTA(smiles_string)
    # atoms
    atom_features_list = []
    atom_feat_dims = get_atom_feature_dims()
    for atom in mol.GetAtoms():
        ftrs = atom_to_feature_vector(atom)
        if saliency:
            ftrs_oh = one_hot_vector_am(ftrs, atom_feat_dims)
            atom_features_list.append(torch.unsqueeze(ftrs_oh, 0))
        else:
            atom_features_list.append(ftrs)

    if saliency:
        x = torch.cat(atom_features_list)
    else:
        x = np.array(atom_features_list, dtype=np.int64)

    # bonds
    num_bond_features = 3  # bond type, bond stereo, is_conjugated
    if len(mol.GetBonds()) > 0:  # mol has bonds
        edges_list = []
        edge_features_list = []
        for bond in mol.GetBonds():
            i = bond.GetBeginAtomIdx()
            j = bond.GetEndAtomIdx()

            edge_feature = bond_to_feature_vector(bond)

            # add edges in both directions
            edges_list.append((i, j))
            edge_features_list.append(edge_feature)
            edges_list.append((j, i))
            edge_features_list.append(edge_feature)

        # data.edge_index: Graph connectivity in COO format with shape [2, num_edges]
        edge_index = np.array(edges_list, dtype=np.int64).T

        # data.edge_attr: Edge feature matrix with shape [num_edges, num_edge_features]
        edge_attr = np.array(edge_features_list, dtype=np.int64)

    else:  # mol has no bonds
        edge_index = np.empty((2, 0), dtype=np.int64)
        edge_attr = np.empty((0, num_bond_features), dtype=np.int64)

    return edge_attr, edge_index, x


def smiles_to_graph_advs(
    smiles_string, args, advs=False, received_mol=False, saliency=False
):
    """
    Converts SMILES string to graph Data object
    :input: SMILES string (str)
    :return: graph object
    """
    if not received_mol:
        mol = Chem.MolFromSmiles(smiles_string)
    else:
        mol = smiles_string

    # atoms
    atom_features_list = []
    atom_feat_dims = get_atom_feature_dims()

    for atom in mol.GetAtoms():
        ftrs = atom_to_feature_vector(atom)
        if saliency:
            ftrs_oh = one_hot_vector_am(ftrs, atom_feat_dims)
            atom_features_list.append(torch.unsqueeze(ftrs_oh, 0))
        else:
            atom_features_list.append(ftrs)

    if saliency:
        x = torch.cat(atom_features_list)
    else:
        x = np.array(atom_features_list, dtype=np.int64)

    if advs:
        # bonds
        mol_edge_dict = {}

    num_bond_features = 3  # bond type, bond stereo, is_conjugated
    features_dim1 = torch.eye(5)
    features_dim2 = torch.eye(6)
    features_dim3 = torch.eye(2)
    if len(mol.GetBonds()) > 0:  # mol has bonds
        edges_list = []
        edge_features_list = []
        for bond in mol.GetBonds():
            i = bond.GetBeginAtomIdx()
            j = bond.GetEndAtomIdx()
            edge_feature = bond_to_feature_vector(bond)

            # add edges in both directions
            edges_list.append((i, j))
            edges_list.append((j, i))

            edge_feature_oh = one_hot_vector_sm(
                edge_feature, features_dim1, features_dim2, features_dim3
            )
            if advs:
                mol_edge_dict[(i, j)] = Variable(
                    torch.tensor([1.0]), requires_grad=True
                )

                # add edges in both directions
                edge_features_list.append(
                    torch.unsqueeze(mol_edge_dict[(i, j)] * edge_feature_oh, 0)
                )
                edge_features_list.append(
                    torch.unsqueeze(mol_edge_dict[(i, j)] * edge_feature_oh, 0)
                )
            else:
                # add edges in both directions
                edge_features_list.append(torch.unsqueeze(edge_feature_oh, 0))
                edge_features_list.append(torch.unsqueeze(edge_feature_oh, 0))
        if advs:
            # Update edge dict
            args.edge_dict[smiles_string] = mol_edge_dict

        # data.edge_index: Graph connectivity in COO format with shape [2, num_edges]
        edge_index = np.array(edges_list, dtype=np.int64).T
        # data.edge_attr: Edge feature matrix with shape [num_edges, num_edge_features]

        edge_attr = torch.cat(edge_features_list)

    else:  # mol has no bonds
        edge_index = np.empty((2, 0), dtype=np.int64)
        edge_attr = np.empty((0, num_bond_features), dtype=np.int64)
        args.edge_dict[smiles_string] = {}

    return edge_attr, edge_index, x


def get_dataset(
    dataset, use_prot=False, target=None, args=None, advs=False, saliency=False
):
    total_dataset = []
    if use_prot:
        prot_graph = transform_molecule_pg(
            target["Fasta"].item(), label=None, is_prot=use_prot
        )

    for mol, label in tqdm(
        zip(dataset["Smiles"], dataset["Label"]), total=len(dataset["Smiles"])
    ):
        if use_prot:
            total_dataset.append([transform_molecule_pg(mol, label, args, advs, saliency=saliency), prot_graph])
        else:
            total_dataset.append(
                transform_molecule_pg(mol, label, args, advs, saliency=saliency)
            )
    return total_dataset


def get_perturbed_dataset(mols, labels, args):
    total_dataset = []
    for mol, label in zip(mols, labels):
        total_dataset.append(transform_molecule_pg(mol, label, args, received_mol=True))
    return total_dataset


def transform_molecule_pg(
    smiles,
    label,
    args=None,
    advs=False,
    received_mol=False,
    saliency=False,
    is_prot=False,
):

    if is_prot:
        edge_attr_p, edge_index_p, x_p = smiles_to_graph(smiles, is_prot)
        x_p = torch.tensor(x_p)
        edge_index_p = torch.tensor(edge_index_p)
        edge_attr_p = torch.tensor(edge_attr_p)

        return Data(edge_attr=edge_attr_p, edge_index=edge_index_p, x=x_p)

    else:
        if args.advs or received_mol:
            if advs or received_mol:
                edge_attr, edge_index, x = smiles_to_graph_advs(
                    smiles,
                    args,
                    advs=True,
                    received_mol=received_mol,
                    saliency=saliency,
                )
            else:
                edge_attr, edge_index, x = smiles_to_graph_advs(
                    smiles, args, received_mol=received_mol, saliency=saliency
                )
        else:
            edge_attr, edge_index, x = smiles_to_graph(smiles, saliency=saliency)

        if not saliency:
            x = torch.tensor(x)
        y = torch.tensor([label])
        edge_index = torch.tensor(edge_index)
        if not args.advs and not received_mol:
            edge_attr = torch.tensor(edge_attr)

        if received_mol:
            mol = smiles
        else:
            mol = Chem.MolFromSmiles(smiles)

        return Data(
            edge_attr=edge_attr, edge_index=edge_index, x=x, y=y, mol=mol, smiles=smiles
        )
```
data/datasets/AD/Inference.csv
ADDED
```csv
OCCCCCn1cnc2C(O)CN=CNc12,ada,0
```
data/datasets/AD/Smiles_AD_1.csv
ADDED
The diff for this file is too large to render.
See raw diff
data/datasets/AD/Smiles_AD_2.csv
ADDED
The diff for this file is too large to render.
See raw diff
data/datasets/AD/Smiles_AD_3.csv
ADDED
The diff for this file is too large to render.
See raw diff
data/datasets/AD/Smiles_AD_4.csv
ADDED
The diff for this file is too large to render.
See raw diff
data/datasets/AD/Smiles_AD_Test.csv
ADDED
The diff for this file is too large to render.
See raw diff
data/datasets/AD/Targets_Fasta.csv
ADDED
MSDVAIVKEGWLHKRGEYIKTWRPRYFLLKNDGTFIGYKERPQDVDQREAPLNNFSVAQCQLMKTERPRPNTFIIRCLQWTTVIERTFHVETPEEREEWTTAIQTVADGLKKQEEEEMDFRSGSPSDNSGAEEMEVSLAKPKHRVTMNEFEYLKLLGKGTFGKVILVKEKATGRYYAMKILKKEVIVAKDEVAHTLTENRVLQNSRHPFLTALKYSFQTHDRLCFVMEYANGGELFFHLSRERVFSEDRARFYGAEIVSALDYLHSEKNVVYRDLKLENLMLDKDGHIKITDFGLCKEGIKDGATMKTFCGTPEYLAPEVLEDNDYGRAVDWWGLGVVMYEMMCGRLPFYNQDHEKLFELILMEEIRFPRTLGPEAKSLLSGLLKKDPKQRLGGGSEDAKEIMQHRFFAGIVWQHVYEKKLSPPFKPQVTSETDTRYFDEEFTAQMITITPPDQDDSMECVDSERRPHFPQFSYSASGTA,akt1,0
MLSNSQGQSPPVPFPAPAPPPQPPTPALPHPPAQPPPPPPQQFPQFHVKSGLQIKKNAIIDDYKVTSQVLGLGINGKVLQIFNKRTQEKFALKMLQDCPKARREVELHWRASQCPHIVRIVDVYENLYAGRKCLLIVMECLDGGELFSRIQDRGDQAFTEREASEIMKSIGEAIQYLHSINIAHRDVKPENLLYTSKRPNAILKLTDFGFAKETTSHNSLTTPCYTPYYVAPEVLGPEKYDKSCDMWSLGVIMYILLCGYPPFYSNHGLAISPGMKTRIRMGQYEFPNPEWSEVSEEVKMLIRNLLKTEPTQRMTITEFMNHPWIMQSTKVPQTPLHTSRVLKEDKERWEDVKEEMTSALATMRVDYEQIKIKKIEDASNPLLLKRRKKARALEAAALAH,mapk2,1
MAHVRGLQLPGCLALAALCSLVHSQHVFLAPQQARSLLQRVRRANTFLEEVRKGNLERECVEETCSYEEAFEALESSTATDVFWAKYTACETARTPRDKLAACLEGNCAEGLGTNYRGHVNITRSGIECQLWRSRYPHKPEINSTTHPGADLQENFCRNPDSSTTGPWCYTTDPTVRRQECSIPVCGQDQVTVAMTPRSEGSSVNLSPPLEQCVPDRGQQYQGRLAVTTHGLPCLAWASAQAKALSKHQDFNSAVQLVENFCRNPDGDEEGVWCYVAGKPGDFGYCDLNYCEEAVEEETGDGLDEDSDRAIEGRTATSEYQTFFNPRTFGSGEADCGLRPLFEKKSLEDKTERELLESYIDGRIVEGSDAEIGMSPWQVMLFRKSPQELLCGASLISDRWVLTAAHCLLYPPWDKNFTENDLLVRIGKHSRTRYERNIEKISMLEKIYIHPRYNWRENLDRDIALMKLKKPVAFSDYIHPVCLPDRETAASLLQAGYKGRVTGWGNLKETWTANVGKGQPSVLQVVNLPIVERPVCKDSTRIRITDNMFCAGYKPDEGKRGDACEGDSGGPFVMKSPFNNRWYQMGIVSWGEGCDRDGKYGFYTHVFRLKKWIQKVIDQFGE,thrb,2
MWSWKCLLFWAVLVTATLCTARPSPTLPEQAQPWGAPVEVESFLVHPGDLLQLRCRLRDDVQSINWLRDGVQLAESNRTRITGEEVEVQDSVPADSGLYACVTSSPSGSDTTYFSVNVSDALPSSEDDDDDDDSSSEEKETDNTKPNRMPVAPYWTSPEKMEKKLHAVPAAKTVKFKCPSSGTPNPTLRWLKNGKEFKPDHRIGGYKVRYATWSIIMDSVVPSDKGNYTCIVENEYGSINHTYQLDVVERSPHRPILQAGLPANKTVALGSNVEFMCKVYSDPQPHIQWLKHIEVNGSKIGPDNLPYVQILKTAGVNTTDKEMEVLHLRNVSFEDAGEYTCLAGNSIGLSHHSAWLTVLEALEERPAVMTSPLYLEIIIYCTGAFLISCMVGSVIVYKMKSGTKKSDFHSQMAVHKLAKSIPLRRQVTVSADSSASMNSGVLLVRPSRLSSSGTPMLAGVSEYELPEDPRWELPRDRLVLGKPLGEGCFGQVVLAEAIGLDKDKPNRVTKVAVKMLKSDATEKDLSDLISEMEMMKMIGKHKNIINLLGACTQDGPLYVIVEYASKGNLREYLQARRPPGLEYCYNPSHNPEEQLSSKDLVSCAYQVARGMEYLASKKCIHRDLAARNVLVTEDNVMKIADFGLARDIHHIDYYKKTTNGRLPVKWMAPEALFDRIYTHQSDVWSFGVLLWEIFTLGGSPYPGVPVEELFKLLKEGHRMDKPSNCTNELYMMMRDCWHAVPSQRPTFKQLVEDLDRIVALTSNQEYLDLSMPLDQYSPSFPDTRSSTCSSGEDSVFSHEPLPEEPCLPRHPAQLANGGLKRR,fgfr1,3
MRALLARLLLCVLVVSDSKGSNELHQVPSNCDCLNGGTCVSNKYFSNIHWCNCPKKFGGQHCEIDKSKTCYEGNGHFYRGKASTDTMGRPCLPWNSATVLQQTYHAHRSDALQLGLGKHNYCRNPDNRRRPWCYVQVGLKPLVQECMVHDCADGKKPSSPPEELKFQCGQKTLRPRFKIIGGEFTTIENQPWFAAIYRRHRGGSVTYVCGGSLISPCWVISATHCFIDYPKKEDYIVYLGRSRLNSNTQGEMKFEVENLILHKDYSADTLAHHNDIALLKIRSKEGRCAQPSRTIQTICLPSMYNDPQFGTSCEITGFGKENSTDYLYPEQLKMTVVKLISHRECQQPHYYGSEVTTKMLCAADPQWKTDSCQGDSGGPLVCSLQGRMTLTGIVSWGRGCALKDKPGVYTRVSHFLPWIRSHTKEENGLAL,urok,4
MGMACLTMTEMEGTSTSSIYQNGDISGNANSMKQIDPVLQVYLYHSLGKSEADYLTFPSGEYVAEEICIAASKACGITPVYHNMFALMSETERIWYPPNHVFHIDESTRHNVLYRIRFYFPRWYCSGSNRAYRHGISRGAEAPLLDDFVMSYLFAQWRHDFVHGWIKVPVTHETQEECLGMAVLDMMRIAKENDQTPLAIYNSISYKTFLPKCIRAKIQDYHILTRKRIRYRFRRFIQQFSQCKATARNLKLKYLINLETLQSAFYTEKFEVKEPGSGPSGEEIFATIIITGNGGIQWSRGKHKESETLTEQDLQLYCDFPNIIDVSIKQANQEGSNESRVVTIHKQDGKNLEIELSSLREALSFVSLIDGYYRLTADAHHYLCKEVAPPAVLENIQSNCHGPISMDFAISKLKKAGNQTGLYVLRCSPKDFNKYFLTFAVERENVIEYKHCLITKNENEEYNLSGTKKNFSSLKDLLNCYQMETVRSDNIIFQFTKCCPPKPKDKSNLLVFRTNGVSDVPTSPTLQRPTHMNQMVFHKIRNEDLIFNESLGQGTFTKIFKGVRREVGDYGQLHETEVLLKVLDKAHRNYSESFFEAASMMSKLSHKHLVLNYGVCVCGDENILVQEFVKFGSLDTYLKKNKNCINILWKLEVAKQLAWAMHFLEENTLIHGNVCAKNILLIREEDRKTGNPPFIKLSDPGISITVLPKDILQERIPWVPPECIENPKNLNLATDKWSFGTTLWEICSGGDKPLSALDSQRKLQFYEDRHQLPAPKWAELANLINNCMDYEPDFRPSFRAIIRDLNSLFTPDYELLTENDMLPNMRIGALGFSGAFEDRDPTQFEERHLKFLQQLGKGNFGSVEMCRYDPLQDNTGEVVAVKKLQHSTEEHLRDFEREIEILKSLQHDNIVKYKGVCYSAGRRNLKLIMEYLPYGSLRDYLQKHKERIDHIKLLQYTSQICKGMEYLGTKRYIHRDLATRNILVENENRVKIGDFGLTKVLPQDKEYYKVKEPGESPIFWYAPESLTESKFSVASDVWSFGVVLYELFTYIEKSKSPPAEFMRMIGNDKQGQMIVFHLIELLKNNGRLPRPDGCPDEIYMIMTECWNNNVNQRPSFRDLALRVDQIRDNMAG,jak2,5
MKTLLLLAVIMIFGLLQAHGNLVNFHRMIKLTTGKEAALSYGFYGCHCGVGGRGSPKDATDRCCVTHDCCYKRLEKRGCGTKFLSYKFSNSGSRITCAKQDSCRSQLCECDKAAATCFARNKTTYNKKYQYYSNKHCRGSTPRC,pa2ga,6
MRQSLLFLTSVVPFVLAPRPPDDPGFGPHQRLEKLDSLLSDYDILSLSNIQQHSVRKRDLQTSTHVETLLTFSALKRHFKLYLTSSTERFSQNFKVVVVDGKNESEYTVKWQDFFTGHVVGEPDSRVLAHIRDDDVIIRINTDGAEYNIEPLWRFVNDTKDKRMLVYKSEDIKNVSRLQSPKVCGYLKVDNEELLPKGLVDREPPEELVHRVKRRADPDPMKNTCKLLVVADHRFYRYMGRGEESTTTNYLIELIDRVDDIYRNTSWDNAGFKGYGIQIEQIRILKSPQEVKPGEKHYNMAKSYPNEEKDAWDVKMLLEQFSFDIAEEASKVCLAHLFTYQDFDMGTLGLAYVGSPRANSHGGVCPKAYYSPVGKKNIYLNSGLTSTKNYGKTILTKEADLVTTHELGHNFGAEHDPDGLAECAPNEDQGGKYVMYPIAVSGDHENNKMFSNCSKQSIYKTIESKAQECFQERSNKVCGNSRVDEGEECDPGIMYLNNDTCCNSDCTLKEGVQCSDRNSPCCKNCQFETAQKKCQEAINATCKGVSYCTGNSSECPPPGNAEDDTVCLDLGKCKDGKCIPFCEREQQLESCACNETDNSCKVCCRDLSGRCVPYVDAEQKNLFLRKGKPCTVGFCDMNGKCEKRVQDVIERFWDFIDQLSINTFGKFLADNIVGSVLVFSLIFWIPFSILVHCVDKKLDKQYESLSLFHPSNVEMLSSMDSASVRIIKPFPAPQTPGRLQPAPVIPSAPAAPKLDHQRMDTIQEDPSTDSHMDEDGFEKDPFPNSSTAAKSFEDLTDHPVTRSEKAASFKLQRQNRVDSKETEC,ada17,7
MEVQLGLGRVYPRPPSKTYRGAFQNLFQSVREVIQNPGPRHPEAASAAPPGASLLLLQQQQQQQQQQQQQQQQQQQQQQQETSPRQQQQQQGEDGSPQAHRRGPTGYLVLDEEQQPSQPQSALECHPERGCVPEPGAAVAASKGLPQQLPAPPDEDDSAAPSTLSLLGPTFPGLSSCSADLKDILSEASTMQLLQQQQQEAVSEGSSSGRAREASGAPTSSKDNYLGGTSTISDNAKELCKAVSVSMGLGVEALEHLSPGEQLRGDCMYAPLLGVPPAVRPTPCAPLAECKGSLLDDSAGKSTEDTAEYSPFKGGYTKGLEGESLGCSGSAAAGSSGTLELPSTLSLYKSGALDEAAAYQSRDYYNFPLALAGPPPPPPPPHPHARIKLENPLDYGSAWAAAAAQCRYGDLASLHGAGAAGPGSGSPSAAASSSWHTLFTAEEGQLYGPCGGGGGGGGGGGGGGGGGGGGGGGEAGAVAPYGYTRPPQGLAGQESDFTAPDVWYPGGMVSRVPYPSPTCVKSEMGPWMDSYSGPYGDMRLETARDHVLPIDYYFPPQKTCLICGDEASGCHYGALTCGSCKVFFKRAAEGKQKYLCASRNDCTIDKFRRKNCPSCRLRKCYEAGMTLGARKLKKLGNLKLQEEGEASSTTSPTEETTQKLTVSHIEGYECQPIFLNVLEAIEPGVVCAGHDNNQPDSFAALLSSLNELGERQLVHVVKWAKALPGFRNLHVDDQMAVIQYSWMGLMVFAMGWRSFTNVNSRMLYFAPDLVFNEYRMHKSRMYSQCVRMRHLSQEFGWLQITPQEFLCMKALLLFSIIPVDGLKNQKFFDELRMNYIKELDRIIACKRKNPTSCSRRFYQLTKLLDSVQPIARELHQFTFDLLIKSHMVSVDFPEMMAEIISVQVPKILSGKVKPIYFHTQ,andr,8
MKDSCITVMAMALLSGFFFFAPASSYNLDVRGARSFSPPRAGRHFGYRVLQVGNGVIVGAPGEGNSTGSLYQCQSGTGHCLPVTLRGSNYTSKYLGMTLATDPTDGSILACDPGLSRTCDQNTYLSGLCYLFRQNLQGPMLQGRPGFQECIKGNVDLVFLFDGSMSLQPDEFQKILDFMKDVMKKLSNTSYQFAAVQFSTSYKTEFDFSDYVKRKDPDALLKHVKHMLLLTNTFGAINYVATEVFREELGARPDATKVLIIITDGEATDSGNIDAAKDIIRYIIGIGKHFQTKESQETLHKFASKPASEFVKILDTFEKLKDLFTELQKKIYVIEGTSKQDLTSFNMELSSSGISADLSRGHAVVGAVGAKDWAGGFLDLKADLQDDTFIGNEPLTPEVRAGYLGYTVTWLPSRQKTSLLASGAPRYQHMGRVLLFQEPQGGGHWSQVQTIHGTQIGSYFGGELCGVDVDQDGETELLLIGAPLFYGEQRGGRVFIYQRRQLGFEEVSELQGDPGYPLGRFGEAITALTDINGDGLVDVAVGAPLEEQGAVYIFNGRHGGLSPQPSQRIEGTQVLSGIQWFGRSIHGVKDLEGDGLADVAVGAESQMIVLSSRPVVDMVTLMSFSPAEIPVHEVECSYSTSNKMKEGVNITICFQIKSLIPQFQGRLVANLTYTLQLDGHRTRRRGLFPGGRHELRRNIAVTTSMSCTDFSFHFPVCVQDLISPINVSLNFSLWEEEGTPRDQRAQGKDIPPILRPSLHSETWEIPFEKNCGEDKKCEANLRVSFSPARSRALRLTAFASLSVELSLSNLEEDAYWVQLDLHFPPGLSFRKVEMLKPHSQIPVSCEELPEESRLLSRALSCNVSSPIFKAGHSVALQMMFNTLVNSSWGDSVELHANVTCNNEDSDLLEDNSATTIIPILYPINILIQDQEDSTLYVSFTPKGPKIHQVKHMYQVRIQPSIHDHNIPTLEAVVGVPQPPSEGPITHQWSVQMEPPVPCHYEDLERLPDAAEPCLPGALFRCPVVFRQEILVQVIGTLELVGEIEASSMFSLCSSLSISFNSSKHFHLYGSNASLAQVVMKVDVVYEKQMLYLYVLSGIGGLLLLLLIFIVLYKVGFFKRNLKEKMEAGRGVPNGIPAEDSEQLASGQEAGDPGCLKPLHEKDSESGGGKD,ital,9
MAALSGGGGGGAEPGQALFNGDMEPEAGAGAGAAASSAADPAIPEEVWNIKQMIKLTQEHIEALLDKFGGEHNPPSIYLEAYEEYTSKLDALQQREQQLLESLGNGTDFSVSSSASMDTVTSSSSSSLSVLPSSLSVFQNPTDVARSNPKSPQKPIVRVFLPNKQRTVVPARCGVTVRDSLKKALMMRGLIPECCAVYRIQDGEKKPIGWDTDISWLTGEELHVEVLENVPLTTHNFVRKTFFTLAFCDFCRKLLFQGFRCQTCGYKFHQRCSTEVPLMCVNYDQLDLLFVSKFFEHHPIPQEEASLAETALTSGSSPSAPASDSIGPQILTSPSPSKSIPIPQPFRPADEDHRNQFGQRDRSSSAPNVHINTIEPVNIDDLIRDQGFRGDGGSTTGLSATPPASLPGSLTNVKALQKSPGPQRERKSSSSSEDRNRMKTLGRRDSSDDWEIPDGQITVGQRIGSGSFGTVYKGKWHGDVAVKMLNVTAPTPQQLQAFKNEVGVLRKTRHVNILLFMGYSTKPQLAIVTQWCEGSSLYHHLHIIETKFEMIKLIDIARQTAQGMDYLHAKSIIHRDLKSNNIFLHEDLTVKIGDFGLATVKSRWSGSHQFEQLSGSILWMAPEVIRMQDKNPYSFQSDVYAFGIVLYELMTGQLPYSNINNRDQIIFMVGRGYLSPDLSKVRSNCPKAMKRLMAECLKKKRDERPLFPQILASIELLARSLPKIHRSASEPSLNRAGFQTEDFSLYACASPKTPIQAGGYGAFPVH,braf,10
MTPNSMTENGLTAWDKPKHCPDREHDWKLVGMSEACLHRKSHSERRSTLKNEQSSPHLIQTTWTSSIFHLDHDDVNDQSVSSAQTFQTEEKKCKGYIPSYLDKDELCVVCGDKATGYHYRCITCEGCKGFFRRTIQKNLHPSYSCKYEGKCVIDKVTRNQCQECRFKKCIYVGMATDLVLDDSKRLAKRKLIEENREKRRREELQKSIGHKPEPTDEEWELIKTVTEAHVATNAQGSHWKQKRKFLPEDIGQAPIVNAPEGGKVDLEAFSHFTKIITPAITRVVDFAKKLPMFCELPCEDQIILLKGCCMEIMSLRAAVRYDPESETLTLNGEMAVTRGQLKNGGLGVVSDAIFDLGMSLSSFNLDDTEVALLQAVLLMSSDRPGLACVERIEKYQDSFLLAFEHYINYRKHHVTHFWPKLLMKVTDLRMIGACHASRFLHMKVECPTELFPPLFLEVFED,thb,11
MEHGTLLAQPGLWTRDTSWALLYFLCYILPQTAPQVLRIGGIFETVENEPVNVEELAFKFAVTSINRNRTLMPNTTLTYDIQRINLFDSFEASRRACDQLALGVAALFGPSHSSSVSAVQSICNALEVPHIQTRWKHPSVDNKDLFYINLYPDYAAISRAILDLVLYYNWKTVTVVYEDSTGLIRLQELIKAPSRYNIKIKIRQLPSGNKDAKPLLKEMKKGKEFYVIFDCSHETAAEILKQILFMGMMTEYYHYFFTTLDLFALDLELYRYSGVNMTGFRLLNIDNPHVSSIIEKWSMERLQAPPRPETGLLDGMMTTEAALMYDAVYMVAIASHRASQLTVSSLQCHRHKPWRLGPRFMNLIKEARWDGLTGHITFNKTNGLRKDFDLDIISLKEEGTEKAAGEVSKHLYKVWKKIGIWNSNSGLNMTDSNKDKSSNITDSLANRTLIVTTILEEPYVMYRKSDKPLYGNDRFEGYCLDLLKELSNILGFIYDVKLVPDGKYGAQNDKGEWNGMVKELIDHRADLAVAPLTITYVREKVIDFSKPFMTLGISILYRKPNGTNPGVFSFLNPLSPDIWMYVLLACLGVSCVLFVIARFTPYEWYNPHPCNPDSDVVENNFTLLNSFWFGVGALMQQGSELMPKALSTRIVGGIWWFFTLIIISSYTANLAAFLTVERMESPIDSADDLAKQTKIEYGAVRDGSTMTFFKKSKISTYEKMWAFMSSRQQTALVRNSDEGIQRVLTTDYALLMESTSIEYVTQRNCNLTQIGGLIDSKGYGVGTPIGSPYRDKITIAILQLQEEGKLHMMKEKWWRGNGCPEEDNKEASALGVENIGGIFIVLAAGLVLSVFVAIGEFIYKSRKNNDIEQAFCFFYGLQCKQTHPTNSTSGTTLSTDLECGKLIREERGIRKQSSVHTV,grik1,12
MADPAAGPPPSEGEESTVRFARKGALRQKNVHEVKNHKFTARFFKQPTFCSHCTDFIWGFGKQGFQCQVCCFVVHKRCHEFVTFSCPGADKGPASDDPRSKHKFKIHTYSSPTFCDHCGSLLYGLIHQGMKCDTCMMNVHKRCVMNVPSLCGTDHTERRGRIYIQAHIDRDVLIVLVRDAKNLVPMDPNGLSDPYVKLKLIPDPKSESKQKTKTIKCSLNPEWNETFRFQLKESDKDRRLSVEIWDWDLTSRNDFMGSLSFGISELQKASVDGWFKLLSQEEGEYFNVPVPPEGSEANEELRQKFERAKISQGTKVPEEKTTNTVSKFDNNGNRDRMKLTDFNFLMVLGKGSFGKVMLSERKGTDELYAVKILKKDVVIQDDDVECTMVEKRVLALPGKPPFLTQLHSCFQTMDRLYFVMEYVNGGDLMYHIQQVGRFKEPHAVFYAAEIAIGLFFLQSKGIIYRDLKLDNVMLDSEGHIKIADFGMCKENIWDGVTTKTFCGTPDYIAPEIIAYQPYGKSVDWWAFGVLLYEMLAGQAPFEGEDEDELFQSIMEHNVAYPKSMSKEAVAICKGLMTKHPGKRLGCGPEGERDIKEHAFFRYIDWEKLERKEIQPPYKPKARDKRDTSNFDKEFTRQPVELTPTDKLFIMNLDQNEFAGFSYTNPEFVINV,kpcb,13
FLDGIDKAQEEHEKYHSNWRAMASDFNLPPVVAKEIVASCDKCQLKGEAMHGQVDCSPGIWQLDCTHLEGKVILVAVHVASGYIEAEVIPAETGQETAYFLLKLAGRWPVKTVHTDNGSNFTSTTVKAACWWAGIKQEFGIPYNPQSQGVIESMNKELKKIIGQVRDQAEHLKTAVQMAVFIHNFKRKGGIGGYSAGERIVDIIATDIQTKELQKQITKIQNFRVYYRDSRDPVWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRDYGKQMAGDDCVASRQDED,hivint,14
MAWRHLKKRAQDAVIILGGGGLLFASYLMATGDERFYAEHLMPTLQGLLDPESAHRLAVRFTSLGLLPRARFQDSDMLEVRVLGHKFRNPVGIAAGFDKHGEAVDGLYKMGFGFVEIGSVTPKPQEGNPRPRVFRLPEDQAVINRYGFNSHGLSVVEHRLRARQQKQAKLTEDGLPLGVNLGKNKTSVDAAEDYAEGVRVLGPLADYLVVNVSSPNTAGLRSLQGKAELRRLLTKVLQERDGLRRVHRPAVLVKIAPDLTSQDKEDIASVVKELGIDGLIVTNTTVSRPAGLQGALRSETGGLSGKPLRDLSTQTIREMYALTQGRVPIIGVGGVSSGQDALEKIRAGASLVQLYTALTFWGPPVVGKVKRELEALLKEQGFGGVTDAIGADHRR,pyrd,15
MENFQKVEKIGEGTYGVVYKARNKLTGEVVALKKIRLDTETEGVPSTAIREISLLKELNHPNIVKLLDVIHTENKLYLVFEFLHQDLKKFMDASALTGIPLPLIKSYLFQLLQGLAFCHSHRVLHRDLKPQNLLINTEGAIKLADFGLARAFGVPVRTYTHEVVTLWYRAPEILLGCKYYSTAVDIWSLGCIFAEMVTRRALFPGDSEIDQLFRIFRTLGTPDEVVWPGVTSMPDYKPSFPKWARQDFSKVVPPLDEDGRSLLSQMLHYDPNKRISAKAALAHPFFQDVTKPVPHLRL,cdk2,16
MCDAFVGTWKLVSSENFDDYMKEVGVGFATRKVAGMAKPNMIISVNGDVITIKSESTFKNTEISFILGQEFDEVTADDRKVKSTITLDGGVLVHVQKWDGKSTTIKRKREDDKLVVECVMKGVTSTRVYERA,fabp4,17
MDTKHFLPLDFSTQVNSSLTSPTGRGSMAAPSLHPSLGPGIGSPGQLHSPISTLSSPINGMGPPFSVISSPMGPHSMSVPTTPTLGFSTGSPQLSSPMNPVSSSEDIKPPLGLNGVLKVPAHPSGNMASFTKHICAICGDRSSGKHYGVYSCEGCKGFFKRTVRKDLTYTCRDNKDCLIDKRQRNRCQYCRYQKCLAMGMKREAVQEERQRGKDRNENEVESTSSANEDMPVERILEAELAVEPKTETYVEANMGLNPSSPNDPVTNICQAADKQLFTLVEWAKRIPHFSELPLDDQVILLRAGWNELLIASFSHRSIAVKDGILLATGLHVHRNSAHSAGVGAIFDRVLTELVSKMRDMQMDKTELGCLRAIVLFNPDSKGLSNPAEVEALREKVYASLEAYCKHKYPEQPGRFAKLLLRLPALRSIGLKCLEHLFFFKLIGDTPIDTFLMEMLEAPHQMT,rxra,18
MNPLLILTFVAAALAAPFDDDDKIVGGYNCEENSVPYQVSLNSGYHFCGGSLINEQWVVSAGHCYKSRIQVRLGEHNIEVLEGNEQFINAAKIIRHPQYDRKTLNNDIMLIKLSSRAVINARVSTISLPTAPPATGTKCLISGWGNTASSGADYPDELQCLDAPVLSQAKCEASYPGKITSNMFCVGFLEGGKDSCQGDSGGPVVCNGQLQGVVSWGDGCAQKNKPGVYTKVYNYVKWIKNTIAANS,try1,19
MSCINLPTVLPGSPSKTRGQIQVILGPMFSGKSTELMRRVRRFQIAQYKCLVIKYAKDTRYSSSFCTHDRNTMEALPACLLRDVAQEALGVAVIGIDEGQFFPDIVEFCEAMANAGKTVIVAALDGTFQRKPFGAILNLVPLAESVVKLTAVCMECFREAAYTKRLGTEKEVEVIGGADKYHSVCRLCYFKKASGQPAGPDNKENCPVPGKPGEAVAARKLFAPQQILQCSPAN,kith,20
MALIPDLAMETWLLLAVSLVLLYLYGTHSHGLFKKLGIPGPTPLPFLGNILSYHKGFCMFDMECHKKYGKVWGFYDGQQPVLAITDPDMIKTVLVKECYSVFTNRRPFGPVGFMKSAISIAEDEEWKRLRSLLSPTFTSGKLKEMVPIIAQYGDVLVRNLRREAETGKPVTLKDVFGAYSMDVITSTSFGVNIDSLNNPQDPFVENTKKLLRFDFLDPFFLSITVFPFLIPILEVLNICVFPREVTNFLRKSVKRMKESRLEDTQKHRVDFLQLMIDSQNSKETESHKALSDLELVAQSIIFIFAGYETTSSVLSFIMYELATHPDVQQKLQEEIDAVLPNKAPPTYDTVLQMEYLDMVVNETLRLFPIAMRLERVCKKDVEINGMFIPKGVVVMIPSYALHRDPKYWTEPEKFLPERFSKKNKDNIDPYIYTPFGSGPRNCIGMRFALMNMKLALIRVLQNFSFKPCKETQIPLKLSLGGLLQPEKPVVLKVESRDGTVSGA,cp3a4,21
MAYSQGGGKKKVCYYYDGDIGNYYYGQGHPMKPHRIRMTHNLLLNYGLYRKMEIYRPHKATAEEMTKYHSDEYIKFLRSIRPDNMSEYSKQMQRFNVGEDCPVFDGLFEFCQLSTGGSVAGAVKLNRQQTDMAVNWAGGLHHAKKSEASGFCYVNDIVLAILELLKYHQRVLYIDIDIHHGDGVEEAFYTTDRVMTVSFHKYGEYFPGTGDLRDIGAGKGKYYAVNFPMRDGIDDESYGQIFKPIISKVMEMYQPSAVVLQCGADSLSGDRLGCFNLTVKGHAKCVEVVKTFNLPLLMLGGGGYTIRNVARCWTYETAVALDCEIPNELPYNDYFEYFGPDFKLHISPSNMTNQNTPEYMEKIKQRLFENLRMLPHAPGVQMQAIPEDAVHEDSGDEDGEDPDKRISIRASDKRIACDEEFSDSEDEGEGGRRNVADHKKGAKKARIEEDKKETEDKKTDVKEEDKSKDNSGEKTDTKGTKSEQLSNP,hdac2,22
MTELKAKGPRAPHVAGGPPSPEVGSPLLCRPAAGPFPGSQTSDTLPEVSAIPISLDGLLFPRPCQGQDPSDEKTQDQQSLSDVEGAYSRAEATRGAGGSSSSPPEKDSGLLDSVLDTLLAPSGPGQSQPSPPACEVTSSWCLFGPELPEDPPAAPATQRVLSPLMSRSGCKVGDSSGTAAAHKVLPRGLSPARQLLLPASESPHWSGAPVKPSPQAAAVEVEEEDGSESEESAGPLLKGKPRALGGAAAGGGAAAVPPGAAAGGVALVPKEDSRFSAPRVALVEQDAPMAPGRSPLATTVMDFIHVPILPLNHALLAARTRQLLEDESYDGGAGAASAFAPPRSSPCASSTPVAVGDFPDCAYPPDAEPKDDAYPLYSDFQPPALKIKEEEEGAEASARSPRSYLVAGANPAAFPDFPLGPPPPLPPRATPSRPGEAAVTAAPASASVSSASSSGSTLECILYKAEGAPPQQGPFAPPPCKAPGASGCLLPRDGLPSTSASAAAAGAAPALYPALGLNGLPQLGYQAAVLKEGLPQVYPPYLNYLRPDSEASQSPQYSFESLPQKICLICGDEASGCHYGVLTCGSCKVFFKRAMEGQHNYLCAGRNDCIVDKIRRKNCPACRLRKCCQAGMVLGGRKFKKFNKVRVVRALDAVALPQPVGVPNESQALSQRFTFSPGQDIQLIPPLINLLMSIEPDVIYAGHDNTKPDTSSSLLTSLNQLGERQLLSVVKWSKSLPGFRNLHIDDQITLIQYSWMSLMVFGLGWRSYKHVSGQMLYFAPDLILNEQRMKESSFYSLCLTMWQIPQEFVKLQVSQEEFLCMKVLLLLNTIPLEGLRSQTQFEEMRSSYIRELIKAIGLRQKGVVSSSQRFYQLTKLLDNLHDLVKQLHLYCLNTFIQSRALSVEFPEMMSEVIAAQLPKILAGMVKPLLFHKK,prgr,23
MGSNKSKPKDASQRRRSLEPAENVHGAGGGAFPASQTPSKPASADGHRGPSAAFAPAAAEPKLFGGFNSSDTVTSPQRAGPLAGGVTTFVALYDYESRTETDLSFKKGERLQIVNNTEGDWWLAHSLSTGQTGYIPSNYVAPSDSIQAEEWYFGKITRRESERLLLNAENPRGTFLVRESETTKGAYCLSVSDFDNAKGLNVKHYKIRKLDSGGFYITSRTQFNSLQQLVAYYSKHADGLCHRLTTVCPTSKPQTQGLAKDAWEIPRESLRLEVKLGQGCFGEVWMGTWNGTTRVAIKTLKPGTMSPEAFLQEAQVMKKLRHEKLVQLYAVVSEEPIYIVTEYMSKGSLLDFLKGETGKYLRLPQLVDMAAQIASGMAYVERMNYVHRDLRAANILVGENLVCKVADFGLARLIEDNEYTARQGAKFPIKWTAPEAALYGRFTIKSDVWSFGILLTELTTKGRVPYPGMVNREVLDQVERGYRMPCPPECPESLHDLMCQCWRKEPEERPTFEYLQAFLEDYFTSTEPQYQPGENL,src,24
MTFNSFEGSKTCVPADINKEEEFVEEFNRLKTFANFPSGSPVSASTLARAGFLYTGEGDTVRCFSCHAAVDRWQYGDSAVGRHRKVSPNCRFINGFYLENSATQSTNSGIQNGQYKVENYLGSRDHFALDRPSETHADYLLRTGQVVDISDTIYPRNPAMYSEEARLKSFQNWPDYAHLTPRELASAGLYYTGIGDQVQCFCCGGKLKNWEPCDRAWSEHRRHFPNCFFVLGRNLNIRSESDAVSSDRNFPNSTNLPRNPSMADYEARIFTFGTWIYSVNKEQLARAGFYALGEGDKVKCFHCGGGLTDWKPSEDPWEQHAKWYPGCKYLLEQKGQEYINNIHLTHSLEECLVRTTEKTPSLTRRIDDTIFQNPMVQEAIRMGFSFKDIKKIMEEKIQISGSNYKSLEVLVADLVNAQKDSMQDESSQTSLQKEISTEEQLRRLQEEKLCKICMDRNIAIVFVPCGHLVTCKQCAEAVDKCPMCYTVITFKQKIFMS,xiap,25
MSRSLLLWFLLFLLLLPPLPVLLADPGAPTPVNPCCYYPCQHQGICVRFGLDRYQCDCTRTGYSGPNCTIPGLWTWLRNSLRPSPSFTHFLLTHGRWFWEFVNATFIREMLMRLVLTVRSNLIPSPPTYNSAHDYISWESFSNVSYYTRILPSVPKDCPTPMGTKGKKQLPDAQLLARRFLLRRKFIPDPQGTNLMFAFFAQHFTHQFFKTSGKMGPGFTKALGHGVDLGHIYGDNLERQYQLRLFKDGKLKYQVLDGEMYPPSVEEAPVLMHYPRGIPPQSQMAVGQEVFGLLPGLMLYATLWLREHNRVCDLLKAEHPTWGDEQLFQTTRLILIGETIKIVIEEYVQQLSGYFLQLKFDPELLFGVQFQYRNRIAMEFNHLYHWHPLMPDSFKVGSQEYSYEQFLFNTSMLVDYGVEALVDAFSRQIAGRIGGGRNMDHHILHVAVDVIRESREMRLQPFNEYRKRFGMKPYTSFQELVGEKEMAAELEELYGDIDALEFYPGLLLEKCHPNSIFGESMIEIGAPFSLKGLLGNPICSPEYWKPSTFGGEVGFNIVKTATLKKLVCLNTKTCPYVSFRVPDASQDDGPAVERPSTEL,pgh1,26
MVDTESPLCPLSPLEAGDLESPLSEEFLQEMGNIQEISQSIGEDSSGSFGFTEYQYLGSCPGSDGSVITDTLSPASSPSSVTYPVVPGSVDESPSGALNIECRICGDKASGYHYGVHACEGCKGFFRRTIRLKLVYDKCDRSCKIQKKNRNKCQYCRFHKCLSVGMSHNAIRFGRMPRSEKAKLKAEILTCEHDIEDSETADLKSLAKRIYEAYLKNFNMNKVKARVILSGKASNNPPFVIHDMETLCMAEKTLVAKLVANGIQNKEAEVRIFHCCQCTSVETVTELTEFAKAIPGFANLDLNDQVTLLKYGVYEAIFAMLSSVMNKDGMLVAYGNGFITREFLKSLRKPFCDIMEPKFDFAMKFNALELDDSDISLFVAAIICCGDRPGLLNVGHIEKMQEGIVHVLRLHLQSNHPDDIFLFPKLLQKMADLRQLVTEHAQLVQIIKKTESDAALHPLLQEIYRDMY,ppara,27
MAATEGVGEAAQGGEPGQPAQPPPQPHPPPPQQQHKEEMAAEAGEAVASPMDDGFVSLDSPSYVLYRDRAEWADIDPVPQNDGPNPVVQIIYSDKFRDVYDYFRAVLQRDERSERAFKLTRDAIELNAANYTVWHFRRVLLKSLQKDLHEEMNYITAIIEEQPKNYQVWHHRRVLVEWLRDPSQELEFIADILNQDAKNYHAWQHRQWVIQEFKLWDNELQYVDQLLKEDVRNNSVWNQRYFVISNTTGYNDRAVLEREVQYTLEMIKLVPHNESAWNYLKGILQDRGLSKYPNLLNQLLDLQPSHSSPYLIAFLVDIYEDMLENQCDNKEDILNKALELCEILAKEKDTIRKEYWRYIGRSLQSKHSTENDSPTNVQQ,fnta,28
MPEAPPLLLAAVLLGLVLLVVLLLLLRHWGWGLCLIGWNEFILQPIHNLLMGDTKEQRILNHVLQHAEPGNAQSVLEAIDTYCEQKEWAMNVGDKKGKIVDAVIQEHQPSVLLELGAYCGYSAVRMARLLSPGARLITIEINPDCAAITQRMVDFAGVKDKVTLVVGASQDIIPQLKKKYDVDTLDMVFLDHWKDRYLPDTLLLEECGLLRKGTVLLADNVICPGAPDFLAHVRGSSCFECTHYQSFLEYREVVDGLEKAIYKGPGSEAGP,comt,29
MEGISIYTSDNYTEEMGSGDYDSMKEPCFREENANFNKIFLPTIYSIIFLTGIVGNGLVILVMGYQKKLRSMTDKYRLHLSVADLLFVITLPFWAVDAVANWYFGNFLCKAVHVIYTVNLYSSVLILAFISLDRYLAIVHATNSQRPRKLLAEKVVYVGVWIPALLLTIPDFIFANVSEADDRYICDRFYPNDLWVVVFQFQHIMVGLILPGIVILSCYCIIISKLSHSKGHQKRKALKTTVILILAFFACWLPYYIGISIDSFILLEIIKQGCEFENTVHKWISITEALAFFHCCLNPILYAFLGAKFKTSAQHALTSVSRGSSLKILSKGKRGGHSSVSTESESSSFHSS,cxcr4,30
MPVAGSELPRRPLPPAAQERDAEPRPPHGELQYLGQIQHILRCGVRKDDRTGTGTLSVFGMQARYSLRDEFPLLTTKRVFWKGVLEELLWFIKGSTNAKELSSKGVKIWDANGSRDFLDSLGFSTREEGDLGPVYGFQWRHFGAEYRDMESDYSGQGVDQLQRVIDTIKTNPDDRRIIMCAWNPRDLPLMALPPCHALCQFYVVNSELSCQLYQRSGDMGLGVPFNIASYALLTYMIAHITGLKPGDFIHTLGDAHIYLNHIEPLKIQLQREPRPFPKLRILRKVEKIDDFKAEDFQIEGYNPHPTIKMEMAV,tysy,31
PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGKWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNFP,hivpr,32
MKAPAVLAPGILVLLFTLVQRSNGECKEALAKSEMNVNMKYQLPNFTAETPIQNVILHEHHIFLGATNYIYVLNEEDLQKVAEYKTGPVLEHPDCFPCQDCSSKANLSGGVWKDNINMALVVDTYYDDQLISCGSVNRGTCQRHVFPHNHTADIQSEVHCIFSPQIEEPSQCPDCVVSALGAKVLSSVKDRFINFFVGNTINSSYFPDHPLHSISVRRLKETKDGFMFLTDQSYIDVLPEFRDSYPIKYVHAFESNNFIYFLTVQRETLDAQTFHTRIIRFCSINSGLHSYMEMPLECILTEKRKKRSTKKEVFNILQAAYVSKPGAQLARQIGASLNDDILFGVFAQSKPDSAEPMDRSAMCAFPIKYVNDFFNKIVNKNNVRCLQHFYGPNHEHCFNRTLLRNSSGCEARRDEYRTEFTTALQRVDLFMGQFSEVLLTSISTFIKGDLTIANLGTSEGRFMQVVVSRSGPSTPHVNFLLDSHPVSPEVIVEHTLNQNGYTLVITGKKITKIPLNGLGCRHFQSCSQCLSAPPFVQCGWCHDKCVRSEECLSGTWTQQICLPAIYKVFPNSAPLEGGTRLTICGWDFGFRRNNKFDLKKTRVLLGNESCTLTLSESTMNTLKCTVGPAMNKHFNMSIIISNGHGTTQYSTFSYVDPVITSISPKYGPMAGGTLLTLTGNYLNSGNSRHISIGGKTCTLKSVSNSILECYTPAQTISTEFAVKLKIDLANRETSIFSYREDPIVYEIHPTKSFISGGSTITGVGKNLNSVSVPRMVINVHEAGRNFTVACQHRSNSEIICCTTPSLQQLNLQLPLKTKAFFMLDGILSKYFDLIYVHNPVFKPFEKPVMISMGNENVLEIKGNDIDPEAVKGEVLKVGNKSCENIHLHSEAVLCTVPNDLLKLNSELNIEWKQAISSTVLGKVIVQPDQNFTGLIAGVVSISTALLLLLGFFLWLKKRKQIKDLGSELVRYDARVHTPHLDRLVSARSVSPTTEMVSNESVDYRATFPEDQFPNSSQNGSCRQVQYPLTDMSPILTSGDSDISSPLLQNTVHIDLSALNPELVQAVQHVVIGPSSLIVHFNEVIGRGHFGCVYHGTLLDNDGKKIHCAVKSLNRITDIGEVSQFLTEGIIMKDFSHPNVLSLLGICLRSEGSPLVVLPYMKHGDLRNFIRNETHNPTVKDLIGFGLQVAKGMKYLASKKFVHRDLAARNCMLDEKFTVKVADFGLARDMYDKEYYSVHNKTGAKLPVKWMALESLQTQKFTTKSDVWSFGVLLWELMTRGAPPYPDVNTFDITVYLLQGRRLLQPEYCPDPLYEVMLKCWHPKAEMRPSFSELVSRISAIFSTFIGEHYVHVNATYVNVKCVAPYPSLLSSEDNADDEVDTRPASFWETS,met,33
MVLHLLLFLLLTPQGGHSCQGLELARELVLAKVRALFLDALGPPAVTREGGDPGVRRLPRRHALGGFTHRGSEPEEEEDVSQAILFPATDASCEDKSAARGLAQEAEEGLFRYMFRPSQHTRSRQVTSAQLWFHTGLDRQGTAASNSSEPLLGLLALSPGGPVAVPMSLGHAPPHWAVLHLATSALSLLTHPVLVLLLRCPLCTCSARPEATPFLVAHTRTRPPSGGERARRSTPLMSWPWSPSALRLLQRPPEEPAAHANCHRVALNISFQELGWERWIVYPPSFIFHYCHGGCGLHIPPNLSLPVPGAPPTPAQPYSLLPGAQPCCAALPGTMRPLHVRTTSDGGYSFKYETVPNLLTQHCACI,inha,34
MEEPEEPADSGQSLVPVYIYSPEYVSMCDSLAKIPKRASMVHSLIEAYALHKQMRIVKPKVASMEEMATFHTDAYLQHLQKVSQEGDDDHPDSIEYGLGYDCPATEGIFDYAAAIGGATITAAQCLIDGMCKVAINWSGGWHHAKKDEASGFCYLNDAVLGILRLRRKFERILYVDLDLHHGDGVEDAFSFTSKVMTVSLHKFSPGFFPGTGDVSDVGLGKGRYYSVNVPIQDGIQDEKYYQICESVLKEVYQAFNPKAVVLQLGADTIAGDPMCSFNMTPVGIGKCLKYILQWQLATLILGGGGYNLANTARCWTYLTGVILGKTLSSEIPDHEFFTAYGPDYVLEITPSCRPDRNEPHRIQQILNYIKGNLKHVV,hdac8,35
MENTENSVDSKSIKNLEPKIIHGSESMDSGISLDNSYKMDYPEMGLCIIINNKNFHKSTGMTSRSGTDVDAANLRETFRNLKYEVRNKNDLTREEIVELMRDVSKEDHSKRSSFVCVLLSHGEEGIIFGTNGPVDLKKITNFFRGDRCRSLTGKPKLFIIQACRGTELDCGIETDSGVDDDMACHKIPVEADFLYAYSTAPGYYSWRNSKDGSWFIQSLCAMLKQYADKLEFMHILTRVNRKVATEFESFSFDATFHAKKQIPCIVSMLTKELYFYH,bace1,36
MSHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKPLSVSYDQATSLRILNNGHAFNVEFDDSQDKAVLKGGPLDGTYRLIQFHFHWGSLDGQGSEHTVDKKKYAAELHLVHWNTKYGDFGKAVQQPDGLAVLGIFLKVGSAKPGLQKVVDVLDSIKTKGKSADFTNFDPRGLLPESLDYWTYPGSLTTPPLLECVTWIVLKEPISVSSEQVLKFRKLNFNGEGEPEELMVDNWRPAQPLKNRQIKASFK,cah2,37
MRGARGAWDFLCVLLLLLRVQTGSSQPSVSPGEPSPPSIHPGKSDLIVRVGDEIRLLCTDPGFVKWTFEILDETNENKQNEWITEKAEATNTGKYTCTNKHGLSNSIYVFVRDPAKLFLVDRSLYGKEDNDTLVRCPLTDPEVTNYSLKGCQGKPLPKDLRFIPDPKAGIMIKSVKRAYHRLCLHCSVDQEGKSVLSEKFILKVRPAFKAVPVVSVSKASYLLREGEEFTVTCTIKDVSSSVYSTWKRENSQTKLQEKYNSWHHGDFNYERQATLTISSARVNDSGVFMCYANNTFGSANVTTTLEVVDKGFINIFPMINTTVFVNDGENVDLIVEYEAFPKPEHQQWIYMNRTFTDKWEDYPKSENESNIRYVSELHLTRLKGTEGGTYTFLVSNSDVNAAIAFNVYVNTKPEILTYDRLVNGMLQCVAAGFPEPTIDWYFCPGTEQRCSASVLPVDVQTLNSSGPPFGKLVVQSSIDSSAFKHNGTVECKAYNDVGKTSAYFNFAFKGNNKEQIHPHTLFTPLLIGFVIVAGMMCIIVMILTYKYLQKPMYEVQWKVVEEINGNNYVYIDPTQLPYDHKWEFPRNRLSFGKTLGAGAFGKVVEATAYGLIKSDAAMTVAVKMLKPSAHLTEREALMSELKVLSYLGNHMNIVNLLGACTIGGPTLVITEYCCYGDLLNFLRRKRDSFICSKQEDHAEAALYKNLLHSKESSCSDSTNEYMDMKPGVSYVVPTKADKRRSVRIGSYIERDVTPAIMEDDELALDLEDLLSFSYQVAKGMAFLASKNCIHRDLAARNILLTHGRITKICDFGLARDIKNDSNYVVKGNARLPVKWMAPESIFNCVYTFESDVWSYGIFLWELFSLGSSPYPGMPVDSKFYKMIKEGFRMLSPEHAPAEMYDIMKTCWDADPLKRPTFKQIVQLIEKQISESTNHIYSNLANCSPNRQKPVVDHSVRINSVGSTASSSQPLLVHDDV,kit,38
MPKKKPTPIQLNPAPDGSAVNGTSSAETNLEALQKKLEELELDEQQRKRLEAFLTQKQKVGELKDDDFEKISELGAGNGGVVFKVSHKPSGLVMARKLIHLEIKPAIRNQIIRELQVLHECNSPYIVGFYGAFYSDGEISICMEHMDGGSLDQVLKKAGRIPEQILGKVSIAVIKGLTYLREKHKIMHRDVKPSNILVNSRGEIKLCDFGVSGQLIDSMANSFVGTRSYMSPERLQGTHYSVQSDIWSMGLSLVEMAVGRYPIPPPDAKELELMFGCQVEGDAAETPPRPRTPGRPLSSYGMDSRPPMAIFELLDYIVNEPPPKLPSGVFSLEFQDFVNKCLIKNPAERADLKQLMVHAFIKRSDAEEVDFAGWLCSTIGLNQPSTPTHAAGV,mp2k1,39
MPIMGSSVYITVELAIAVLAILGNVLVCWAVWLNSNLQNVTNYFVVSLAAADIAVGVLAIPFAITISTGFCAACHGCLFIACFVLVLTQSSIFSLLAIAIDRYIAIRIPLRYNGLVTGTRAKGIIAICWVLSFAIGLTPMLGWNNCGQPKEGKNHSQGCGEGQVACLFEDVVPMNYMVYFNFFACVLVPLLLMLGVYLRIFLAARRQLKQMESQPLPGERARSTLQKEVHAAKSLAIIVGLFALCWLPLHIINCFTFFCPDCSHAPLWLMYLAIVLSHTNSVVNPFIYAYRIREFRQTFRKIIRSHVLRQQEPFKAAGTSARVLAAHGSDGEQVSLRLNGHPPGVWANGSAPHPERRPNGYALGLVSGGSAQESQGNTGLPDVELLSHELKGVCPEPPGLDDPLAQDGAGVS,aa2ar,40
MVSQALRLLCLLLGLQGCLAAGGVAKASGGETRDMPWKPGPHRVFVTQEEAHGVLHRRRRANAFLEELRPGSLERECKEEQCSFEEAREIFKDAERTKLFWISYSDGDQCASSPCQNGGSCKDQLQSYICFCLPAFEGRNCETHKDDQLICVNENGGCEQYCSDHTGTKRSCRCHEGYSLLADGVSCTPTVEYPCGKIPILEKRNASKPQGRIVGGKVCPKGECPWQVLLLVNGAQLCGGTLINTIWVVSAAHCFDKIKNWRNLIAVLGEHDLSEHDGDEQSRRVAQVIIPSTYVPGTTNHDIALLRLHQPVVLTDHVVPLCLPERTFSERTLAFVRFSLVSGWGQLLDRGATALELMVLNVPRLMTQDCLQQSRKVGDSPNITEYMFCAGYSDGSKDSCKGDSGGPHATHYRGTWYLTGIVSWGQGCATVGHFGVYTRVSQYIEWLQKLMRSEPRPGVLLRAPFP,fa7,41
MEAAVAAPRPRLLLLVLAAAAAAAAALLPGATALQCFCHLCTKDNFTCVTDGLCFVSVTETTDKVIHNSMCIAEIDLIPRDRPFVCAPSSKTGSVTTTYCCNQDHCNKIELPTTVKSSPGLGPVELAAVIAGPVCFVCISLMLMVYICHNRTVIHHRVPNEEDPSLDRPFISEGTTLKDLIYDMTTSGSGSGLPLLVQRTIARTIVLQESIGKGRFGEVWRGKWRGEEVAVKIFSSREERSWFREAEIYQTVMLRHENILGFIAADNKDNGTWTQLWLVSDYHEHGSLFDYLNRYTVTVEGMIKLALSTASGLAHLHMEIVGTQGKPAIAHRDLKSKNILVKKNGTCCIADLGLAVRHDSATDTIDIAPNHRVGTKRYMAPEVLDDSINMKHFESFKRADIYAMGLVFWEIARRCSIGGIHEDYQLPYYDLVPSDPSVEEMRKVVCEQKLRPNIPNRWQSCEALRVMAKIMRECWYANGAARLTALRIKKTLSQLSQQEGIKM,tgfr1,42
MKSGSGGGSPTSLWGLLFLSAALSLWPTSGEICGPGIDIRNDYQQLKRLENCTVIEGYLHILLISKAEDYRSYRFPKLTVITEYLLLFRVAGLESLGDLFPNLTVIRGWKLFYNYALVIFEMTNLKDIGLYNLRNITRGAIRIEKNADLCYLSTVDWSLILDAVSNNYIVGNKPPKECGDLCPGTMEEKPMCEKTTINNEYNYRCWTTNRCQKMCPSTCGKRACTENNECCHPECLGSCSAPDNDTACVACRHYYYAGVCVPACPPNTYRFEGWRCVDRDFCANILSAESSDSEGFVIHDGECMQECPSGFIRNGSQSMYCIPCEGPCPKVCEEEKKTKTIDSVTSAQMLQGCTIFKGNLLINIRRGNNIASELENFMGLIEVVTGYVKIRHSHALVSLSFLKNLRLILGEEQLEGNYSFYVLDNQNLQQLWDWDHRNLTIKAGKMYFAFNPKLCVSEIYRMEEVTGTKGRQSKGDINTRNNGERASCESDVLHFTSTTTSKNRIIITWHRYRPPDYRDLISFTVYYKEAPFKNVTEYDGQDACGSNSWNMVDVDLPPNKDVEPGILLHGLKPWTQYAVYVKAVTLTMVENDHIRGAKSEILYIRTNASVPSIPLDVLSASNSSSQLIVKWNPPSLPNGNLSYYIVRWQRQPQDGYLYRHNYCSKDKIPIRKYADGTIDIEEVTENPKTEVCGGEKGPCCACPKTEAEKQAEKEEAEYRKVFENFLHNSIFVPRPERKRRDVMQVANTTMSSRSRNTTAADTYNITDPEELETEYPFFESRVDNKERTVISNLRPFTLYRIDIHSCNHEAEKLGCSASNFVFARTMPAEGADDIPGPVTWEPRPENSIFLKWPEPENPNGLILMYEIKYGSQVEDQRECVSRQEYRKYGGAKLNRLNPGNYTARIQATSLSGNGSWTDPVFFYVQAKTGYENFIHLIIALPVAVLLIVGGLVIMLYVFHRKRNNSRLGNGVLYASVNPEYFSAADVYVPDEWEVAREKITMSRELGQGSFGMVYEGVAKGVVKDEPETRVAIKTVNEAASMRERIEFLNEASVMKEFNCHHVVRLLGVVSQGQPTLVIMELMTRGDLKSYLRSLRPEMENNPVLAPPSLSKMIQMAGEIADGMAYLNANKFVHRDLAARNCMVAEDFTVKIGDFGMTRDIYETDYYRKGGKGLLPVRWMSPESLKDGVFTTYSDVWSFGVVLWEIATLAEQPYQGLSNEQVLRFVMEGGLLDKPDNCPDMLFELMRMCWQYNPKMRPSFLEIISSIKEEMEPGFREVSFYYSEENKLPEPEELDLEPENMESVPLDPSASSSSLPLPDRHSGHKAENGPGPGVLVLRASFDERQPYAHMNGGRKNERALPLPQSSTC,igf1r,43
MSAAVTAGKLARAPADPGKAGVPGVAAPGAPAAAPPAKEIPEVLVDPRSRRRYVRGRFLGKGGFAKCFEISDADTKEVFAGKIVPKSLLLKPHQREKMSMEISIHRSLAHQHVVGFHGFFEDNDFVFVVLELCRRRSLLELHKRRKALTEPEARYYLRQIVLGCQYLHRNRVIHRDLKLGNLFLNEDLEVKIGDFGLATKVEYDGERKKTLCGTPNYIAPEVLSKKGHSFEVDVWSIGCIMYTLLVGKPPFETSCLKETYLRIKKNEYSIPKHINPVAASLIQKMLQTDPTARPTINELLNDEFFTSGYIPARLPITCLTIPPRFSIAPSSLDPSNRKPLTVLNKGLENPLPERPREKEEPVVRETGEVVDCHLSDMLQQLHSVNASKPSERGLVRQEEAEDPACIPIFWVSKWVDYSDKYGLGYQLCDNSVGVLFNDSTRLILYNDGDSLQYIERDGTESYLTVSSHPNSLMKKITLLKYFRNYMSEHLLKAGANITPREGDELARLPYLRTWFRTRSAIILHLSNGSVQINFFQDHTKLILCPLMAAVTYIDEKRDFRTYRLSLLEEYGCCKELASRLRYARTMVDKLLSSRSASNRLKAS,plk1,44
MSVLQVLHIPDERLRKVAKPVEEVNAEIQRIVDDMFETMYAEEGIGLAATQVDIHQRIIVIDVSENRDERLVLINPELLEKSGETGIEEGCLSIPEQRALVPRAEKVKIRALDRDGKPFELEADGLLAICIQHEMDHLVGKLFMDYLSPLKQQRIRQKVEKLDRLKARA,def,45
MSDKLPYKVADIGLAAWGRKALDIAENEMPGLMRMRERYSASKPLKGARIAGCLHMTVETAVLIETLVTLGAEVQWSSCNIFSTQDHAAAAIAKAGIPVYAWKGETDEEYLWCIEQTLYFKDGPLNMILDDGGDLTNLIHTKYPQLLPGIRGISEETTTGVHNLYKMMANGILKVPAINVNDSVTKSKFDNLYGCRESLIDGIKRATDVMIAGKVAVVAGYGDVGKGCAQALRGFGARVIITEIDPINALQAAMEGYEVTTMDEACQEGNIFVTTTGCIDIILGRHFEQMKDDAIVCNIGHFDVEIDVKWLNENAVEKVNIKPQVDRYRLKNGRRIILLAEGRLVNLGCAMGHPSFVMSNSFTNQVMAQIELWTHPDKYPVGVHFLPKKLDEAVAEAHLGKLNVKLTKLTEKQAQYLGMSCDGPFKPDHYRY,sahh,46
MPLSRWLRSVGVFLLPAPYWAPRERWLGSLRRPSLVHGYPVLAWHSARCWCQAWTEEPRALCSSLRMNGDQNSDVYAQEKQDFVQHFSQIVRVLTEDEMGHPEIGDAIARLKEVLEYNAIGGKYNRGLTVVVAFRELVEPRKQDADSLQRAWTVGWCVELLQAFFLVADDIMDSSLTRRGQICWYQKPGVGLDAINDANLLEACIYRLLKLYCREQPYYLNLIELFLQSSYQTEIGQTLDLLTAPQGNVDLVRFTEKRYKSIVKYKTAFYSFYLPIAAAMYMAGIDGEKEHANAKKILLEMGEFFQIQDDYLDLFGDPSVTGKIGTDIQDNKCSWLVVQCLQRATPEQYQILKENYGQKEAEKVARVKALYEELDLPAVFLQYEEDSYSHIMALIEQYAAPLPPAVFLGLARKIYKRRK,fpps,47
MERAGPSFGQQRQQQQPQQQKQQQRDQDSVEAWLDDHWDFTFSYFVRKATREMVNAWFAERVHTIPVCKEGIRGHTESCSCPLQQSPRADNSAPGTPTRKISASEFDRPLRPIVVKDSEGTVSFLSDSEKKEQMPLTPPRFDHDEGDQCSRLLELVKDISSHLDVTALCHKIFLHIHGLISADRYSLFLVCEDSSNDKFLISRLFDVAEGSTLEEVSNNCIRLEWNKGIVGHVAALGEPLNIKDAYEDPRFNAEVDQITGYKTQSILCMPIKNHREEVVGVAQAINKKSGNGGTFTEKDEKDFAAYLAFCGIVLHNAQLYETSLLENKRNQVLLDLASLIFEEQQSLEVILKKIAATIISFMQVQKCTIFIVDEDCSDSFSSVFHMECEELEKSSDTLTREHDANKINYMYAQYVKNTMEPLNIPDVSKDKRFPWTTENTGNVNQQCIRSLLCTPIKNGKKNKVIGVCQLVNKMEENTGKVKPFNRNDEQFLEAFVIFCGLGIQNTQMYEAVERAMAKQMVTLEVLSYHASAAEEETRELQSLAAAVVPSAQTLKITDFSFSDFELSDLETALCTIRMFTDLNLVQNFQMKHEVLCRWILSVKKNYRKNVAYHNWRHAFNTAQCMFAALKAGKIQNKLTDLEILALLIAALSHDLDHRGVNNSYIQRSEHPLAQLYCHSIMEHHHFDQCLMILNSPGNQILSGLSIEEYKTTLKIIKQAILATDLALYIKRRGEFFELIRKNQFNLEDPHQKELFLAMLMTACDLSAITKPWPIQQRIAELVATEFFDQGDRERKELNIEPTDLMNREKKNKIPSMQVGFIDAICLQLYEALTHVSEDCFPLLDGCRKNRQKWQALAEQQEKMLINGESGQAKRN,pde5a,48
MDGWRRMPRWGLLLLLWGSCTFGLPTDTTTFKRIFLKRMPSIRESLKERGVDMARLGPEWSQPMKRLTLGNTTSSVILTNYMDTQYYGEIGIGTPPQTFKVVFDTGSSNVWVPSSKCSRLYTACVYHKLFDASDSSSYKHNGTELTLRYSTGTVSGFLSQDIITVGGITVTQMFGEVTEMPALPFMLAEFDGVVGMGFIEQAIGRVTPIFDNIISQGVLKEDVFSFYYNRDSENSQSLGGQIVLGGSDPQHYEGNFHYINLIKTGVWQIQMKGVSVGSSTLLCEDGCLALVDTGASYISGSTSSIEKLMEALGAKKRLFDYVVKCNEGPTLPDISFHLGGKEYTLTSADYVFQESYSSKKLCTLAIHAMDIPPPTGPTWALGATFIRKFYTEFDRRNNRIGFALAR,reni,49
MLDDRARMEAAKKEKVEQILAEFQLQEEDLKKVMRRMQKEMDRGLRLETHEEASVKMLPTYVRSTPEGSEVGDFLSLDLGGTNFRVMLVKVGEGEEGQWSVKTKHQMYSIPEDAMTGTAEMLFDYISECISDFLDKHQMKHKKLPLGFTFSFPVRHEDIDKGILLNWTKGFKASGAEGNNVVGLLRDAIKRRGDFEMDVVAMVNDTVATMISCYYEDHQCEVGMIVGTGCNACYMEEMQNVELVEGDEGRMCVNTEWGAFGDSGELDEFLLEYDRLVDESSANPGQQLYEKLIGGKYMGELVRLVLLRLVDENLLFHGEASEQLRTRGAFETRFVSQVESDTGDRKQIYNILSTLGLRPSTTDCDIVRRACESVSTRAAHMCSAGLAGVINRMRESRSEDVMRITVGVDGSVYKLHPSFKERFHASVRRLTPSCEITFIESEEGSGRGAALVSAVACKKACMLGQ,hxk4,50
MRPSGTAGAALLALLAALCPASRALEEKKVCQGTSNKLTQLGTFEDHFLSLQRMFNNCEVVLGNLEITYVQRNYDLSFLKTIQEVAGYVLIALNTVERIPLENLQIIRGNMYYENSYALAVLSNYDANKTGLKELPMRNLQEILHGAVRFSNNPALCNVESIQWRDIVSSDFLSNMSMDFQNHLGSCQKCDPSCPNGSCWGAGEENCQKLTKIICAQQCSGRCRGKSPSDCCHNQCAAGCTGPRESDCLVCRKFRDEATCKDTCPPLMLYNPTTYQMDVNPEGKYSFGATCVKKCPRNYVVTDHGSCVRACGADSYEMEEDGVRKCKKCEGPCRKVCNGIGIGEFKDSLSINATNIKHFKNCTSISGDLHILPVAFRGDSFTHTPPLDPQELDILKTVKEITGFLLIQAWPENRTDLHAFENLEIIRGRTKQHGQFSLAVVSLNITSLGLRSLKEISDGDVIISGNKNLCYANTINWKKLFGTSGQKTKIISNRGENSCKATGQVCHALCSPEGCWGPEPRDCVSCRNVSRGRECVDKCNLLEGEPREFVENSECIQCHPECLPQAMNITCTGRGPDNCIQCAHYIDGPHCVKTCPAGVMGENNTLVWKYADAGHVCHLCHPNCTYGCTGPGLEGCPTNGPKIPSIATGMVGALLLLLVVALGIGLFMRRRHIVRKRTLRRLLQERELVEPLTPSGEAPNQALLRILKETEFKKIKVLGSGAFGTVYKGLWIPEGEKVKIPVAIKELREATSPKANKEILDEAYVMASVDNPHVCRLLGICLTSTVQLITQLMPFGCLLDYVREHKDNIGSQYLLNWCVQIAKGMNYLEDRRLVHRDLAARNVLVKTPQHVKITDFGLAKLLGAEEKEYHAEGGKVPIKWMALESILHRIYTHQSDVWSYGVTVWELMTFGSKPYDGIPASEISSILEKGERLPQPPICTIDVYMIMVKCWMIDADSRPKFRELIIEFSKMARDPQRYLVIQGDERMHLPSPTDSNFYRALMDEEDMDDVVDADEYLIPQQGFFSSPSTSRTPLLSSLSATSNNSTVACIDRNGLQSCPIKEDSFLQRYSSDPTGALTEDSIDDTFLPVPEYINQSVPKRPAGSVQNPVYHNQPLNPAPSRDPHYQDPHSTAVGNPEYLNTVQPTCVNSTFDSPAHWAQKGSHQISLDNPDYQQDFFPKEAKPNGIFKGSTAENAEYLRVAPQSSEFIGA,egfr,51
MPEIVDTCSLASPASVCRTKHLHLRCSVDFTRRTLTGTAALTVQSQEDNLRSLVLDTKDLTIEKVVINGQEVKYALGERQSYKGSPMEISLPIALSKNQEIVIEISFETSPKSSALQWLTPEQTSGKEHPYLFSQCQAIHCRAILPCQDTPSVKLTYTAEVSVPKELVALMSAIRDGETPDPEDPSRKIYKFIQKVPIPCYLIALVVGALESRQIGPRTLVWSEKEQVEKSAYEFSETESMLKIAEDLGGPYVWGQYDLLVLPPSFPYGGMENPCLTFVTPTLLAGDKSLSNVIAHEISHSWTGNLVTNKTWDHFWLNEGHTVYLERHICGRLFGEKFRHFNALGGWGELQNSVKTFGETHPFTKLVVDLTDIDPDVAYSSVPYEKGFALLFYLEQLLGGPEIFLGFLKAYVEKFSYKSITTDDWKDFLYSYFKDKVDVLNQVDWNAWLYSPGLPPIKPNYDMTLTNACIALSQRWITAKEDDLNSFNATDLKDLSSHQLNEFLAQTLQRAPLPLGHIKRMQEVYNFNAINNSEIRFRWLRLCIQSKWEDAIPLALKMATEQGRMKFTRPLFKDLAAFDKSHDQAVRTYQEHKASMHPVTAMLVGKDLKVD,lkha4,52
MSLHFLYYCSEPTLDVKIAFCQGFDKQVDVSYIAKHYNMSKSKVDNQFYSVEVGDSTFTVLKRYQNLKPIGSGAQGIVCAAYDAVLDRNVAIKKLSRPFQNQTHAKRAYRELVLMKCVNHKNIISLLNVFTPQKTLEEFQDVYLVMELMDANLCQVIQMELDHERMSYLLYQMLCGIKHLHSAGIIHRDLKPSNIVVKSDCTLKILDFGLARTAGTSFMMTPYVVTRYYRAPEVILGMGYKENVDIWSVGCIMGEMVRHKILFPGRDYIDQWNKVIEQLGTPCPEFMKKLQPTVRNYVENRPKYAGLTFPKLFPDSLFPADSEHNKLKASQARDLLSKMLVIDPAKRISVDDALQHPYINVWYDPAEVEAPPPQIYDKQLDEREHTIEEWKELIYKEVMNSEEKTKNGVVKGQPSPSGAAVNSSESLPPSSSVNDISSMSTDQTLASDTDSSLEASAGPLGCCR,mk10,53
MLSRLFRMHGLFVASHPWEVIVGTVTLTICMMSMNMFTGNNKICGWNYECPKFEEDVLSSDIIILTITRCIAILYIYFQFQNLRQLGSKYILGIAGLFTIFSSFVFSTVVIHFLDKELTGLNEALPFFLLLIDLSRASTLAKFALSSNSQDEVRENIARGMAILGPTFTLDALVECLVIGVGTMSGVRQLEIMCCFGCMSVLANYFVFMTFFPACVSLVLELSRESREGRPIWQLSHFARVLEEEENKPNPVTQRVKMIMSLGLVLVHAHSRWIADPSPQNSTADTSKVSLGLDENVSKRIEPSVSLWQFYLSKMISMDIEQVITLSLALLLAVKYIFFEQTETESTLSLKNPITSPVVTQKKVPDNCCRREPMLVRNNQKCDSVEEETGINRERKVEVIKPLVAETDTPNRATFVVGNSSLLDTSSVLVTQEPEIELPREPRPNEECLQILGNAEKGAKFLSDAEIIQLVNAKHIPAYKLETLMETHERGVSIRRQLLSKKLSEPSSLQYLPYRDYNYSLVMGACCENVIGYMPIPVGVAGPLCLDEKEFQVPMATTEGCLVASTNRGCRAIGLGGGASSRVLADGMTRGPVVRLPRACDSAEVKAWLETSEGFAVIKEAFDSTSRFARLQKLHTSIAGRNLYIRFQSRSGDAMGMNMISKGTEKALSKLHEYFPEMQILAVSGNYCTDKKPAAINWIEGRGKSVVCEAVIPAKVVREVLKTTTEAMIEVNINKNLVGSAMAGSIGGYNAHAANIVTAIYIACGQDAAQNVGSSNCITLMEASGPTNEDLYISCTMPSIEIGTVGGGTNLLPQQACLQMLGVQGACKDNPGENARQLARIVCGTVMAGELSLMAALAAGHLVKSHMIHNRSKINLQDLQGACTKKTA,hmdh,54
MKTPWKVLLGLLGAAALVTIITVPVVLLNKGTDDATADSRKTYTLTDYLKNTYRLKLYSLRWISDHEYLYKQENNILVFNAEYGNSSVFLENSTFDEFGHSINDYSISPDGQFILLEYNYVKQWRHSYTASYDIYDLNKRQLITEERIPNNTQWVTWSPVGHKLAYVWNNDIYVKIEPNLPSYRITWTGKEDIIYNGITDWVYEEEVFSAYSALWWSPNGTFLAYAQFNDTEVPLIEYSFYSDESLQYPKTVRVPYPKAGAVNPTVKFFVVNTDSLSSVTNATSIQITAPASMLIGDHYLCDVTWATQERISLQWLRRIQNYSVMDICDYDESSGRWNCLVARQHIEMSTTGWVGRFRPSEPHFTLDGNSFYKIISNEEGYRHICYFQIDKKDCTFITKGTWEVIGIEALTSDYLYYISNEYKGMPGGRNLYKIQLSDYTKVTCLSCELNPERCQYYSVSFSKEAKYYQLRCSGPGLPLYTLHSSVNDKGLRVLEDNSALDKMLQNVQMPSKKLDFIILNETKFWYQMILPPHFDKSKKYPLLLDVYAGPCSQKADTVFRLNWATYLASTENIIVASFDGRGSGYQGDKIMHAINRRLGTFEVEDQIEAARQFSKMGFVDNKRIAIWGWSYGGYVTSMVLGSGSGVFKCGIAVAPVSRWEYYDSVYTERYMGLPTPEDNLDHYRNSTVMSRAENFKQVEYLLIHGTADDNVHFQQSAQISKALVDVGVDFQAMWYTDEDHGIASSTAHQHIYTHMSHFIKQCFSLP,dpp4,55
MDSKESLTPGREENPSSVLAQERGDVMDFYKTLRGGATVKVSASSPSLAVASQSDSKQRRLLVDFPKGSVSNAQQPDLSKAVSLSMGLYMGETETKVMGNDLGFPQQGQISLSSGETDLKLLEESIANLNRSTSVPENPKSSASTAVSAAPTEKEFPKTHSDVSSEQQHLKGQTGTNGGNVKLYTTDQSTFDILQDLEFSSGSPGKETNESPWRSDLLIDENCLLSPLAGEDDSFLLEGNSNEDCKPLILPDTKPKIKDNGDLVLSSPSNVTLPQVKTEKEDFIELCTPGVIKQEKLGTVYCQASFPGANIIGNKMSAISVHGVSTSGGQMYHYDMNTASLSQQQDQKPIFNVIPPIPVGSENWNRCQGSGDDNLTSLGTLNFPGRTVFSNGYSSPSMRPDVSSPPSSSSTATTGPPPKLCLVCSDEASGCHYGVLTCGSCKVFFKRAVEGQHNYLCAGRNDCIIDKIRRKNCPACRYRKCLQAGMNLEARKTKKKIKGIQQATTGVSQETSENPGNKTIVPATLPQLTPTLVSLLEVIEPEVLYAGYDSSVPDSTWRIMTTLNMLGGRQVIAAVKWAKAIPGFRNLHLDDQMTLLQYSWMFLMAFALGWRSYRQSSANLLCFAPDLIINEQRMTLPCMYDQCKHMLYVSSELHRLQVSYEEYLCMKTLLLLSSVPKDGLKSQELFDEIRMTYIKELGKAIVKREGNSSQNWQRFYQLTKLLDSMHEVVENLLNYCFQTFLDKTMSIEFPEMLAEIITNQIPKYSNGNIKKLLFHQK,gcr,56
MGETLGDSPIDPESDSFTDTLSANISQEMTMVDTEMPFWPTNFGISSVDLSVMEDHSHSFDIKPFTTVDFSSISTPHYEDIPFTRTDPVVADYKYDLKLQEYQSAIKVEPASPPYYSEKTQLYNKPHEEPSNSLMAIECRVCGDKASGFHYGVHACEGCKGFFRRTIRLKLIYDRCDLNCRIHKKSRNKCQYCRFQKCLAVGMSHNAIRFGRMPQAEKEKLLAEISSDIDQLNPESADLRALAKHLYDSYIKSFPLTKAKARAILTGKTTDKSPFVIYDMNSLMMGEDKIKFKHITPLQEQSKEVAIRIFQGCQFRSVEAVQEITEYAKSIPGFVNLDLNDQVTLLKYGVHEIIYTMLASLMNKDGVLISEGQGFMTREFLKSLRKPFGDFMEPKFEFAVKFNALELDDSDLAIFIAVIILSGDRPGLLNVKPIEDIQDNLLQALELQLKLNHPESSQLFAKLLQKMTDLRQIVTEHVQLLQVIKKTETDMSLHPLLQEIYKDLY,pparg,57
MEFSSPSREECPKPLSRVSIMAGSLTGLLLLQAVSWASGARPCIPKSFGYSSVVCVCNATYCDSFDPPTFPALGTFSRYESTRSGRRMELSMGPIQANHTGTGLLLTLQPEQKFQKVKGFGGAMTDAAALNILALSPPAQNLLLKSYFSEEGIGYNIIRVPMASCDFSIRTYTYADTPDDFQLHNFSLPEEDTKLKIPLIHRALQLAQRPVSLLASPWTSPTWLKTNGAVNGKGSLKGQPGDIYHQTWARYFVKFLDAYAEHKLQFWAVTAENEPSAGLLSGYPFQCLGFTPEHQRDFIARDLGPTLANSTHHNVRLLMLDDQRLLLPHWAKVVLTDPEAAKYVHGIAVHWYLDFLAPAKATLGETHRLFPNTMLFASEACVGSKFWEQSVRLGSWDRGMQYSHSIITNLLYHVVGWTDWNLALNPEGGPNWVRNFVDSPIIVDITKDTFYKQPMFYHLGHFSKFIPEGSQRVGLVASQKNDLDAVALMHPDGSAVVVVLNRSSKDVPLTIKDPAVGFLETISPGYSIHTYLWRRQ,glcm,58
MEDHMFGVQQIQPNVISVRLFKRKVGGLGFLVKERVSKPPVIISDLIRGGAAEQSGLIQAGDIILAVNGRPLVDLSYDSALEVLRGIASETHVVLILRGPEGFTTHLETTFTGDGTPKTIRVTQPLGPPTKAVDLSHQPPAGKEQPLAVDGASGPGNGPQHAYDDGQEAGSLPHANGLAPRPPGQDPAKKATRVSLQGRGENNELLKEIEPVLSLLTSGSRGVKGGAPAKAEMKDMGIQVDRDLDGKSHKPLPLGVENDRVFNDLWGKGNVPVVLNNPYSEKEQPPTSGKQSPTKNGSPSKCPRFLKVKNWETEVVLTDTLHLKSTLETGCTEYICMGSIMHPSQHARRPEDVRTKGQLFPLAKEFIDQYYSSIKRFGSKAHMERLEEVNKEIDTTSTYQLKDTELIYGAKHAWRNASRCVGRIQWSKLQVFDARDCTTAHGMFNYICNHVKYATNKGNLRSAITIFPQRTDGKHDFRVWNSQLIRYAGYKQPDGSTLGDPANVQFTEICIQQGWKPPRGRFDVLPLLLQANGNDPELFQIPPELVLEVPIRHPKFEWFKDLGLKWYGLPAVSNMLLEIGGLEFSACPFSGWYMGTEIGVRDYCDNSRYNILEEVAKKMNLDMRKTSSLWKDQALVEINIAVLYSFQSDKVTIVDHHSATESFIKHMENEYRCRGGCPADWVWIVPPMSGSITPVFHQEMLNYRLTPSFEYQPDPWNTHVWKGTNGTPTKRRAIGFKKLAEAVKFSAKLMGQAMAKRVKATILYATETGKSQAYAKTLCEIFKHAFDAKVMSMEEYDIVHLEHETLVLVVTSTFGNGDPPENGEKFGCALMEMRHPNSVQEERKSYKVRFNSVSSYSDSQKSSGDGPDLRDNFESAGPLANVRFSVFGLGSRAYPHFCAFGHAVDTLLEELGGERILKMREGDELCGQEEAFRTWAKKVFKAACDVFCVGDDVNIEKANNSLISNDRSWKRNKFRLTFVAEAPELTQGLSNVHKKRVSAARLLSRQNLQSPKSSRSTIFVRLHTNGSQELQYQPGDHLGVFPGNHEDLVNALIERLEDAPPVNQMVKVELLEERNTALGVISNWTDELRLPPCTIFQAFKYYLDITTPPTPLQLQQFASLATSEKEKQRLLVLSKGLQEYEEWKWGKNPTIVEVLEEFPSIQMPATLLLTQLSLLQPRYYSISSSPDMYPDEVHLTVAIVSYRTRDGEGPIHHGVCSSWLNRIQADELVPCFVRGAPSFHLPRNPQVPCILVGPGTGIAPFRSFWQQRQFDIQHKGMNPCPMVLVFGCRQSKIDHIYREETLQAKNKGVFRELYTAYSREPDKPKKYVQDILQEQLAESVYRALKEQGGHIYVCGDVTMAADVLKAIQRIMTQQGKLSAEDAGVFISRMRDDNRYHEDIFGVTLRTYEVTNRLRSESIAFIEESKKDTDEVFSS,nos1,59
MDIKNSPSSLNSPSSYNCSQSILPLEHGSIYIPSSYVDSHHEYPAMTFYSPAVMNYSIPSNVTNLEGGPGRQTTSPNVLWPTPGHLSPLVVHRQLSHLYAEPQKSPWCEARSLEHTLPVNRETLKRKVSGNRCASPVTGPGSKRDAHFCAVCSDYASGYHYGVWSCEGCKAFFKRSIQGHNDYICPATNQCTIDKNRRKSCQACRLRKCYEVGMVKCGSRRERCGYRLVRRQRSADEQLHCAGKAKRSGGHAPRVRELLLDALSPEQLVLTLLEAEPPHVLISRPSAPFTEASMMMSLTKLADKELVHMISWAKKIPGFVELSLFDQVRLLESCWMEVLMMGLMWRSIDHPGKLIFAPDLVLDRDEGKCVEGILEIFDMLLATTSRFRELKLQHKEYLCVKAMILLNSSMYPLVTATQDADSSRKLAHLLNAVTDALVWVIAKSGISSQQQSMRLANLLMLLSHVRHASNKGMEHLLNMKCKNVVPVYDLLLEMLNAHVLRGCKSSITGSECSPAEDSKSKEGSQNPQSQ,esr2,60
MSNKCDVVVVGGGISGMAAAKLLHDSGLNVVVLEARDRVGGRTYTLRNQKVKYVDLGGSYVGPTQNRILRLAKELGLETYKVNEVERLIHHVKGKSYPFRGPFPPVWNPITYLDHNNFWRTMDDMGREIPSDAPWKAPLAEEWDNMTMKELLDKLCWTESAKQLATLFVNLCVTAETHEVSALWFLWYVKQCGGTTRIISTTNGGQERKFVGGSGQVSERIMDLLGDRVKLERPVIYIDQTRENVLVETLNHEMYEAKYVISAIPPTLGMKIHFNPPLPMMRNQMITRVPLGSVIKCIVYYKEPFWRKKDYCGTMIIDGEEAPVAYTLDDTKPEGNYAAIMGFILAHKARKLARLTKEERLKKLCELYAKVLGSLEALEPVHYEEKNWCEEQYSGGCYTTYFPPGILTQYGRVLRQPVDRIYFAGTETATHWSGYMEGAVEAGERAAREILHAMGKIPEDEIWQSEPESVDVPAQPITTTFLERHLPSVPGLLRLIGLTTIFSATALGFLAHKRGLLVRV,aofb,61
MNPNQKILCTSATALVIGTIAVLIGITNLGLNIGLHLKPSCNCSHSQPEATNASQTIINNYYNDTNITQISNTNIQVEERAIRDFNNLTKGLCTINSWHIYGKDNAVRIGEDSDVLVTREPYVSCDPDECRFYALSQGTTIRGKHSNGTIHDRSQYRALISWPLSSPPTVYNSRVECIGWSSTSCHDGKTRMSICISGPNNNASAVIWYNRRPVTEINTWARNILRTQESECVCHNGVCPVVFTDGSATGPAETRIYYFKEGKILKWEPLAGTAKHIEECSCYGERAEITCTCRDNWQGSNRPVIRIDPVAMTHTSQYICSPVLTDNPRPNDPTVGKCNDPYPGNNNNGVKGFSYLDGVNTWLGRTISIASRSGYEMLKVPNALTDDKSKPTQGQTIVLNTDWSGYSGSFMDYWAEGECYRACFYVELIRGRPKEDKVWWTSNSIVSMCSSTEFLGQWDWPDGAKIEYFL,nram,62
MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYILTHFKGFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNEIVEKLLLRRKFIPDPQGSNMMFAFFAQHFTHQFFKTDHKRGPAFTNGLGHGVDLNHIYGETLARQRKLRLFKDGKMKYQIIDGEMYPPTVKDTQAEMIYPPQVPEHLRFAVGQEVFGLVPGLMMYATIWLREHNRVCDVLKQEHPEWGDEQLFQTSRLILIGETIKIVIEDYVQHLSGYHFKLKFDPELLFNKQFQYQNRIAAEFNTLYHWHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQSFNEYRKRFMLKPYESFEELTGEKEMSAELEALYGDIDAVELYPALLVEKPRPDAIFGETMVEVGAPFSLKGLMGNVICSPAYWKPSTFGGEVGFQIINTASIQSLICNNVKGCPFTSFSVPDPELIKTVTINASSSRSGLDDINPTVLLKERSTEL,pgh2,63
MAAAAAAGAGPEMVRGQVFDVGPRYTNLSYIGEGAYGMVCSAYDNVNKVRVAIKKISPFEHQTYCQRTLREIKILLRFRHENIIGINDIIRAPTIEQMKDVYIVQDLMETDLYKLLKTQHLSNDHICYFLYQILRGLKYIHSANVLHRDLKPSNLLLNTTCDLKICDFGLARVADPDHDHTGFLTEYVATRWYRAPEIMLNSKGYTKSIDIWSVGCILAEMLSNRPIFPGKHYLDQLNHILGILGSPSQEDLNCIINLKARNYLLSLPHKNKVPWNRLFPNADSKALDLLDKMLTFNPHKRIEVEQALAHPYLEQYYDPSDEPIAEAPFKFDMELDDLPKEKLKELIFEETARFQPGYRS,mk01,64
MSTGDSFETRFEKMDNLLRDPKSEVNSDCLLDGLDALVYDLDFPALRKNKNIDNFLSRYKDTINKIRDLRMKAEDYEVVKVIGRGAFGEVQLVRHKSTRKVYAMKLLSKFEMIKRSDSAFFWEERDIMAFANSPWVVQLFYAFQDDRYLYMVMEYMPGGDLVNLMSNYDVPEKWARFYTAEVVLALDAIHSMGFIHRDVKPDNMLLDKSGHLKLADFGTCMKMNKEGMVRCDTAVGTPDYISPEVLKSQGGDGYYGRECDWWSVGVFLYEMLVGDTPFYADSLVGTYSKIMNHKNSLTFPDDNDISKEAKNLICAFLTDREVRLGRNGVEEIKRHLFFKNDQWAWETLRDTVAPVVPDLSSDIDTSNFDDLEEDKGEEETFPIPKAFVGNQLPFVGFTYYSNRRYLSSANPNDNRTSSNADKSLQESLQKTIYKLEEQLHNEMQLKDEMEQKCRTSNIKLDKIMKELDEEGNQRRNLESTVSQIEKEKMLLQHRINEYQRKAEQENEKRRNVENEVSTLKDQLEDLKKVSQNSQLANEKLSQLQKQLEEANDLLRTESDTAVRLRKSHTEMSKSISQLESLNRELQERNRILENSKSQTDKDYYQLQAILEAERRDRGHDSEMIGDLQARITSLQEEVKHLKHNLEKVEGERKEAQDMLNHSEKEKNNLEIDLNYKLKSLQQRLEQEVNEHKVTKARLTDKHQSIEEAKSVAMCEMEKKLKEEREAREKAENRVVQIEKQCSMLDVDLKQSQQKLEHLTGNKERMEDEVKNLTLQLEQESNKRLLLQNELKTQAFEADNLKGLEKQMKQEINTLLEAKRLLEFELAQLTKQYRGNEGQMRELQDQLEAEQYFSTLYKTQVKELKEEIEEKNRENLKKIQELQNEKETLATQLDLAETKAESEQLARGLLEEQYFELTQESKKAASRNRQEITDKDHTVSRLEEANSMLTKDIEILRRENEELTEKMKKAEEEYKLEKEEEISNLKAAFEKNINTERTLKTQAVNKLAEIMNRKDFKIDRKKANTQDLRKKEKENRKLQLELNQEREKFNQMVVKHQKELNDMQAQLVEECAHRNELQMQLASKESDIEQLRAKLLDLSDSTSVASFPSADETDGNLPESRIEGWLSVPNRGNIKRYGWKKQYVVVSSKKILFYNDEQDKEQSNPSMVLDIDKLFHVRPVTQGDVYRAETEEIPKIFQILYANEGECRKDVEMEPVQQAEKTNFQNHKGHEFIPTLYHFPANCDACAKPLWHVFKPPPALECRRCHVKCHRDHLDKKEDLICPCKVSYDVTSARDMLLLACSQDEQKKWVTHLVKKIPKNPPSGFVRASPRTLSTRSTANQSFRKVVKNTSGKTS,rock1,65
MGVQVETISPGDGRTFPKRGQTCVVHYTGMLEDGKKFDSSRDRNKPFKFMLGKQEVIRGWEEGVAQMSVGQRAKLTISPDYAYGATGHPGIIPPHATLVFDVELLKLE,fkb1a,66
MENTENSVDSKSIKNLEPKIIHGSESMDSGISLDNSYKMDYPEMGLCIIINNKNFHKSTGMTSRSGTDVDAANLRETFRNLKYEVRNKNDLTREEIVELMRDVSKEDHSKRSSFVCVLLSHGEEGIIFGTNGPVDLKKITNFFRGDRCRSLTGKPKLFIIQACRGTELDCGIETDSGVDDDMACHKIPVEADFLYAYSTAPGYYSWRNSKDGSWFIQSLCAMLKQYADKLEFMHILTRVNRKVATEFESFSFDATFHAKKQIPCIVSMLTKELYFYH,casp3,67
MSQERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVKKLSRPFQSIIHAKRTYRELRLLKHMKHENVIGLLDVFTPARSLEEFNDVYLVTHLMGADLNNIVKCQKLTDDHVQFLIYQILRGLKYIHSADIIHRDLKPSNLAVNEDCELKILDFGLARHTDDEMTGYVATRWYRAPEIMLNWMHYNQTVDIWSVGCIMAELLTGRTLFPGTDHIDQLKLILRLVGTPGAELLKKISSESARNYIQSLTQMPKMNFANVFIGANPLAVDLLEKMLVLDSDKRITAAQALAHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPLDQEEMES,mk14,68
MSFLSRQQPPPPRRAGAACTLRQKLIFSPCSDCEEEEEEEEEEGSGHSTGEDSAFQEPDSPLPPARSPTEPGPERRRSPGPAPGSPGELEEDLLLPGACPGADEAGGGAEGDSWEEEGFGSSSPVKSPAAPYFLGSSFSPVRCGGPGDASPRGCGARRAGEGRRSPRPDHPGTPPHKTFRKLRLFDTPHTPKSLLSKARGIDSSSVKLRGSSLFMDTEKSGKREFDVRQTPQVNINPFTPDSLLLHSSGQCRRRKRTYWNDSCGEDMEASDYELEDETRPAKRITITESNMKSRYTTEFHELEKIGSGEFGSVFKCVKRLDGCIYAIKRSKKPLAGSVDEQNALREVYAHAVLGQHSHVVRYFSAWAEDDHMLIQNEYCNGGSLADAISENYRIMSYFKEAELKDLLLQVGRGLRYIHSMSLVHMDIKPSNIFISRTSIPNAASEEGDEDDWASNKVMFKIGDLGHVTRISSPQVEEGDSRFLANEVLQENYTHLPKADIFALALTVVCAAGAEPLPRNGDQWHEIRQGRLPRIPQVLSQEFTELLKVMIHPDPERRPSAMALVKHSVLLSASRKSAEQLRIELNAEKFKNSLLQKELKKAQMAKAAAEERALFTDRMATRSTTQSNRTSRLIGKKMNRSVSLTIY,wee1,69
MAAAYLDPNLNHTPNSSTKTHLGTGMERSPGAMERVLKVFHYFESNSEPTTWASIIRHGDATDVRGIIQKIVDSHKVKHVACYGFRLSHLRSEEVHWLHVDMGVSSVREKYELAHPPEEWKYELRIRYLPKGFLNQFTEDKPTLNFFYQQVKSDYMLEIADQVDQEIALKLGCLEIRRSYWEMRGNALEKKSNYEVLEKDVGLKRFFPKSLLDSVKAKTLRKLIQQTFRQFANLNREESILKFFEILSPVYRFDKECFKCALGSSWIISVELAIGPEEGISYLTDKGCNPTHLADFTQVQTIQYSNSEDKDRKGMLQLKIAGAPEPLTVTAPSLTIAENMADLIDGYCRLVNGTSQSFIIRPQKEGERALPSIPKLANSEKQGMRTHAVSVSETDDYAEIIDEEDTYTMPSTRDYEIQRERIELGRCIGEGQFGDVHQGIYMSPENPALAVAIKTCKNCTSDSVREKFLQEALTMRQFDHPHIVKLIGVITENPVWIIMELCTLGELRSFLQVRKYSLDLASLILYAYQLSTALAYLESKRFVHRDIAARNVLVSSNDCVKLGDFGLSRYMEDSTYYKASKGKLPIKWMAPESINFRRFTSASDVWMFGVCMWEILMHGVKPFQGVKNNDVIGRIENGERLPMPPNCPPTLYSLMTKCWAYDPSRRPRFTELKAQLSTILEEEKAQQEERMRMESRRQATVSWDSGGSDEAPPKPSRPGYPSPRSSEGFYPSPQHMVQTNHYQVSGYPGSHGITAMAGSIYPGQASLLDQTDSWNHRPQEIAMWQPNVEDSTVLDLRGIGQVLPTHLMEERLIRQQQEMEEDQRWLEKEERFLKPDVRLSRGSIDREDGSLQGPIGNQHIYQPVGKPDPAAPPKKPPRPGAPGHLGSLASLSSPADSYNEGVKLQPQEISPPPTANLDRSNDKVYENVTGLVKAVIEMSSKIQPAPPEEYVPMVKEVGLALRTLLATVDETIPLLPASTHREIEMAQKLLNSDLGELINKMKLAQQYVMTSLQQEYKKQMLTAAHALAVDAKNLLDVIDQARLKMLGQTRPH,fak1,70
MHPGVLAAFLFLSWTHCRALPLPSGGDEDDLSEEDLQFAERYLRSYYHPTNLAGILKENAASSMTERLREMQSFFGLEVTGKLDDNTLDVMKKPRCGVPDVGEYNVFPRTLKWSKMNLTYRIVNYTPDMTHSEVEKAFKKAFKVWSDVTPLNFTRLHDGIADIMISFGIKEHGDFYPFDGPSGLLAHAFPPGPNYGGDAHFDDDETWTSSSKGYNLFLVAAHEFGHSLGLDHSKDPGALMFPIYTYTGKSHFMLPDDDVQGIQSLYGPGDEDPNPKHPKTPDKCDPSLSLDAITSLRGETMIFKDRFFWRLHPQQVDAELFLTKSFWPELPNRIDAAYEHPSHDLIFIFRGRKFWALNGYDILEGYPKKISELGLPKEVKKISAAVHFEDTGKTLLFSGNQVWRYDDTNHIMDKDYPRLIEEDFPGIGDKVDAVYEKNGYIYFFNGPIQFEYSIWSNRIVRVMPANSILWC,mmp13,71
PISPIEPVPVKLKPGMDGPKVKQWPLTEEKIKALVEICTEMEKEGKISKIGPENPYNTPVFAIKKKDSTRWRKLVDFRELNKRTQDFWEVQLGIPHPAGLKKKRSVTVLDVGDAYFSVPLDKEFRKYTAFTIPSINNETPGIRYQYNVLPQGWKGSPAIFQSSMTKILEPFRKQNPDIVIYQYMDDLYVGSDLEIGQHRTKIEELRQHLLKWGFTTPDKKHQKEPPFLWMGYEHHPDKWTVQPIVLPEKDSWTVNDIQK,hivrt,72
MASRLLLNNGAKMPILGLGTWKSPPGQVTEAVKVAIDVGYRHIDCAHVYQNENEVGVAIQEKLREQVVKREELFIVSKLWCTYHEKGLVKGACQKTLSDLKLDYLDLYLIHWPTGFKPGKEFFPLDESGNVVPSDTNILDTWAAMEELVDEGLVKAIGISNFNHLQVEMILNKPGLKYKPAVNQIECHPYLTQEKLIQYCQSKGIVVTAYSPLGSPDRPWAKPEDPSLLEDPRIKAIAAKHNKTTAQVLIRFPMQRNLVVIPKSVTPERIAENFKVFDFELSSQDMTTLLSYNRNWRVCALLSCTSHKDYPFHEEF,aldr,73
MVGSLNCIVAVSQNMGIGKNGDLPWPPLRNEFRYFQRMTTTSSVEGKQNLVIMGKKTWFSIPEKNRPLKGRINLVLSRELKEPPQGAHFLSRSLDDALKLTEQPELANKVDMVWIVGGSSVYKEAMNHPGHLKLFVTRIMQDFESDTFFPEIDLEKYKLLPEYPGVLSDVQEEKGIKYKFEVYEKND,dyr,74
MEMEKEFEQIDKSGSWAAIYQDIRHEASDFPCRVAKLPKNKNRNRYRDVSPFDHSRIKLHQEDNDYINASLIKMEEAQRSYILTQGPLPNTCGHFWEMVWEQKSRGVVMLNRVMEKGSLKCAQYWPQKEEKEMIFEDTNLKLTLISEDIKSYYTVRQLELENLTTQETREILHFHYTTWPDFGVPESPASFLNFLFKVRESGSLSPEHGPVVVHCSAGIGRSGTFCLADTCLLLMDKRKDPSSVDIKKVLLEMRKFRMGLIQTADQLRFSYLAVIEGAKFIMGDSSVQDQWKELSHEDLEPPPEHIPPPPRPPKRILEPHNGKCREFFPNHQWVKEETQEDKDCPIKEEKGSPLNAAPYGIESMSQDTEVRSRVVGGSLRGAQAASPAKGEPSLPEKDEDHALSYWKPFLVNMCVATVLTAGAYLCYRFLFNSNT,ptn1,75
MAQTPAFDKPKVELHVHLDGSIKPETILYYGRRRGIALPANTAEGLLNVIGMDKPLTLPDFLAKFDYYMPAIAGCREAIKRIAYEFVEMKAKEGVVYVEVRYSPHLLANSKVEPIPWNQAEGDLTPDEVVALVGQGLQEGERDFGVKARSILCCMRHQPNWSPKVVELCKKYQQQTVVAIDLAGDETIPGSSLLPGHVQAYQEAVKSGIHRTVHAGEVGSAEVVKEAVDILKTERLGHGYHTLEDQALYNRLRQENMHFEICPWSSYLTGAWKPDTEHAVIRLKNDQANYSLNTDDPLIFKSTLDTDYQMTKRDMGFTEEEFKRLNINAAKSSFLPEDEKRELLDLLYKAYGMPPSASAGQNL,ada,76
MGCGCSSHPEDDWMENIDVCENCHYPIVPLDGKGTLLIRNGSEVRDPLVTYEGSNPPASPLQDNLVIALHSYEPSHDGDLGFEKGEQLRILEQSGEWWKAQSLTTGQEGFIPFNFVAKANSLEPEPWFFKNLSRKDAERQLLAPGNTHGSFLIRESESTAGSFSLSVRDFDQNQGEVVKHYKIRNLDNGGFYISPRITFPGLHELVRHYTNASDGLCTRLSRPCQTQKPQKPWWEDEWEVPRETLKLVERLGAGQFGEVWMGYYNGHTKVAVKSLKQGSMSPDAFLAEANLMKQLQHQRLVRLYAVVTQEPIYIITEYMENGSLVDFLKTPSGIKLTINKLLDMAAQIAEGMAFIEERNYIHRDLRAANILVSDTLSCKIADFGLARLIEDNEYTAREGAKFPIKWTAPEAINYGTFTIKSDVWSFGILLTEIVTHGRIPYPGMTNPEVIQNLERGYRMVRPDNCPEELYQLMRLCWKERPEDRPTFDYLRSVLEDFFTATEGQYQPQP,lck,77
MGRPLHLVLLSASLAGLLLLGESLFIRREQANNILARVTRANSFLEEMKKGHLERECMEETCSYEEAREVFEDSDKTNEFWNKYKDGDQCETSPCQNQGKCKDGLGEYTCTCLEGFEGKNCELFTRKLCSLDNGDCDQFCHEEQNSVVCSCARGYTLADNGKACIPTGPYPCGKQTLERRKRSVAQATSSSGEAPDSITWKPYDAADLDPTENPFDLLDFNQTQPERGDNNLTRIVGGQECKDGECPWQALLINEENEGFCGGTILSEFYILTAAHCLYQAKRFKVRVGDRNTEQEEGGEAVHEVEVVIKHNRFTKETYDFDIAVLRLKTPITFRMNVAPACLPERDWAESTLMTQKTGIVSGFGRTHEKGRQSTRLKMLEVPYVDRNSCKLSSSFIITQNMFCAGYDTKQEDACQGDSGGPHVTRFKDTYFVTGIVSWGEGCARKGKYGIYTKVTAFLKWIDRSMKTRGLPKAKSHAPEVITSSPLK,fa10,78
MNEVSVIKEGWLHKRGEYIKTWRPRYFLLKSDGSFIGYKERPEAPDQTLPPLNNFSVAECQLMKTERPRPNTFVIRCLQWTTVIERTFHVDSPDEREEWMRAIQMVANSLKQRAPGEDPMDYKCGSPSDSSTTEEMEVAVSKARAKVTMNDFDYLKLLGKGTFGKVILVREKATGRYYAMKILRKEVIIAKDEVAHTVTESRVLQNTRHPFLTALKYAFQTHDRLCFVMEYANGGELFFHLSRERVFTEERARFYGAEIVSALEYLHSRDVVYRDIKLENLMLDKDGHIKITDFGLCKEGISDGATMKTFCGTPEYLAPEVLEDNDYGRAVDWWGLGVVMYEMMCGRLPFYNQDHERLFELILMEEIRFPRTLSPEAKSLLAGLLKKDPKQRLGGGPSDAKEVMEHRFFLSINWQDVVQKKLLPPFKPQVTSEVDTRYFDDEFTAQSITITPPDRYDSLGLLELDQRTHFPQFSYSASIRE,akt2,79
MESKALLAVALWFCVETRAASVGLPGDFLHPPKLSTQKDILTILANTTLQITCRGQRDLDWLWPNAQRDSEERVLVTECGGGDSIFCKTLTIPRVVGNDTGAYKCSYRDVDIASTVYVYVRDYRSPFIASVSDQHGIVYITENKNKTVVIPCRGSISNLNVSLCARYPEKRFVPDGNRISWDSEIGFTLPSYMISYAGMVFCEAKINDETYQSIMYIVVVVGYRIYDVILSPPHEIELSAGEKLVLNCTARTELNVGLDFTWHSPPSKSHHKKIVNRDVKPFPGTVAKMFLSTLTIESVTKSDQGEYTCVASSGRMIKRNRTFVRVHTKPFIAFGSGMKSLVEATVGSQVRIPVKYLSYPAPDIKWYRNGRPIESNYTMIVGDELTIMEVTERDAGNYTVILTNPISMEKQSHMVSLVVNVPPQIGEKALISPMDSYQYGTMQTLTCTVYANPPLHHIQWYWQLEEACSYRPGQTSPYACKEWRHVEDFQGGNKIEVTKNQYALIEGKNKTVSTLVIQAANVSALYKCEAINKAGRGERVISFHVIRGPEITVQPAAQPTEQESVSLLCTADRNTFENLTWYKLGSQATSVHMGESLTPVCKNLDALWKLNGTMFSNSTNDILIVAFQNASLQDQGDYVCSAQDKKTKKRHCLVKQLIILERMAPMITGNLENQTTTIGETIEVTCPASGNPTPHITWFKDNETLVEDSGIVLRDGNRNLTIRRVRKEDGGLYTCQACNVLGCARAETLFIIEGAQEKTNLEVIILVGTAVIAMFFWLLLVILVRTVKRANEGELKTGYLSIVMDPDELPLDERCERLPYDASKWEFPRDRLKLGKPLGRGAFGQVIEADAFGIDKTATCKTVAVKMLKEGATHSEHRALMSELKILIHIGHHLNVVNLLGACTKPGGPLMVIVEFSKFGNLSTYLRGKRNEFVPYKSKGARFRQGKDYVGELSVDLKRRLDSITSSQSSASSGFVEEKSLSDVEEEEASEELYKDFLTLEHLICYSFQVAKGMEFLASRKCIHRDLAARNILLSEKNVVKICDFGLARDIYKDPDYVRKGDARLPLKWMAPETIFDRVYTIQSDVWSFGVLLWEIFSLGASPYPGVKIDEEFCRRLKEGTRMRAPDYTTPEMYQTMLDCWHEDPNQRPSFSELVEHLGNLLQANAQQDGKDYIVLPMSETLSMEEDSGLSLPTSPVSCMEEEEVCDPKFHYDNTAGISHYLQNSKRKSRPVSVKTFEDIPLEEPEVKVIPDDSQTDSGMVLASEELKTLEDRNKLSPSFGGMMPSKSRESVASEGSNQTSGYQSGYHSDDTDTTVYSSDEAGLLKMVDAAVHADSGTTLQLTSCLNGSGPVPAPPPTPGNHERGAA,vgfr2,80
MFKTTLCALLITASCSTFAAPQQINDIVHRTITPLIEQQKIPGMAVAVIYQGKPYYFTWGYADIAKKQPVTQQTLFELGSVSKTFTGVLGGDAIARGEIKLSDPTTKYWPELTAKQWNGITLLHLATYTAGGLPLQVPDEVKSSSDLLRFYQNWQPAWAPGTQRLYANSSIGLFGALAVKPSGLSFEQAMQTRVFQPLKLNHTWINVPPAEEKNYAWGYREGKAVHVSPGALDAEAYGVKSTIEDMARWVQSNLKPLDINEKTLQQGIQLAQSRYWQTGDMYQGLGWEMLDWPVNPDSIINGSDNKIALAARPVKAITPPTPAVRASWVHKTGATGGFGSYVAFIPEKELGIVMLANKNYPNPARVDAAWQILNALQ,ampc,81
MAFMKKYLLPILGLFMAYYYYSANEEFRPEMLQGKKVIVTGASKGIGREMAYHLAKMGAHVVVTARSKETLQKVVSHCLELGAASAHYIAGTMEDMTFAEQFVAQAGKLMGGLDMLILNHITNTSLNLFHDDIHHVRKSMEVNFLSYVVLTVAALPMLKQSNGSIVVVSSLAGKVAYPMVAAYSASKFALDGFFSSIRKEYSVSRVNVSITLCVLGLIDTETAMKAVSGIVHMQAAPKEECALEIIKGGALRQEEVYYDSSLWTTLLIRNPCRKILEFLYSTSYNMDRFINK,dhi1,82
MASLSQLSSHLNYTCGAENSTGASQARPHAYYALSYCALILAIVFGNGLVCMAVLKERALQTTTNYLVVSLAVADLLVATLVMPWVVYLEVTGGVWNFSRICCDVFVTLDVMMCTASILNLCAISIDRYTAVVMPVHYQHGTGQSSCRRVALMITAVWVLAFAVSCPLLFGFNTTGDPTVCSISNPDFVIYSSVVSFYLPFGVTVLVYARIYVVLKQRRRKRILTRQNSQCNSVRPGFPQQTLSPDPAHLELKRYYSICQDTALGGPGFQERGGELKREEKTRNSLSPTIAPKLSLEVRKLSNGRLSTSLKLGPLQPRGVPLREKKATQMVAIVLGAFIVCWLPFFLTHVLNTHCQTCHVSPELYSATTWLGYVNSALNPVIYTTFNIEFRKAFLKILSC,drd3,83
MGAGVLVLGASEPGNLSSAAPLPDGAATAARLLVPASPPASLLPPASESPEPLSQQWTAGMGLLMALIVLLIVAGNVLVIVAIAKTPRLQTLTNLFIMSLASADLVMGLLVVPFGATIVVWGRWEYGSFFCELWTSVDVLCVTASIETLCVIALDRYLAITSPFRYQSLLTRARARGLVCTVWAISALVSFLPILMHWWRAESDEARRCYNDPKCCDFVTNRAYAIASSVVSFYVPLCIMAFVYLRVFREAQKQVKKIDSCERRFLGGPARPPSPSPSPVPAPAPPPGPPRPAAAAATAPLANGRAGKRRPSRLVALREQKALKTLGIIMGVFTLCWLPFFLANVVKAFHRELVPDRLFVFFNWLGYANSAFNPIIYCRSPDFRKAFQRLLCCARRAARRRHATHGDRPRASGCLARPGPPPSPGAASDDDDDDVVGATPPARLLEPWAGCNGGAAADSDSSLDEPCRPGFASESKV,adrb1,84
MQKIMHISVLLSPVLWGLIFGVSSNSIQIGGLFPRGADQEYSAFRVGMVQFSTSEFRLTPHIDNLEVANSFAVTNAFCSQFSRGVYAIFGFYDKKSVNTITSFCGTLHVSFITPSFPTDGTHPFVIQMRPDLKGALLSLIEYYQWDKFAYLYDSDRGLSTLQAVLDSAAEKKWQVTAINVGNINNDKKDEMYRSLFQDLELKKERRVILDCERDKVNDIVDQVITIGKHVKGYHYIIANLGFTDGDLLKIQFGGANVSGFQIVDYDDSLVSKFIERWSTLEEKEYPGAHTTTIKYTSALTYDAVQVMTEAFRNLRKQRIEISRRGNAGDCLANPAVPWGQGVEIERALKQVQVEGLSGNIKFDQNGKRINYTINIMELKTNGPRKIGYWSEVDKMVVTLTELPSGNDTSGLENKTVVVTTILESPYVMMKKNHEMLEGNERYEGYCVDLAAEIAKHCGFKYKLTIVGDGKYGARDADTKIWNGMVGELVYGKADIAIAPLTITLVREEVIDFSKPFMSLGISIMIKKPQKSKPGVFSFLDPLAYEIWMCIVFAYIGVSVVLFLVSRFSPYEWHTEEFEDGRETQSSESTNEFGIFNSLWFSLGAFMQQGCDISPRSLSGRIVGGVWWFFTLIIISSYTANLAAFLTVERMVSPIESAEDLSKQTEIAYGTLDSGSTKEFFRRSKIAVFDKMWTYMRSAEPSVFVRTTAEGVARVRKSKGKYAYLLESTMNEYIEQRKPCDTMKVGGNLDSKGYGIATPKGSSLRNAVNLAVLKLNEQGLLDKLKNKWWYDKGECGSGGGDSKEKTSALSLSNVAGVFYILVGGLGLAMLVALIEFCYKSRAEAKRMKVAKNAQNINPSSSQNSQNFATYKEGYNVYGIESVKI,gria2,85
MLNLLLLALPVLASRAYAAPAPGQALQRVGIVGGQEAPRSKWPWQVSLRVHGPYWMHFCGGSLIHPQWVLTAAHCVGPDVKDLAALRVQLREQHLYYQDQLLPVSRIIVHPQFYTAQIGADIALLELEEPVNVSSHVHTVTLPPASETFPPGMPCWVTGWGDVDNDERLPPPFPLKQVKVPIMENHICDAKYHLGAYTGDDVRIVRDDMLCAGNTRRDSCQGDSGGPLVCKVNGTWLQAGVVSWGEGCAQPNRPGIYTRVTYYLDWIHHYVPKKP,tryb1,86
MAARVLIIGSGGREHTLAWKLAQSHHVKQVLVAPGNAGTACSEKISNTAISISDHTALAQFCKEKKIEFVVVGPEAPLAAGIVGNLRSAGVQCFGPTAEAAQLESSKRFAKEFMDRHGIPTAQWKAFTKPEEACSFILSADFPALVVKASGLAAGKGVIVAKSKEEACKAVQEIMQEKAFGAAGETIVIEELLDGEEVSCLCFTDGKTVAPMPPAQDHKRLLEGDGGPNTGGMGAYCPAPQVSNDLLLKIKDTVLQRTVDGMQQEGTPYTGILYAGIMLTKNGPKVLEFNCRFGDPECQVILPLLKSDLYEVIQSTLDGLLCTSLPVWLENHTALTVVMASKGYPGDYTKGVEITGFPEAQALGLEVFHAGTALKNGKVVTHGGRVLAVTAIRENLISALEEAKKGLAAIKFEGAIYRKDVGFRAIAFLQQPRSLTYKESGVDIAAGNMLVKKIQPLAKATSRSGCKVDLGGFAGLFDLKAAGFKDPLLASGTDGVGTKLKIAQLCNKHDTIGQDLVAMCVNDILAQGAEPLFFLDYFSCGKLDLSVTEAVVAGIAKACGKAGCALLGGETAEMPDMYPPGEYDLAGFAVGAMERDQKLPHLERITEGDVVVGIASSGLHSNGFSLVRKIVAKSSLQYSSPAPDGCGDQTLGDLLLTPTRIYSHSLLPVLRSGHVKAFAHITGGGLLENIPRVLPEKLGVDLDAQTWRIPRVFSWLQQEGHLSEEEMARTFNCGVGAVLVVSKEQTEQILRDIQQHKEEAWVIGSVVARAEGSPRVKVKNLIESMQINGSVLKNGSLTNHFSFEKKKARVAVLISGTGSNLQALIDSTREPNSSAQIDIVISNKAAVAGLDKAERAGIPTRVINHKLYKNRVEFDSAIDLVLEEFSIDIVCLAGFMRILSGPFVQKWNGKMLNIHPSLLPSFKGSNAHEQALETGVTVTGCTVHFVAEDVDAGQIILQEAVPVKRGDTVATLSERVKLAEHKIFPAALQLVASGTVQLGENGKICWVKEE,pur2,87
MGPGVLLLLLVATAWHGQGIPVIEPSVPELVVKPGATVTLRCVGNGSVEWDGPPSPHWTLYSDGSSSILSTNNATFQNTGTYRCTEPGDPLGGSAAIHLYVKDPARPWNVLAQEVVVFEDQDALLPCLLTDPVLEAGVSLVRVRGRPLMRHTNYSFSPWHGFTIHRAKFIQSQDYQCSALMGGRKVMSISIRLKVQKVIPGPPALTLVPAELVRIRGEAAQIVCSASSVDVNFDVFLQHNNTKLAIPQQSDFHNNRYQKVLTLNLDQVDFQHAGNYSCVASNVQGKHSTSMFFRVVESAYLNLSSEQNLIQEVTVGEGLNLKVMVEAYPGLQGFNWTYLGPFSDHQPEPKLANATTKDTYRHTFTLSLPRLKPSEAGRYSFLARNPGGWRALTFELTLRYPPEVSVIWTFINGSGTLLCAASGYPQPNVTWLQCSGHTDRCDEAQVLQVWDDPYPEVLSQEPFHKVTVQSLLTVETLEHNQTYECRAHNSVGSGSWAFIPISAGAHTHPPDEFLFTPVVVACMSIMALLLLLLLLLLYKYKQKPKYQVRWKIIESYEGNSYTFIDPTQLPYNEKWEFPRNNLQFGKTLGAGAFGKVVEATAFGLGKEDAVLKVAVKMLKSTAHADEKEALMSELKIMSHLGQHENIVNLLGACTHGGPVLVITEYCCYGDLLNFLRRKAEAMLGPSLSPGQDPEGGVDYKNIHLEKKYVRRDSGFSSQGVDTYVEMRPVSTSSNDSFSEQDLDKEDGRPLELRDLLHFSSQVAQGMAFLASKNCIHRDVAARNVLLTNGHVAKIGDFGLARDIMNDSNYIVKGNARLPVKWMAPESIFDCVYTVQSDVWSYGILLWEIFSLGLNPYPGILVNSKFYKLVKDGYQMAQPAFAPKNIYSIMQACWALEPTHRPTFQQICSFLQEQAQEDRRERDYTNLPSSSRSGGSGSSSSELEEESSSEHLTCCEQGDIAQPLLQPNNYQFC,csf1r,88
MASQPNSSAKKKEEKGKNIQVVVRCRPFNLAERKASAHSIVECDPVRKEVSVRTGGLADKSSRKTYTFDMVFGASTKQIDVYRSVVCPILDEVIMGYNCTIFAYGQTGTGKTFTMEGERSPNEEYTWEEDPLAGIIPRTLHQIFEKLTDNGTEFSVKVSLLEIYNEELFDLLNPSSDVSERLQMFDDPRNKRGVIIKGLEEITVHNKDEVYQILEKGAAKRTTAATLMNAYSSRSHSVFSVTIHMKETTIDGEELVKIGKLNLVDLAGSENIGRSGAVDKRAREAGNINQSLLTLGRVITALVERTPHVPYRESKLTRILQDSLGGRTRTSIIATISPASLNLEETLSTLEYAHRAKNILNKPEVNQKLTKKALIKEYTEEIERLKRDLAAAREKNGVYISEENFRVMSGKLTVQEEQIVELIEKIGAVEEELNRVTELFMDNKNELDQCKSDLQNKTQELETTQKHLQETKLQLVKEEYITSALESTEEKLHDAASKLLNTVEETTKDVSGLHSKLDRKKAVDQHNAEAQDIFGKNLNSLFNNMEELIKDGSSKQKAMLEVHKTLFGNLLSSSVSALDTITTVALGSLTSIPENVSTHVSQIFNMILKEQSLAAESKTVLQELINVLKTDLLSSLEMILSPTVVSILKINSQLKHIFKTSLTVADKIEDQKKELDGFLSILCNNLHELQENTICSLVESQKQCGNLTEDLKTIKQTHSQELCKLMNLWTERFCALEEKCENIQKPLSSVQENIQQKSKDIVNKMTFHSQKFCADSDGFSQELRNFNQEGTKLVEESVKHSDKLNGNLEKISQETEQRCESLNTRTVYFSEQWVSSLNEREQELHNLLEVVSQCCEASSSDITEKSDGRKAAHEKQHNIFLDQMTIDEDKLIAQNLELNETIKIGLTKLNCFLEQDLKLDIPTGTTPQRKSYLYPSTLVRTEPREHLLDQLKRKQPELLMMLNCSENNKEETIPDVDVEEAVLGQYTEEPLSQEPSVDAGVDCSSIGGVPFFQHKKSHGKDKENRGINTLERSKVEETTEHLVTKSRLPLRAQINL,kif11,89
MGQPGNGSAFLLAPNGSHAPDHDVTQERDEVWVVGMGIVMSLIVLAIVFGNVLVITAIAKFERLQTVTNYFITSLACADLVMGLAVVPFGAAHILMKMWTFGNFWCEFWTSIDVLCVTASIETLCVIAVDRYFAITSPFKYQSLLTKNKARVIILMVWIVSGLTSFLPIQMHWYRATHQEAINCYANETCCDFFTNQAYAIASSIVSFYVPLVIMVFVYSRVFQEAKRQLQKIDKSEGRFHVQNLSQVEQDGRTGHGLRRSSKFCLKEHKALKTLGIIMGTFTLCWLPFFIVNIVHVIQDNLIRKEVYILLNWIGYVNSGFNPLIYCRSPDFRIAFQELLCLRRSSLKAYGNGYSSNGNTGEQSGYHVEQEKENKLLCEDLPGTEDFVGHQGTVPSDNIDSQGRNCSTNDSLL,adrb2,90
MAESSDKLYRVEYAKSGRASCKKCSESIPKDSLRMAIMVQSPMFDGKVPHWYHFSCFWKVGHSIRHPDVEVDGFSELRWDDQQKVKKTAEAGGVTGKGQDGIGSKAEKTLGDFAAEYAKSNRSTCKGCMEKIEKGQVRLSKKMVDPEKPQLGMIDRWYHPGCFVKNREELGFRPEYSASQLKGFSLLATEDKEALKKQLPGVKSEGKRKGDEVDGVDEVAKKKSKKEKDKDSKLEKALKAQNDLIWNIKDELKKVCSTNDLKELLIFNKQQVPSGESAILDRVADGMVFGALLPCEECSGQLVFKSDAYYCTGDVTAWTKCMVKTQTPNRKEWVTPKEFREISYLKKLKVKKQDRIFPPETSASVAATPPPSTASAPAAVNSSASADKPLSNMKILTLGKLSRNKDEVKAMIEKLGGKLTGTANKASLCISTKKEVEKMNKKMEEVKEANIRVVSEDFLQDVSASTKSLQELFLAHILSPWGAEVKAEPVEVVAPRGKSGAALSKKSKGQVKEEGINKSEKRMKLTLKGGAAVDPDSGLEHSAHVLEKGGKVFSATLGLVDIVKGTNSYYKLQLLEDDKENRYWIFRSWGRVGTVIGSNKLEQMPSKEDAIEHFMKLYEEKTGNAWHSKNFTKYPKKFYPLEIDYGQDEEAVKKLTVNPGTKSKLPKPVQDLIKMIFDVESMKKAMVEYEIDLQKMPLGKLSKRQIQAAYSILSEVQQAVSQGSSDSQILDLSNRFYTLIPHDFGMKKPPLLNNADSVQAKVEMLDNLLDIEVAYSLLRGGSDDSSKDPIDVNYEKLKTDIKVVDRDSEEAEIIRKYVKNTHATTHNAYDLEVIDIFKIEREGECQRYKPFKQLHNRRLLWHGSRTTNFAGILSQGLRIAPPEAPVTGYMFGKGIYFADMVSKSANYCHTSQGDPIGLILLGEVALGNMYELKHASHISKLPKGKHSVKGLGKTTPDPSANISLDGVDVPLGTGISSGVNDTSLLYNEYIVYDIAQVNLKYLLKLKFNFKTSLW,parp1,91
MSRPLSDQEKRKQISVRGLAGVENVTELKKNFNRHLHFTLVKDRNVATPRDYYFALAHTVRDHLVGRWIRTQQHYYEKDPKRIYYLSLEFYMGRTLQNTMVNLALENACDEATYQLGLDMEELEEIEEDAGLGNGGLGRLAACFLDSMATLGLAAYGYGIRYEFGIFNQKISGGWQMEEADDWLRYGNPWEKARPEFTLPVHFYGHVEHTSQGAKWVDTQVVLAMPYDTPVPGYRNNVVNTMRLWSAKAPNDFNLKDFNVGGYIQAVLDRNLAENISRVLYPNDNFFEGKELRLKQEYFVVAATLQDIIRRFKSSKFGCRDPVRTNFDAFPDKVAIQLNDTHPSLAIPELMRILVDLERMDWDKAWDVTVRTCAYTNHTVLPEALERWPVHLLETLLPRHLQIIYEINQRFLNRVAAAFPGDVDRLRRMSLVEEGAVKRINMAHLCIAGSHAVNGVARIHSEILKKTIFKDFYELEPHKFQNKTNGITPRRWLVLCNPGLAEVIAERIGEDFISDLDQLRKLLSFVDDEAFIRDVAKVKQENKLKFAAYLEREYKVHINPNSLFDIQVKRIHEYKRQLLNCLHVITLYNRIKREPNKFFVPRTVMIGGKAAPGYHMAKMIIRLVTAIGDVVNHDPAVGDRLRVIFLENYRVSLAEKVIPAADLSEQISTAGTEASGTGNMKFMLNGALTIGTMDGANVEMAEEAGEENFFIFGMRVEDVDKLDQRGYNAQEYYDRIPELRQVIEQLSSGFFSPKQPDLFKDIVNMLMHHDRFKVFADYEDYIKCQEKVSALYKNPREWTRMVIRNIATSGKFSSDRTIAQYAREIWGVEPSRQRLPAPDEAI,pygm,92
MTMTLHTKASGMALLHQIQGNELEPLNRPQLKIPLERPLGEVYLDSSKPAVYNYPEGAAYEFNAAAAANAQVYGQTGLPYGPGSEAAAFGSNGLGGFPPLNSVSPSPLMLLHPPPQLSPFLQPHGQQVPYYLENEPSGYTVREAGPPAFYRPNSDNRRQGGRERLASTNDKGSMAMESAKETRYCAVCNDYASGYHYGVWSCEGCKAFFKRSIQGHNDYMCPATNQCTIDKNRRKSCQACRLRKCYEVGMMKGGIRKDRRGGRMLKHKRQRDDGEGRGEVGSAGDMRAANLWPSPLMIKRSKKNSLALSLTADQMVSALLDAEPPILYSEYDPTRPFSEASMMGLLTNLADRELVHMINWAKRVPGFVDLTLHDQVHLLECAWLEILMIGLVWRSMEHPGKLLFAPNLLLDRNQGKCVEGMVEIFDMLLATSSRFRMMNLQGEEFVCLKSIILLNSGVYTFLSSTLKSLEEKDHIHRVLDKITDTLIHLMAKAGLTLQQQHQRLAQLLLILSHIRHMSNKGMEHLYSMKCKNVVPLYDLLLEMLDAHRLHAPTSRGGASVEETDQSHLATAGSTSSHSLQKYYITGEAEGFPATV,esr1,93
MPEETQTQDQPMEEEEVETFAFQAEIAQLMSLIINTFYSNKEIFLRELISNSSDALDKIRYESLTDPSKLDSGKELHINLIPNKQDRTLTIVDTGIGMTKADLINNLGTIAKSGTKAFMEALQAGADISMIGQFGVGFYSAYLVAEKVTVITKHNDDEQYAWESSAGGSFTVRTDTGEPMGRGTKVILHLKEDQTEYLEERRIKEIVKKHSQFIGYPITLFVEKERDKEVSDDEAEEKEDKEEEKEKEEKESEDKPEIEDVGSDEEEEKKDGDKKKKKKIKEKYIDQEELNKTKPIWTRNPDDITNEEYGEFYKSLTNDWEDHLAVKHFSVEGQLEFRALLFVPRRAPFDLFENRKKKNNIKLYVRRVFIMDNCEELIPEYLNFIRGVVDSEDLPLNISREMLQQSKILKVIRKNLVKKCLELFTELAEDKENYKKFYEQFSKNIKLGIHEDSQNRKKLSELLRYYTSASGDEMVSLKDYCTRMKENQKHIYYITGETKDQVANSAFVERLRKHGLEVIYMIEPIDEYCVQQLKEFEGKTLVSVTKEGLELPEDEEEKKKQEEKKTKFENLCKIMKDILEKKVEKVVVSNRLVTSPCCIVTSTYGWTANMERIMKAQALRDNSTMGYMAAKKHLEINPDHSIIETLRQKAEADKNDKSVKDLVILLYETALLSSGFSLEDPQTHANRIYRMIKLGLGIDEDDPTADDTSAAVTEEMPPLEGDDDTSRMEEVD,hs90a,94
MGAASGRRGPGLLLPLPLLLLLPPQPALALDPGLQPGNFSADEAGAQLFAQSYNSSAEQVLFQSVAASWAHDTNITAENARRQEEAALLSQEFAEAWGQKAKELYEPIWQNFTDPQLRRIIGAVRTLGSANLPLAKRQQYNALLSNMSRIYSTAKVCLPNKTATCWSLDPDLTNILASSRSYAMLLFAWEGWHNAAGIPLKPLYEDFTALSNEAYKQDGFTDTGAYWRSWYNSPTFEDDLEHLYQQLEPLYLNLHAFVRRALHRRYGDRYINLRGPIPAHLLGDMWAQSWENIYDMVVPFPDKPNLDVTSTMLQQGWNATHMFRVAEEFFTSLELSPMPPEFWEGSMLEKPADGREVVCHASAWDFYNRKDFRIKQCTRVTMDQLSTVHHEMGHIQYYLQYKDLPVSLRRGANPGFHEAIGDVLALSVSTPEHLHKIGLLDRVTNDTESDINYLLKMALEKIAFLPFGYLVDQWRWGVFSGRTPPSRYNFDWWYLRTKYQGICPPVTRNETHFDAGAKFHVPNVTPYIRYFVSFVLQFQFHEALCKEAGYEGPLHQCDIYRSTKAGAKLRKVLQAGSSRPWQEVLKDMVGLDALDAQPLLKYFQPVTQWLQEQNQQNGEVLGWPEYQWHPPLPDNYPEGIDLVTDEAEASKFVEEYDRTSQVVWNEYAEANWNYNTNITTETSKILLQKNMQIANHTLKYGTQARKFDVNQLQNTTIKRIIKKVQDLERAALPAQELEEYNKILLDMETTYSVATVCHPNGSCLQLEPDLTNVMATSRKYEDLLWAWEGWRDKAGRAILQFYPKYVELINQAARLNGYVDAGDSWRSMYETPSLEQDLERLFQELQPLYLNLHAYVRRALHRHYGAQHINLEGPIPAHLLGNMWAQTWSNIYDLVVPFPSAPSMDTTEAMLKQGWTPRRMFKEADDFFTSLGLLPVPPEFWNKSMLEKPTDGREVVCHASAWDFYNGKDFRIKQCTTVNLEDLVVAHHEMGHIQYFMQYKDLPVALREGANPGFHEAIGDVLALSVSTPKHLHSLNLLSSEGGSDEHDINFLMKMALDKIAFIPFSYLVDQWRWRVFDGSITKENYNQEWWSLRLKYQGLCPPVPRTQGDFDPGAKFHIPSSVPYIRYFVSFIIQFQFHEALCQAAGHTGPLHKCDIYQSKEAGQRLATAMKLGFSRPWPEAMQLITGQPNMSASAMLSYFKPLLDWLRTENELHGEKLGWPQYNWTPNSARSEGPLPDSGRVSFLGLDLDAQQARVGQWLLLFLGIALLVATLGLSQRLFSIRHRSLHRHSHGPQFGSEVELRHS,ace,95
MRPPQCLLHTPSLASPLLLLLLWLLGGGVGAEGREDAELLVTVRGGRLRGIRLKTPGGPVSAFLGIPFAEPPMGPRRFLPPEPKQPWSGVVDATTFQSVCYQYVDTLYPGFEGTEMWNPNRELSEDCLYLNVWTPYPRPTSPTPVLVWIYGGGFYSGASSLDVYDGRFLVQAERTVLVSMNYRVGAFGFLALPGSREAPGNVGLLDQRLALQWVQENVAAFGGDPTSVTLFGESAGAASVGMHLLSPPSRGLFHRAVLQSGAPNGPWATVGMGEARRRATQLAHLVGCPPGGTGGNDTELVACLRTRPAQVLVNHEWHVLPQESVFRFSFVPVVDGDFLSDTPEALINAGDFHGLQVLVGVVKDEGSYFLVYGAPGFSKDNESLISRAEFLAGVRVGVPQVSDLAAEAVVLHYTDWLHPEDPARLREALSDVVGDHNVVCPVAQLAGRLAAQGARVYAYVFEHRASTLSWPLWMGVPHGYEIEFIFGIPLDPSRNYTAEEKIFAQRLMRYWANFARTGDPNEPRDPKAPQWPPYTAGAQQYVSLDLRPLEVRRGLRAQACAFWNRFLPKLLSATDTLDEAERQWKAEFHRWSSYMVHWKNQFDHYSKQDRCSDL,aces,96
MEQPQEEAPEVREEEEKEEVAEAEGAPELNGGPQHALPSSSYTDLSRSSSPPSLLDQLQMGCDGASCGSLNMECRVCGDKASGFHYGVHACEGCKGFFRRTIRMKLEYEKCERSCKIQKKNRNKCQYCRFQKCLALGMSHNAIRFGRMPEAEKRKLVAGLTANEGSQYNPQVADLKAFSKHIYNAYLKNFNMTKKKARSILTGKASHTAPFVIHDIETLWQAEKGLVWKQLVNGLPPYKEISVHVFYRCQCTTVETVRELTEFAKSIPSFSSLFLNDQVTLLKYGVHEAIFAMLASIVNKDGLLVANGSGFVTREFLRSLRKPFSDIIEPKFEFAVKFNALELDDSDLALFIAAIILCGDRPGLMNVPRVEAIQDTILRALEFHLQANHPDAQYLFPKLLQKMADLRQLVTEHAQMMQRIKKTETETSLHPLLQEIYKDMY,ppard,97
MENGYTYEDYKNTAEWLLSHTKHRPQVAIICGSGLGGLTDKLTQAQIFDYGEIPNFPRSTVPGHAGRLVFGFLNGRACVMMQGRFHMYEGYPLWKVTFPVRVFHLLGVDTLVVTNAAGGLNPKFEVGDIMLIRDHINLPGFSGQNPLRGPNDERFGDRFPAMSDAYDRTMRQRALSTWKQMGEQRELQEGTYVMVAGPSFETVAECRVLQKLGADAVGMSTVPEVIVARHCGLRVFGFSLITNKVIMDYESLEKANHEEVLAAGKQAAQKLEQFVSILMASIPLPDKAS,pnph,98
MLEICLKLVGCKSKKGLSSSSSCYLEEALQRPVASDFEPQGLSEAARWNSKENLLAGPSENDPNLFVALYDFVASGDNTLSITKGEKLRVLGYNHNGEWCEAQTKNGQGWVPSNYITPVNSLEKHSWYHGPVSRNAAEYLLSSGINGSFLVRESESSPGQRSISLRYEGRVYHYRINTASDGKLYVSSESRFNTLAELVHHHSTVADGLITTLHYPAPKRNKPTVYGVSPNYDKWEMERTDITMKHKLGGGQYGEVYEGVWKKYSLTVAVKTLKEDTMEVEEFLKEAAVMKEIKHPNLVQLLGVCTREPPFYIITEFMTYGNLLDYLRECNRQEVNAVVLLYMATQISSAMEYLEKKNFIHRDLAARNCLVGENHLVKVADFGLSRLMTGDTYTAHAGAKFPIKWTAPESLAYNKFSIKSDVWAFGVLLWEIATYGMSPYPGIDLSQVYELLEKDYRMERPEGCPEKVYELMRACWQWNPSDRPSFAEIHQAFETMFQESSISDEVEKELGKQGVRGAVSTLLQAPELPTKTRTSRRAAEHRDTTDVPEMPHSKGQGESDPLDHEPAVSPLLPRKERGPPEGGLNEDERLLPKDKKTNLFSALIKKKKKTAPTPPKRSSSFREMDGQPERRGAGEEEGRDISNGALAFTPLDTADPAKSPKPSNGAGVPNGALRESGGSGFRSPHLWKKSSTLTSSRLATGEEEGGGSSSKRFLRSCSASCVPHGAKDTEWRSVTLPRDLQSTGRQFDSSTFGGHKSEKPALPRKRAGENRSDQVTRGTVTPPPRLVKKNEEAADEVFKDIMESSPGSSPPNLTPKPLRRQVTVAPASGLPHKEEAGKGSALGTPAAAEPVTPTSKAGSGAPGGTSKGPAEESRVRRHKHSSESPGRDKGKLSRLKPAPPPPPAASAGKAGGKPSQSPSQEAAGEAVLGAKTKATSLVDAVNSDAAKPSQPGEGLKKPVLPATPKPQSAKPSGTPISPAPVPSTLPSASSALAGDQPSSTAFIPLISTRVSLRKTRQPPERIASGAITKGVVLDSTEALCLAISRNSEQMASHSAVLEAGKNLYTFCVSYVDSIQQMRNKFAFREAINKLENNLRELQICPATAGSGPAATQDFSKLLSSVKEISDIVQR,abl1,99
MDSLVVLVLCLSCLLLLSLWRQSSGRGKLPPGPTPLPVIGNILQIGIKDISKSLTNLSKVYGPVFTLYFGLKPIVVLHGYEAVKEALIDLGEEFSGRGIFPLAERANRGFGIVFSNGKKWKEIRRFSLMTLRNFGMGKRSIEDRVQEEARCLVEELRKTKASPCDPTFILGCAPCNVICSIIFHKRFDYKDQQFLNLMEKLNENIKILSSPWIQICNNFSPIIDYFPGTHNKLLKNVAFMKSYILEKVKEHQESMDMNNPQDFIDCFLMKMEKEKHNQPSEFTIESLENTAVDLFGAGTETTSTTLRYALLLLLKHPEVTAKVQEEIERVIGRNRSPCMQDRSHMPYTDAVVHEVQRYIDLLPTSLPHAVTCDIKFRNYLIPKGTTILISLTSVLHDNKEFPNPEMFDPHHFLDEGGNFKKSKYFMPFSAGKRICVGEALAGMELFLFLTSILQNFNLKSLVDPKNLDTTPVVNGFASVPPFYQLCFIPV,cp2c9,100
METKGYHSLPEGLDMERRWGQVSQAVERSSLGPTERTDENNYMEIVNVSCVSGAIPNNSTQGSSKEKQELLPCLQQDNNRPGILTSDIKTELESKELSATVAESMGLYMDSVRDADYSYEQQNQQGSMSPAKIYQNVEQLVKFYKGNGHRPSTLSCVNTPLRSFMSDSGSSVNGGVMRAVVKSPIMCHEKSPSVCSPLNMTSSVCSPAGINSVSSTTASFGSFPVHSPITQGTPLTCSPNVENRGSRSHSPAHASNVGSPLSSPLSSMKSSISSPPSHCSVKSPVSSPNNVTLRSSVSSPANINNSRCSVSSPSNTNNRSTLSSPAASTVGSICSPVNNAFSYTASGTSAGSSTLRDVVPSPDTQEKGAQEVPFPKTEEVESAISNGVTGQLNIVQYIKPEPDGAFSSSCLGGNSKINSDSSFSVPIKQESTKHSCSGTSFKGNPTVNPFPFMDGSYFSFMDDKDYYSLSGILGPPVPGFDGNCEGSGFPVGIKQEPDDGSYYPEASIPSSAIVGVNSGGQSFHYRIGAQGTISLSRSARDQSFQHLSSFPPVNTLVESWKSHGDLSSRRSDGYPVLEYIPENVSSSTLRSVSTGSSRPSKICLVCGDEASGCHYGVVTCGSCKVFFKRAVEGQHNYLCAGRNDCIIDKIRRKNCPACRLQKCLQAGMNLGARKSKKLGKLKGIHEEQPQQQQPPPPPPPPQSPEEGTTYIAPAKEPSVNTALVPQLSTISRALTPSPVMVLENIEPEIVYAGYDSSKPDTAENLLSTLNRLAGKQMIQVVKWAKVLPGFKNLPLEDQITLIQYSWMCLSSFALSWRSYKHTNSQFLYFAPDLVFNEEKMHQSAMYELCQGMHQISLQFVRLQLTFEEYTIMKVLLLLSTIPKDGLKSQAAFEEMRTNYIKELRKMVTKCPNNSGQSWQRFYQLTKLLDSMHDLVSDLLEFCFYTFRESHALKVEFPAMLVEIISDQLPKVESGNAKPLYFHRK,mcr,101
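The Targets_Fasta.csv rows above are three-column records: an amino-acid sequence, a short target identifier (jak2, egfr, ...), and an integer target index. A minimal loading sketch, assuming pandas is available and that the file carries no header row; the column names below are illustrative only and are not taken from the repository's own loaders:

import pandas as pd

# Illustrative column names; the CSV rows shown above contain data only.
targets = pd.read_csv(
    "data/datasets/AD/Targets_Fasta.csv",
    header=None,
    names=["sequence", "target_id", "target_idx"],
)
jak2_seq = targets.loc[targets["target_id"] == "jak2", "sequence"].iloc[0]
print(len(jak2_seq))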
data/datasets/AD/saliency.csv
ADDED
@@ -0,0 +1 @@
CCCCCCC(C(C)O)n1cncn1,ada,1
data/datasets/DUDE/Smiles_1.csv
ADDED
The diff for this file is too large to render.
See raw diff
data/datasets/DUDE/Smiles_2.csv
ADDED
The diff for this file is too large to render.
See raw diff
data/datasets/DUDE/Smiles_3.csv
ADDED
The diff for this file is too large to render.
See raw diff
data/datasets/DUDE/Smiles_4.csv
ADDED
The diff for this file is too large to render.
See raw diff
data/datasets/DUDE/Smiles_Test.csv
ADDED
The diff for this file is too large to render.
See raw diff
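The single saliency.csv row above pairs a ligand SMILES with a target identifier, and data/features.py (added next) is what turns such a SMILES into per-atom and per-bond feature indices. A minimal sketch of that hand-off, assuming RDKit is installed and the repository root is on the Python path so that data.features is importable; the variable names are illustrative only:

from rdkit import Chem

from data.features import atom_to_feature_vector, bond_to_feature_vector

# Ligand row taken from data/datasets/AD/saliency.csv
mol = Chem.MolFromSmiles("CCCCCCC(C(C)O)n1cncn1")

# One list of 9 category indices per atom and 3 per bond,
# following the allowable_features vocabulary defined in data/features.py.
atom_features = [atom_to_feature_vector(atom) for atom in mol.GetAtoms()]
bond_features = [bond_to_feature_vector(bond) for bond in mol.GetBonds()]
print(len(atom_features), len(bond_features))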
data/features.py
ADDED
@@ -0,0 +1,136 @@
# allowable multiple choice node and edge features
allowable_features = {
    'possible_atomic_num_list' : list(range(1, 119)) + ['misc'],
    'possible_chirality_list' : [
        'CHI_UNSPECIFIED',
        'CHI_TETRAHEDRAL_CW',
        'CHI_TETRAHEDRAL_CCW',
        'CHI_OTHER'
    ],
    'possible_degree_list' : [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 'misc'],
    'possible_formal_charge_list' : [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 'misc'],
    'possible_numH_list' : [0, 1, 2, 3, 4, 5, 6, 7, 8, 'misc'],
    'possible_number_radical_e_list': [0, 1, 2, 3, 4, 'misc'],
    'possible_hybridization_list' : [
        'SP', 'SP2', 'SP3', 'SP3D', 'SP3D2', 'misc'
    ],
    'possible_is_aromatic_list': [False, True],
    'possible_is_in_ring_list': [False, True],
    'possible_bond_type_list' : [
        'SINGLE',
        'DOUBLE',
        'TRIPLE',
        'AROMATIC',
        'misc'
    ],
    'possible_bond_stereo_list': [
        'STEREONONE',
        'STEREOZ',
        'STEREOE',
        'STEREOCIS',
        'STEREOTRANS',
        'STEREOANY',
    ],
    'possible_is_conjugated_list': [False, True],
    'posible_explicit_valence': [1, 2, 3, 4, 5, 6, 7, 'misc'],
    'posible_implicit_valence': [1, 2, 3, 4, 5, 6, 7, 'misc']
}

def safe_index(l, e):
    """
    Return index of element e in list l. If e is not present, return the last index
    """
    try:
        return l.index(e)
    except:
        return len(l) - 1

def atom_to_feature_vector(atom):
    """
    Converts rdkit atom object to feature list of indices
    :param atom: rdkit atom object
    :return: list
    """
    atom_feature = [
        safe_index(allowable_features['possible_atomic_num_list'], atom.GetAtomicNum()),
        allowable_features['possible_chirality_list'].index(str(atom.GetChiralTag())),
        safe_index(allowable_features['possible_degree_list'], atom.GetTotalDegree()),
        safe_index(allowable_features['possible_formal_charge_list'], atom.GetFormalCharge()),
        safe_index(allowable_features['possible_numH_list'], atom.GetTotalNumHs()),
        safe_index(allowable_features['possible_number_radical_e_list'], atom.GetNumRadicalElectrons()),
        safe_index(allowable_features['possible_hybridization_list'], str(atom.GetHybridization())),
        allowable_features['possible_is_aromatic_list'].index(atom.GetIsAromatic()),
        allowable_features['possible_is_in_ring_list'].index(atom.IsInRing()),
    ]
    return atom_feature

def get_atom_feature_dims():
    return list(map(len, [
        allowable_features['possible_atomic_num_list'],
        allowable_features['possible_chirality_list'],
        allowable_features['possible_degree_list'],
        allowable_features['possible_formal_charge_list'],
        allowable_features['possible_numH_list'],
        allowable_features['possible_number_radical_e_list'],
        allowable_features['possible_hybridization_list'],
        allowable_features['possible_is_aromatic_list'],
        allowable_features['possible_is_in_ring_list'],
    ]))

def bond_to_feature_vector(bond):
    """
    Converts rdkit bond object to feature list of indices
    :param bond: rdkit bond object
    :return: list
    """
    bond_feature = [
        safe_index(allowable_features['possible_bond_type_list'], str(bond.GetBondType())),
        allowable_features['possible_bond_stereo_list'].index(str(bond.GetStereo())),
        allowable_features['possible_is_conjugated_list'].index(bond.GetIsConjugated()),
    ]
    return bond_feature

def get_bond_feature_dims():
    return list(map(len, [
        allowable_features['possible_bond_type_list'],
        allowable_features['possible_bond_stereo_list'],
        allowable_features['possible_is_conjugated_list']
    ]))

def atom_feature_vector_to_dict(atom_feature):
+
[atomic_num_idx,
|
102 |
+
chirality_idx,
|
103 |
+
degree_idx,
|
104 |
+
formal_charge_idx,
|
105 |
+
num_h_idx,
|
106 |
+
number_radical_e_idx,
|
107 |
+
hybridization_idx,
|
108 |
+
is_aromatic_idx,
|
109 |
+
is_in_ring_idx] = atom_feature
|
110 |
+
|
111 |
+
feature_dict = {
|
112 |
+
'atomic_num': allowable_features['possible_atomic_num_list'][atomic_num_idx],
|
113 |
+
'chirality': allowable_features['possible_chirality_list'][chirality_idx],
|
114 |
+
'degree': allowable_features['possible_degree_list'][degree_idx],
|
115 |
+
'formal_charge': allowable_features['possible_formal_charge_list'][formal_charge_idx],
|
116 |
+
'num_h': allowable_features['possible_numH_list'][num_h_idx],
|
117 |
+
'num_rad_e': allowable_features['possible_number_radical_e_list'][number_radical_e_idx],
|
118 |
+
'hybridization': allowable_features['possible_hybridization_list'][hybridization_idx],
|
119 |
+
'is_aromatic': allowable_features['possible_is_aromatic_list'][is_aromatic_idx],
|
120 |
+
'is_in_ring': allowable_features['possible_is_in_ring_list'][is_in_ring_idx]
|
121 |
+
}
|
122 |
+
|
123 |
+
return feature_dict
|
124 |
+
|
125 |
+
def bond_feature_vector_to_dict(bond_feature):
|
126 |
+
[bond_type_idx,
|
127 |
+
bond_stereo_idx,
|
128 |
+
is_conjugated_idx] = bond_feature
|
129 |
+
|
130 |
+
feature_dict = {
|
131 |
+
'bond_type': allowable_features['possible_bond_type_list'][bond_type_idx],
|
132 |
+
'bond_stereo': allowable_features['possible_bond_stereo_list'][bond_stereo_idx],
|
133 |
+
'is_conjugated': allowable_features['possible_is_conjugated_list'][is_conjugated_idx]
|
134 |
+
}
|
135 |
+
|
136 |
+
return feature_dict
|
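A minimal usage sketch for these feature helpers (not part of the commit), assuming RDKit is installed and the repo root is on PYTHONPATH; the aspirin SMILES is only an illustrative input:

```python
from rdkit import Chem
from data.features import (atom_to_feature_vector, bond_to_feature_vector,
                           get_atom_feature_dims, atom_feature_vector_to_dict)

# Illustrative molecule (aspirin); any valid SMILES works.
mol = Chem.MolFromSmiles("CC(=O)Oc1ccccc1C(=O)O")

# One vector of categorical indices per atom / per bond.
atom_feats = [atom_to_feature_vector(a) for a in mol.GetAtoms()]
bond_feats = [bond_to_feature_vector(b) for b in mol.GetBonds()]

print(len(atom_feats), get_atom_feature_dims())    # 13 atoms, 9 vocabulary sizes
print(atom_feature_vector_to_dict(atom_feats[0]))  # human-readable view of the first atom
```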
example/input_smiles.csv
ADDED
@@ -0,0 +1,4 @@
smiles
Cn4c(CCC(=O)Nc3ccc2ccn(CC[C@H](CO)n1cnc(C(N)=O)c1)c2c3)nc5ccccc45
OCCCCCn1cnc2C(O)CN=CNc12
Nc4nc(c1ccco1)c3ncn(C(=O)NCCc2ccccc2)c3n4
gcn_lib/__init__.py
ADDED
File without changes
gcn_lib/dense/__init__.py
ADDED
@@ -0,0 +1,4 @@
from .torch_nn import *
from .torch_edge import *
from .torch_vertex import *
gcn_lib/dense/torch_edge.py
ADDED
@@ -0,0 +1,101 @@
import torch
from torch import nn
from torch_cluster import knn_graph


class DenseDilated(nn.Module):
    """
    Find dilated neighbors from a neighbor list.

    edge_index: (2, batch_size, num_points, k)
    """
    def __init__(self, k=9, dilation=1, stochastic=False, epsilon=0.0):
        super(DenseDilated, self).__init__()
        self.dilation = dilation
        self.stochastic = stochastic
        self.epsilon = epsilon
        self.k = k

    def forward(self, edge_index):
        if self.stochastic:
            if torch.rand(1) < self.epsilon and self.training:
                num = self.k * self.dilation
                randnum = torch.randperm(num)[:self.k]
                edge_index = edge_index[:, :, :, randnum]
            else:
                edge_index = edge_index[:, :, :, ::self.dilation]
        else:
            edge_index = edge_index[:, :, :, ::self.dilation]
        return edge_index


def pairwise_distance(x):
    """
    Compute pairwise distance of a point cloud.
    Args:
        x: tensor (batch_size, num_points, num_dims)
    Returns:
        pairwise distance: (batch_size, num_points, num_points)
    """
    x_inner = -2*torch.matmul(x, x.transpose(2, 1))
    x_square = torch.sum(torch.mul(x, x), dim=-1, keepdim=True)
    return x_square + x_inner + x_square.transpose(2, 1)


def dense_knn_matrix(x, k=16):
    """Get KNN based on the pairwise distance.
    Args:
        x: (batch_size, num_dims, num_points, 1)
        k: int
    Returns:
        nearest neighbors: (2, batch_size, num_points, k) stacked neighbor / center indices
    """
    with torch.no_grad():
        x = x.transpose(2, 1).squeeze(-1)
        batch_size, n_points, n_dims = x.shape
        _, nn_idx = torch.topk(-pairwise_distance(x.detach()), k=k)
        center_idx = torch.arange(0, n_points, device=x.device).repeat(batch_size, k, 1).transpose(2, 1)
        return torch.stack((nn_idx, center_idx), dim=0)


class DenseDilatedKnnGraph(nn.Module):
    """
    Find the neighbors' indices based on dilated knn
    """
    def __init__(self, k=9, dilation=1, stochastic=False, epsilon=0.0):
        super(DenseDilatedKnnGraph, self).__init__()
        self.dilation = dilation
        self.stochastic = stochastic
        self.epsilon = epsilon
        self.k = k
        self._dilated = DenseDilated(k, dilation, stochastic, epsilon)
        self.knn = dense_knn_matrix

    def forward(self, x):
        edge_index = self.knn(x, self.k * self.dilation)
        return self._dilated(edge_index)


class DilatedKnnGraph(nn.Module):
    """
    Find the neighbors' indices based on dilated knn
    """
    def __init__(self, k=9, dilation=1, stochastic=False, epsilon=0.0):
        super(DilatedKnnGraph, self).__init__()
        self.dilation = dilation
        self.stochastic = stochastic
        self.epsilon = epsilon
        self.k = k
        self._dilated = DenseDilated(k, dilation, stochastic, epsilon)
        self.knn = knn_graph

    def forward(self, x):
        x = x.squeeze(-1)
        B, C, N = x.shape
        edge_index = []
        for i in range(B):
            edgeindex = self.knn(x[i].contiguous().transpose(1, 0).contiguous(), self.k * self.dilation)
            edgeindex = edgeindex.view(2, N, self.k * self.dilation)
            edge_index.append(edgeindex)
        edge_index = torch.stack(edge_index, dim=1)
        return self._dilated(edge_index)
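A quick shape check for the dense dilated-KNN helpers above (illustrative only; the tensor sizes are arbitrary and the import assumes the repo root is on PYTHONPATH):

```python
import torch
from gcn_lib.dense import DenseDilatedKnnGraph

# Toy batch of 2 point clouds, 16 feature dims, 50 points each,
# in the (batch_size, num_dims, num_points, 1) layout these helpers expect.
x = torch.randn(2, 16, 50, 1)

graph = DenseDilatedKnnGraph(k=9, dilation=2)
edge_index = graph(x)        # neighbor / center indices after dilation
print(edge_index.shape)      # torch.Size([2, 2, 50, 9])
```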
gcn_lib/dense/torch_nn.py
ADDED
@@ -0,0 +1,93 @@
1 |
+
import torch
|
2 |
+
from torch import nn
|
3 |
+
from torch.nn import Sequential as Seq, Linear as Lin, Conv2d
|
4 |
+
|
5 |
+
|
6 |
+
##############################
|
7 |
+
# Basic layers
|
8 |
+
##############################
|
9 |
+
def act_layer(act, inplace=False, neg_slope=0.2, n_prelu=1):
|
10 |
+
# activation layer
|
11 |
+
|
12 |
+
act = act.lower()
|
13 |
+
if act == 'relu':
|
14 |
+
layer = nn.ReLU(inplace)
|
15 |
+
elif act == 'leakyrelu':
|
16 |
+
layer = nn.LeakyReLU(neg_slope, inplace)
|
17 |
+
elif act == 'prelu':
|
18 |
+
layer = nn.PReLU(num_parameters=n_prelu, init=neg_slope)
|
19 |
+
else:
|
20 |
+
raise NotImplementedError('activation layer [%s] is not found' % act)
|
21 |
+
return layer
|
22 |
+
|
23 |
+
|
24 |
+
def norm_layer(norm, nc):
|
25 |
+
# normalization layer 2d
|
26 |
+
norm = norm.lower()
|
27 |
+
if norm == 'batch':
|
28 |
+
layer = nn.BatchNorm2d(nc, affine=True)
|
29 |
+
elif norm == 'instance':
|
30 |
+
layer = nn.InstanceNorm2d(nc, affine=False)
|
31 |
+
else:
|
32 |
+
raise NotImplementedError('normalization layer [%s] is not found' % norm)
|
33 |
+
return layer
|
34 |
+
|
35 |
+
|
36 |
+
class MLP(Seq):
|
37 |
+
def __init__(self, channels, act='relu', norm=None, bias=True):
|
38 |
+
m = []
|
39 |
+
for i in range(1, len(channels)):
|
40 |
+
m.append(Lin(channels[i - 1], channels[i], bias))
|
41 |
+
if act is not None and act.lower() != 'none':
|
42 |
+
m.append(act_layer(act))
|
43 |
+
if norm is not None and norm.lower() != 'none':
|
44 |
+
m.append(norm_layer(norm, channels[-1]))
|
45 |
+
super(MLP, self).__init__(*m)
|
46 |
+
|
47 |
+
|
48 |
+
class BasicConv(Seq):
|
49 |
+
def __init__(self, channels, act='relu', norm=None, bias=True, drop=0.):
|
50 |
+
m = []
|
51 |
+
for i in range(1, len(channels)):
|
52 |
+
m.append(Conv2d(channels[i - 1], channels[i], 1, bias=bias))
|
53 |
+
if act is not None and act.lower() != 'none':
|
54 |
+
m.append(act_layer(act))
|
55 |
+
if norm is not None and norm.lower() != 'none':
|
56 |
+
m.append(norm_layer(norm, channels[-1]))
|
57 |
+
if drop > 0:
|
58 |
+
m.append(nn.Dropout2d(drop))
|
59 |
+
|
60 |
+
super(BasicConv, self).__init__(*m)
|
61 |
+
|
62 |
+
self.reset_parameters()
|
63 |
+
|
64 |
+
def reset_parameters(self):
|
65 |
+
for m in self.modules():
|
66 |
+
if isinstance(m, nn.Conv2d):
|
67 |
+
nn.init.kaiming_normal_(m.weight)
|
68 |
+
if m.bias is not None:
|
69 |
+
nn.init.zeros_(m.bias)
|
70 |
+
elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.InstanceNorm2d):
|
71 |
+
m.weight.data.fill_(1)
|
72 |
+
m.bias.data.zero_()
|
73 |
+
|
74 |
+
|
75 |
+
def batched_index_select(inputs, index):
|
76 |
+
"""
|
77 |
+
|
78 |
+
:param inputs: torch.Size([batch_size, num_dims, num_vertices, 1])
|
79 |
+
:param index: torch.Size([batch_size, num_vertices, k])
|
80 |
+
:return: torch.Size([batch_size, num_dims, num_vertices, k])
|
81 |
+
"""
|
82 |
+
|
83 |
+
batch_size, num_dims, num_vertices, _ = inputs.shape
|
84 |
+
k = index.shape[2]
|
85 |
+
idx = torch.arange(0, batch_size) * num_vertices
|
86 |
+
idx = idx.view(batch_size, -1)
|
87 |
+
|
88 |
+
inputs = inputs.transpose(2, 1).contiguous().view(-1, num_dims)
|
89 |
+
index = index.view(batch_size, -1) + idx.type(index.dtype).to(inputs.device)
|
90 |
+
index = index.view(-1)
|
91 |
+
|
92 |
+
return torch.index_select(inputs, 0, index).view(batch_size, -1, num_dims).transpose(2, 1).view(batch_size, num_dims, -1, k)
|
93 |
+
|
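The file above also defines BasicConv (stacks of 1x1 Conv2d with optional activation and normalization) and batched_index_select for gathering per-vertex neighbor features. A hedged shape-check sketch, assuming the repo root is on PYTHONPATH; note the norm layer is built with the last channel count, so two-element channel lists are the safe usage:

```python
import torch
from gcn_lib.dense import BasicConv, batched_index_select

# BasicConv([in, out]) = 1x1 Conv2d -> ReLU -> BatchNorm2d over dense vertex features.
conv = BasicConv([32, 64], act='relu', norm='batch')
feats = torch.randn(4, 32, 100, 1)                  # (batch, channels, num_vertices, 1)
print(conv(feats).shape)                            # torch.Size([4, 64, 100, 1])

# Gather neighbor features given per-vertex neighbor indices.
idx = torch.randint(0, 100, (4, 100, 9))            # 9 neighbors per vertex
print(batched_index_select(feats, idx).shape)       # torch.Size([4, 32, 100, 9])
```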
gcn_lib/dense/torch_vertex.py
ADDED
@@ -0,0 +1,115 @@
1 |
+
import torch
|
2 |
+
from torch import nn
|
3 |
+
from .torch_nn import BasicConv, batched_index_select
|
4 |
+
from .torch_edge import DenseDilatedKnnGraph, DilatedKnnGraph
|
5 |
+
import torch.nn.functional as F
|
6 |
+
|
7 |
+
|
8 |
+
class MRConv2d(nn.Module):
|
9 |
+
"""
|
10 |
+
Max-Relative Graph Convolution (Paper: https://arxiv.org/abs/1904.03751) for dense data type
|
11 |
+
"""
|
12 |
+
def __init__(self, in_channels, out_channels, act='relu', norm=None, bias=True):
|
13 |
+
super(MRConv2d, self).__init__()
|
14 |
+
self.nn = BasicConv([in_channels*2, out_channels], act, norm, bias)
|
15 |
+
|
16 |
+
def forward(self, x, edge_index):
|
17 |
+
x_i = batched_index_select(x, edge_index[1])
|
18 |
+
x_j = batched_index_select(x, edge_index[0])
|
19 |
+
x_j, _ = torch.max(x_j - x_i, -1, keepdim=True)
|
20 |
+
return self.nn(torch.cat([x, x_j], dim=1))
|
21 |
+
|
22 |
+
|
23 |
+
class EdgeConv2d(nn.Module):
|
24 |
+
"""
|
25 |
+
Edge convolution layer (with activation, batch normalization) for dense data type
|
26 |
+
"""
|
27 |
+
def __init__(self, in_channels, out_channels, act='relu', norm=None, bias=True):
|
28 |
+
super(EdgeConv2d, self).__init__()
|
29 |
+
self.nn = BasicConv([in_channels * 2, out_channels], act, norm, bias)
|
30 |
+
|
31 |
+
def forward(self, x, edge_index):
|
32 |
+
x_i = batched_index_select(x, edge_index[1])
|
33 |
+
x_j = batched_index_select(x, edge_index[0])
|
34 |
+
max_value, _ = torch.max(self.nn(torch.cat([x_i, x_j - x_i], dim=1)), -1, keepdim=True)
|
35 |
+
return max_value
|
36 |
+
|
37 |
+
|
38 |
+
class GraphConv2d(nn.Module):
|
39 |
+
"""
|
40 |
+
Static graph convolution layer
|
41 |
+
"""
|
42 |
+
def __init__(self, in_channels, out_channels, conv='edge', act='relu', norm=None, bias=True):
|
43 |
+
super(GraphConv2d, self).__init__()
|
44 |
+
if conv == 'edge':
|
45 |
+
self.gconv = EdgeConv2d(in_channels, out_channels, act, norm, bias)
|
46 |
+
elif conv == 'mr':
|
47 |
+
self.gconv = MRConv2d(in_channels, out_channels, act, norm, bias)
|
48 |
+
else:
|
49 |
+
raise NotImplementedError('conv:{} is not supported'.format(conv))
|
50 |
+
|
51 |
+
def forward(self, x, edge_index):
|
52 |
+
return self.gconv(x, edge_index)
|
53 |
+
|
54 |
+
|
55 |
+
class DynConv2d(GraphConv2d):
|
56 |
+
"""
|
57 |
+
Dynamic graph convolution layer
|
58 |
+
"""
|
59 |
+
def __init__(self, in_channels, out_channels, kernel_size=9, dilation=1, conv='edge', act='relu',
|
60 |
+
norm=None, bias=True, stochastic=False, epsilon=0.0, knn='matrix'):
|
61 |
+
super(DynConv2d, self).__init__(in_channels, out_channels, conv, act, norm, bias)
|
62 |
+
self.k = kernel_size
|
63 |
+
self.d = dilation
|
64 |
+
if knn == 'matrix':
|
65 |
+
self.dilated_knn_graph = DenseDilatedKnnGraph(kernel_size, dilation, stochastic, epsilon)
|
66 |
+
else:
|
67 |
+
self.dilated_knn_graph = DilatedKnnGraph(kernel_size, dilation, stochastic, epsilon)
|
68 |
+
|
69 |
+
def forward(self, x):
|
70 |
+
edge_index = self.dilated_knn_graph(x)
|
71 |
+
return super(DynConv2d, self).forward(x, edge_index)
|
72 |
+
|
73 |
+
|
74 |
+
class PlainDynBlock2d(nn.Module):
|
75 |
+
"""
|
76 |
+
Plain Dynamic graph convolution block
|
77 |
+
"""
|
78 |
+
def __init__(self, in_channels, kernel_size=9, dilation=1, conv='edge', act='relu', norm=None,
|
79 |
+
bias=True, stochastic=False, epsilon=0.0, knn='matrix'):
|
80 |
+
super(PlainDynBlock2d, self).__init__()
|
81 |
+
self.body = DynConv2d(in_channels, in_channels, kernel_size, dilation, conv,
|
82 |
+
act, norm, bias, stochastic, epsilon, knn)
|
83 |
+
|
84 |
+
def forward(self, x):
|
85 |
+
return self.body(x)
|
86 |
+
|
87 |
+
|
88 |
+
class ResDynBlock2d(nn.Module):
|
89 |
+
"""
|
90 |
+
Residual Dynamic graph convolution block
|
91 |
+
"""
|
92 |
+
def __init__(self, in_channels, kernel_size=9, dilation=1, conv='edge', act='relu', norm=None,
|
93 |
+
bias=True, stochastic=False, epsilon=0.0, knn='matrix', res_scale=1):
|
94 |
+
super(ResDynBlock2d, self).__init__()
|
95 |
+
self.body = DynConv2d(in_channels, in_channels, kernel_size, dilation, conv,
|
96 |
+
act, norm, bias, stochastic, epsilon, knn)
|
97 |
+
self.res_scale = res_scale
|
98 |
+
|
99 |
+
def forward(self, x):
|
100 |
+
return self.body(x) + x*self.res_scale
|
101 |
+
|
102 |
+
|
103 |
+
class DenseDynBlock2d(nn.Module):
|
104 |
+
"""
|
105 |
+
Dense Dynamic graph convolution block
|
106 |
+
"""
|
107 |
+
def __init__(self, in_channels, out_channels=64, kernel_size=9, dilation=1, conv='edge',
|
108 |
+
act='relu', norm=None,bias=True, stochastic=False, epsilon=0.0, knn='matrix'):
|
109 |
+
super(DenseDynBlock2d, self).__init__()
|
110 |
+
self.body = DynConv2d(in_channels, out_channels, kernel_size, dilation, conv,
|
111 |
+
act, norm, bias, stochastic, epsilon, knn)
|
112 |
+
|
113 |
+
def forward(self, x):
|
114 |
+
dense = self.body(x)
|
115 |
+
return torch.cat((x, dense), 1)
|
gcn_lib/sparse/__init__.py
ADDED
@@ -0,0 +1,4 @@
from .torch_nn import *
from .torch_edge import *
from .torch_vertex import *
gcn_lib/sparse/torch_edge.py
ADDED
@@ -0,0 +1,113 @@
1 |
+
import torch
|
2 |
+
from torch import nn
|
3 |
+
from torch_cluster import knn_graph
|
4 |
+
|
5 |
+
|
6 |
+
class Dilated(nn.Module):
|
7 |
+
"""
|
8 |
+
Find dilated neighbor from neighbor list
|
9 |
+
"""
|
10 |
+
def __init__(self, k=9, dilation=1, stochastic=False, epsilon=0.0):
|
11 |
+
super(Dilated, self).__init__()
|
12 |
+
self.dilation = dilation
|
13 |
+
self.stochastic = stochastic
|
14 |
+
self.epsilon = epsilon
|
15 |
+
self.k = k
|
16 |
+
|
17 |
+
def forward(self, edge_index, batch=None):
|
18 |
+
if self.stochastic:
|
19 |
+
if torch.rand(1) < self.epsilon and self.training:
|
20 |
+
num = self.k * self.dilation
|
21 |
+
randnum = torch.randperm(num)[:self.k]
|
22 |
+
edge_index = edge_index.view(2, -1, num)
|
23 |
+
edge_index = edge_index[:, :, randnum]
|
24 |
+
return edge_index.view(2, -1)
|
25 |
+
else:
|
26 |
+
edge_index = edge_index[:, ::self.dilation]
|
27 |
+
else:
|
28 |
+
edge_index = edge_index[:, ::self.dilation]
|
29 |
+
return edge_index
|
30 |
+
|
31 |
+
|
32 |
+
class DilatedKnnGraph(nn.Module):
|
33 |
+
"""
|
34 |
+
Find the neighbors' indices based on dilated knn
|
35 |
+
"""
|
36 |
+
def __init__(self, k=9, dilation=1, stochastic=False, epsilon=0.0, knn='matrix'):
|
37 |
+
super(DilatedKnnGraph, self).__init__()
|
38 |
+
self.dilation = dilation
|
39 |
+
self.stochastic = stochastic
|
40 |
+
self.epsilon = epsilon
|
41 |
+
self.k = k
|
42 |
+
self._dilated = Dilated(k, dilation, stochastic, epsilon)
|
43 |
+
if knn == 'matrix':
|
44 |
+
self.knn = knn_graph_matrix
|
45 |
+
else:
|
46 |
+
self.knn = knn_graph
|
47 |
+
|
48 |
+
def forward(self, x, batch):
|
49 |
+
edge_index = self.knn(x, self.k * self.dilation, batch)
|
50 |
+
return self._dilated(edge_index, batch)
|
51 |
+
|
52 |
+
|
53 |
+
def pairwise_distance(x):
|
54 |
+
"""
|
55 |
+
Compute pairwise distance of a point cloud.
|
56 |
+
Args:
|
57 |
+
x: tensor (batch_size, num_points, num_dims)
|
58 |
+
Returns:
|
59 |
+
pairwise distance: (batch_size, num_points, num_points)
|
60 |
+
"""
|
61 |
+
x_inner = -2*torch.matmul(x, x.transpose(2, 1))
|
62 |
+
x_square = torch.sum(torch.mul(x, x), dim=-1, keepdim=True)
|
63 |
+
return x_square + x_inner + x_square.transpose(2, 1)
|
64 |
+
|
65 |
+
|
66 |
+
def knn_matrix(x, k=16, batch=None):
|
67 |
+
"""Get KNN based on the pairwise distance.
|
68 |
+
Args:
|
69 |
+
pairwise distance: (num_points, num_points)
|
70 |
+
k: int
|
71 |
+
Returns:
|
72 |
+
nearest neighbors: (num_points*k ,1) (num_points, k)
|
73 |
+
"""
|
74 |
+
with torch.no_grad():
|
75 |
+
if batch is None:
|
76 |
+
batch_size = 1
|
77 |
+
else:
|
78 |
+
batch_size = batch[-1] + 1
|
79 |
+
x = x.view(batch_size, -1, x.shape[-1])
|
80 |
+
|
81 |
+
neg_adj = -pairwise_distance(x.detach())
|
82 |
+
_, nn_idx = torch.topk(neg_adj, k=k)
|
83 |
+
del neg_adj
|
84 |
+
|
85 |
+
n_points = x.shape[1]
|
86 |
+
start_idx = torch.arange(0, n_points*batch_size, n_points).long().view(batch_size, 1, 1)
|
87 |
+
if x.is_cuda:
|
88 |
+
start_idx = start_idx.cuda()
|
89 |
+
nn_idx += start_idx
|
90 |
+
del start_idx
|
91 |
+
|
92 |
+
if x.is_cuda:
|
93 |
+
torch.cuda.empty_cache()
|
94 |
+
|
95 |
+
nn_idx = nn_idx.view(1, -1)
|
96 |
+
center_idx = torch.arange(0, n_points*batch_size).repeat(k, 1).transpose(1, 0).contiguous().view(1, -1)
|
97 |
+
if x.is_cuda:
|
98 |
+
center_idx = center_idx.cuda()
|
99 |
+
return nn_idx, center_idx
|
100 |
+
|
101 |
+
|
102 |
+
def knn_graph_matrix(x, k=16, batch=None):
|
103 |
+
"""Construct edge feature for each point
|
104 |
+
Args:
|
105 |
+
x: (num_points, num_dims)
|
106 |
+
batch: (num_points, )
|
107 |
+
k: int
|
108 |
+
Returns:
|
109 |
+
edge_index: (2, num_points*k)
|
110 |
+
"""
|
111 |
+
nn_idx, center_idx = knn_matrix(x, k, batch)
|
112 |
+
return torch.cat((nn_idx, center_idx), dim=0)
|
113 |
+
|
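An illustrative call of the sparse KNN helpers above with a toy batch vector (sizes are arbitrary; this mirrors how DilatedKnnGraph builds edge_index, and the import assumes the repo root is on PYTHONPATH):

```python
import torch
from gcn_lib.sparse import knn_graph_matrix, DilatedKnnGraph

# Two graphs of 5 nodes each, flattened into one (num_points, num_dims) tensor
# with a batch assignment vector, as the sparse helpers expect.
x = torch.randn(10, 3)
batch = torch.tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

edge_index = knn_graph_matrix(x, k=3, batch=batch)       # (2, num_points * k) = (2, 30)
dilated = DilatedKnnGraph(k=3, dilation=1, knn='matrix')(x, batch)
print(edge_index.shape, dilated.shape)
```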
gcn_lib/sparse/torch_message.py
ADDED
@@ -0,0 +1,98 @@
1 |
+
import torch
|
2 |
+
import torch.nn.functional as F
|
3 |
+
from torch_geometric.nn import MessagePassing
|
4 |
+
from torch_scatter import scatter, scatter_softmax
|
5 |
+
from torch_geometric.utils import degree
|
6 |
+
|
7 |
+
|
8 |
+
class GenMessagePassing(MessagePassing):
|
9 |
+
def __init__(self, aggr='softmax',
|
10 |
+
t=1.0, learn_t=False,
|
11 |
+
p=1.0, learn_p=False,
|
12 |
+
y=0.0, learn_y=False):
|
13 |
+
|
14 |
+
if aggr in ['softmax_sg', 'softmax', 'softmax_sum']:
|
15 |
+
|
16 |
+
super(GenMessagePassing, self).__init__(aggr=None)
|
17 |
+
self.aggr = aggr
|
18 |
+
|
19 |
+
if learn_t and (aggr == 'softmax' or aggr == 'softmax_sum'):
|
20 |
+
self.learn_t = True
|
21 |
+
self.t = torch.nn.Parameter(torch.Tensor([t]), requires_grad=True)
|
22 |
+
else:
|
23 |
+
self.learn_t = False
|
24 |
+
self.t = t
|
25 |
+
|
26 |
+
if aggr == 'softmax_sum':
|
27 |
+
self.y = torch.nn.Parameter(torch.Tensor([y]), requires_grad=learn_y)
|
28 |
+
|
29 |
+
elif aggr in ['power', 'power_sum']:
|
30 |
+
|
31 |
+
super(GenMessagePassing, self).__init__(aggr=None)
|
32 |
+
self.aggr = aggr
|
33 |
+
|
34 |
+
if learn_p:
|
35 |
+
self.p = torch.nn.Parameter(torch.Tensor([p]), requires_grad=True)
|
36 |
+
else:
|
37 |
+
self.p = p
|
38 |
+
|
39 |
+
if aggr == 'power_sum':
|
40 |
+
self.y = torch.nn.Parameter(torch.Tensor([y]), requires_grad=learn_y)
|
41 |
+
else:
|
42 |
+
super(GenMessagePassing, self).__init__(aggr=aggr)
|
43 |
+
|
44 |
+
def aggregate(self, inputs, index, ptr=None, dim_size=None):
|
45 |
+
|
46 |
+
if self.aggr in ['add', 'mean', 'max', None]:
|
47 |
+
return super(GenMessagePassing, self).aggregate(inputs, index, ptr, dim_size)
|
48 |
+
|
49 |
+
elif self.aggr in ['softmax_sg', 'softmax', 'softmax_sum']:
|
50 |
+
|
51 |
+
if self.learn_t:
|
52 |
+
out = scatter_softmax(inputs*self.t, index, dim=self.node_dim)
|
53 |
+
else:
|
54 |
+
with torch.no_grad():
|
55 |
+
out = scatter_softmax(inputs*self.t, index, dim=self.node_dim)
|
56 |
+
|
57 |
+
out = scatter(inputs*out, index, dim=self.node_dim,
|
58 |
+
dim_size=dim_size, reduce='sum')
|
59 |
+
|
60 |
+
if self.aggr == 'softmax_sum':
|
61 |
+
self.sigmoid_y = torch.sigmoid(self.y)
|
62 |
+
degrees = degree(index, num_nodes=dim_size).unsqueeze(1)
|
63 |
+
out = torch.pow(degrees, self.sigmoid_y) * out
|
64 |
+
|
65 |
+
return out
|
66 |
+
|
67 |
+
|
68 |
+
elif self.aggr in ['power', 'power_sum']:
|
69 |
+
min_value, max_value = 1e-7, 1e1
|
70 |
+
torch.clamp_(inputs, min_value, max_value)
|
71 |
+
out = scatter(torch.pow(inputs, self.p), index, dim=self.node_dim,
|
72 |
+
dim_size=dim_size, reduce='mean')
|
73 |
+
torch.clamp_(out, min_value, max_value)
|
74 |
+
out = torch.pow(out, 1/self.p)
|
75 |
+
|
76 |
+
if self.aggr == 'power_sum':
|
77 |
+
self.sigmoid_y = torch.sigmoid(self.y)
|
78 |
+
degrees = degree(index, num_nodes=dim_size).unsqueeze(1)
|
79 |
+
out = torch.pow(degrees, self.sigmoid_y) * out
|
80 |
+
|
81 |
+
return out
|
82 |
+
|
83 |
+
else:
|
84 |
+
raise NotImplementedError('To be implemented')
|
85 |
+
|
86 |
+
|
87 |
+
class MsgNorm(torch.nn.Module):
|
88 |
+
def __init__(self, learn_msg_scale=False):
|
89 |
+
super(MsgNorm, self).__init__()
|
90 |
+
|
91 |
+
self.msg_scale = torch.nn.Parameter(torch.Tensor([1.0]),
|
92 |
+
requires_grad=learn_msg_scale)
|
93 |
+
|
94 |
+
def forward(self, x, msg, p=2):
|
95 |
+
msg = F.normalize(msg, p=p, dim=1)
|
96 |
+
x_norm = x.norm(p=p, dim=1, keepdim=True)
|
97 |
+
msg = msg * x_norm * self.msg_scale
|
98 |
+
return msg
|
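The 'softmax' branch of GenMessagePassing.aggregate reduces to a per-destination softmax over incoming messages followed by a weighted sum. A small standalone sketch of that computation using torch_scatter (values chosen only for illustration):

```python
import torch
from torch_scatter import scatter, scatter_softmax

# Messages from 4 edges aggregated onto 2 destination nodes, with temperature t.
msgs = torch.tensor([[1.0], [2.0], [3.0], [4.0]])
dst = torch.tensor([0, 0, 1, 1])
t = 1.0

weights = scatter_softmax(msgs * t, dst, dim=0)           # per-node softmax over incoming messages
out = scatter(msgs * weights, dst, dim=0, reduce='sum')   # weighted sum per node
print(out)   # node 0 ~ 1.73, node 1 ~ 3.73
```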
gcn_lib/sparse/torch_nn.py
ADDED
@@ -0,0 +1,160 @@
1 |
+
import torch
|
2 |
+
from torch import nn
|
3 |
+
from torch.nn import Sequential as Seq, Linear as Lin
|
4 |
+
from utils.data_util import get_atom_feature_dims, get_bond_feature_dims
|
5 |
+
|
6 |
+
|
7 |
+
##############################
|
8 |
+
# Basic layers
|
9 |
+
##############################
|
10 |
+
def act_layer(act_type, inplace=False, neg_slope=0.2, n_prelu=1):
|
11 |
+
# activation layer
|
12 |
+
act = act_type.lower()
|
13 |
+
if act == 'relu':
|
14 |
+
layer = nn.ReLU(inplace)
|
15 |
+
elif act == 'leakyrelu':
|
16 |
+
layer = nn.LeakyReLU(neg_slope, inplace)
|
17 |
+
elif act == 'prelu':
|
18 |
+
layer = nn.PReLU(num_parameters=n_prelu, init=neg_slope)
|
19 |
+
else:
|
20 |
+
raise NotImplementedError('activation layer [%s] is not found' % act)
|
21 |
+
return layer
|
22 |
+
|
23 |
+
|
24 |
+
def norm_layer(norm_type, nc):
|
25 |
+
# normalization layer 1d
|
26 |
+
norm = norm_type.lower()
|
27 |
+
if norm == 'batch':
|
28 |
+
layer = nn.BatchNorm1d(nc, affine=True)
|
29 |
+
elif norm == 'layer':
|
30 |
+
layer = nn.LayerNorm(nc, elementwise_affine=True)
|
31 |
+
elif norm == 'instance':
|
32 |
+
layer = nn.InstanceNorm1d(nc, affine=False)
|
33 |
+
else:
|
34 |
+
raise NotImplementedError('normalization layer [%s] is not found' % norm)
|
35 |
+
return layer
|
36 |
+
|
37 |
+
|
38 |
+
class MultiSeq(Seq):
|
39 |
+
def __init__(self, *args):
|
40 |
+
super(MultiSeq, self).__init__(*args)
|
41 |
+
|
42 |
+
def forward(self, *inputs):
|
43 |
+
for module in self._modules.values():
|
44 |
+
if type(inputs) == tuple:
|
45 |
+
inputs = module(*inputs)
|
46 |
+
else:
|
47 |
+
inputs = module(inputs)
|
48 |
+
return inputs
|
49 |
+
|
50 |
+
|
51 |
+
class MLP(Seq):
|
52 |
+
def __init__(self, channels, act='relu',
|
53 |
+
norm=None, bias=True,
|
54 |
+
drop=0., last_lin=False):
|
55 |
+
m = []
|
56 |
+
|
57 |
+
for i in range(1, len(channels)):
|
58 |
+
|
59 |
+
m.append(Lin(channels[i - 1], channels[i], bias))
|
60 |
+
|
61 |
+
if (i == len(channels) - 1) and last_lin:
|
62 |
+
pass
|
63 |
+
else:
|
64 |
+
if norm is not None and norm.lower() != 'none':
|
65 |
+
m.append(norm_layer(norm, channels[i]))
|
66 |
+
if act is not None and act.lower() != 'none':
|
67 |
+
m.append(act_layer(act))
|
68 |
+
if drop > 0:
|
69 |
+
m.append(nn.Dropout2d(drop))
|
70 |
+
|
71 |
+
self.m = m
|
72 |
+
super(MLP, self).__init__(*self.m)
|
73 |
+
|
74 |
+
|
75 |
+
class AtomEncoder(nn.Module):
|
76 |
+
|
77 |
+
def __init__(self, emb_dim):
|
78 |
+
super(AtomEncoder, self).__init__()
|
79 |
+
|
80 |
+
self.atom_embedding_list = nn.ModuleList()
|
81 |
+
full_atom_feature_dims = get_atom_feature_dims()
|
82 |
+
|
83 |
+
for i, dim in enumerate(full_atom_feature_dims):
|
84 |
+
emb = nn.Embedding(dim, emb_dim)
|
85 |
+
nn.init.xavier_uniform_(emb.weight.data)
|
86 |
+
self.atom_embedding_list.append(emb)
|
87 |
+
|
88 |
+
def forward(self, x):
|
89 |
+
x_embedding = 0
|
90 |
+
for i in range(x.shape[1]):
|
91 |
+
x_embedding += self.atom_embedding_list[i](x[:, i])
|
92 |
+
|
93 |
+
return x_embedding
|
94 |
+
|
95 |
+
|
96 |
+
class BondEncoder(nn.Module):
|
97 |
+
|
98 |
+
def __init__(self, emb_dim):
|
99 |
+
super(BondEncoder, self).__init__()
|
100 |
+
|
101 |
+
self.bond_embedding_list = nn.ModuleList()
|
102 |
+
full_bond_feature_dims = get_bond_feature_dims()
|
103 |
+
|
104 |
+
for i, dim in enumerate(full_bond_feature_dims):
|
105 |
+
emb = nn.Embedding(dim, emb_dim)
|
106 |
+
nn.init.xavier_uniform_(emb.weight.data)
|
107 |
+
self.bond_embedding_list.append(emb)
|
108 |
+
|
109 |
+
def forward(self, edge_attr):
|
110 |
+
bond_embedding = 0
|
111 |
+
for i in range(edge_attr.shape[1]):
|
112 |
+
bond_embedding += self.bond_embedding_list[i](edge_attr[:, i])
|
113 |
+
|
114 |
+
return bond_embedding
|
115 |
+
|
116 |
+
class MM_BondEncoder(nn.Module):
|
117 |
+
# Replaces the lookup in the embedding module with one-hot encoding
|
118 |
+
# followed by matrix multiplication to allow Float type input
|
119 |
+
# instead of Long type input (backpropagate through layer)
|
120 |
+
|
121 |
+
def __init__(self, emb_dim):
|
122 |
+
super(MM_BondEncoder, self).__init__()
|
123 |
+
|
124 |
+
self.bond_embedding_list = nn.ModuleList()
|
125 |
+
self.full_bond_feature_dims = get_bond_feature_dims()
|
126 |
+
|
127 |
+
for i, dim in enumerate(self.full_bond_feature_dims):
|
128 |
+
emb = nn.Linear(dim, emb_dim, bias=False)
|
129 |
+
nn.init.xavier_uniform_(emb.weight.data)
|
130 |
+
self.bond_embedding_list.append(emb)
|
131 |
+
|
132 |
+
def forward(self, edge_attr):
|
133 |
+
#Change each feature in edge_attr to one-hot-vector and embed
|
134 |
+
edge_attr1, edge_attr2, edge_attr3 = torch.split(edge_attr, self.full_bond_feature_dims, dim=1)
|
135 |
+
bond_embedding = self.bond_embedding_list[0](edge_attr1) + self.bond_embedding_list[1](edge_attr2) + self.bond_embedding_list[2](edge_attr3)
|
136 |
+
return bond_embedding
|
137 |
+
|
138 |
+
class MM_AtomEncoder(nn.Module):
|
139 |
+
# Replaces the lookup in the embedding module with one-hot encoding
|
140 |
+
# followed by matrix multiplication to allow Float type input
|
141 |
+
# instead of Long type input (backpropagate through layer)
|
142 |
+
|
143 |
+
def __init__(self, emb_dim):
|
144 |
+
super(MM_AtomEncoder, self).__init__()
|
145 |
+
|
146 |
+
self.atom_embedding_list = nn.ModuleList()
|
147 |
+
self.full_atom_feature_dims = get_atom_feature_dims()
|
148 |
+
|
149 |
+
for i, dim in enumerate(self.full_atom_feature_dims):
|
150 |
+
emb = nn.Linear(dim, emb_dim, bias=False)
|
151 |
+
nn.init.xavier_uniform_(emb.weight.data)
|
152 |
+
self.atom_embedding_list.append(emb)
|
153 |
+
|
154 |
+
def forward(self, x):
|
155 |
+
# Change each feature in x to a one-hot vector and embed
|
156 |
+
split = torch.split(x, self.full_atom_feature_dims, dim=1)
|
157 |
+
atom_embedding = 0
|
158 |
+
for i in range(len(self.full_atom_feature_dims)):
|
159 |
+
atom_embedding += self.atom_embedding_list[i](split[i])
|
160 |
+
return atom_embedding
|
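A hedged sketch of AtomEncoder from the file above: each column of the long-typed input indexes one categorical vocabulary from get_atom_feature_dims(), and the per-feature embeddings are summed. The embedding size and node count here are arbitrary:

```python
import torch
from gcn_lib.sparse.torch_nn import AtomEncoder

encoder = AtomEncoder(emb_dim=64)
x = torch.zeros(5, 9, dtype=torch.long)   # 5 atoms, 9 categorical feature indices each
print(encoder(x).shape)                   # torch.Size([5, 64])
```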
gcn_lib/sparse/torch_vertex.py
ADDED
@@ -0,0 +1,355 @@
1 |
+
import torch
|
2 |
+
from torch import nn
|
3 |
+
import torch.nn.functional as F
|
4 |
+
import torch_geometric as tg
|
5 |
+
from .torch_nn import MLP, act_layer, norm_layer, BondEncoder, MM_BondEncoder
|
6 |
+
from .torch_edge import DilatedKnnGraph
|
7 |
+
from .torch_message import GenMessagePassing, MsgNorm
|
8 |
+
from torch_geometric.utils import remove_self_loops, add_self_loops
|
9 |
+
|
10 |
+
|
11 |
+
class GENConv(GenMessagePassing):
|
12 |
+
"""
|
13 |
+
GENeralized Graph Convolution (GENConv): https://arxiv.org/pdf/2006.07739.pdf
|
14 |
+
SoftMax & PowerMean Aggregation
|
15 |
+
"""
|
16 |
+
def __init__(self, in_dim, emb_dim, args,
|
17 |
+
aggr='softmax',
|
18 |
+
t=1.0, learn_t=False,
|
19 |
+
p=1.0, learn_p=False,
|
20 |
+
y=0.0, learn_y=False,
|
21 |
+
msg_norm=False, learn_msg_scale=True,
|
22 |
+
encode_edge=False, bond_encoder=False,
|
23 |
+
edge_feat_dim=None,
|
24 |
+
norm='batch', mlp_layers=2,
|
25 |
+
eps=1e-7):
|
26 |
+
|
27 |
+
super(GENConv, self).__init__(aggr=aggr,
|
28 |
+
t=t, learn_t=learn_t,
|
29 |
+
p=p, learn_p=learn_p,
|
30 |
+
y=y, learn_y=learn_y)
|
31 |
+
|
32 |
+
channels_list = [in_dim]
|
33 |
+
|
34 |
+
for i in range(mlp_layers-1):
|
35 |
+
channels_list.append(in_dim*2)
|
36 |
+
|
37 |
+
channels_list.append(emb_dim)
|
38 |
+
|
39 |
+
self.mlp = MLP(channels=channels_list,
|
40 |
+
norm=norm,
|
41 |
+
last_lin=True)
|
42 |
+
|
43 |
+
self.msg_encoder = torch.nn.ReLU()
|
44 |
+
self.eps = eps
|
45 |
+
|
46 |
+
self.msg_norm = msg_norm
|
47 |
+
self.encode_edge = encode_edge
|
48 |
+
self.bond_encoder = bond_encoder
|
49 |
+
self.advs = args.advs
|
50 |
+
if msg_norm:
|
51 |
+
self.msg_norm = MsgNorm(learn_msg_scale=learn_msg_scale)
|
52 |
+
else:
|
53 |
+
self.msg_norm = None
|
54 |
+
|
55 |
+
if self.encode_edge:
|
56 |
+
if self.bond_encoder:
|
57 |
+
if self.advs:
|
58 |
+
self.edge_encoder = MM_BondEncoder(emb_dim=in_dim)
|
59 |
+
else:
|
60 |
+
self.edge_encoder = BondEncoder(emb_dim=in_dim)
|
61 |
+
else:
|
62 |
+
self.edge_encoder = torch.nn.Linear(edge_feat_dim, in_dim)
|
63 |
+
|
64 |
+
def forward(self, x, edge_index, edge_attr=None):
|
65 |
+
x = x
|
66 |
+
|
67 |
+
if self.encode_edge and edge_attr is not None:
|
68 |
+
edge_emb = self.edge_encoder(edge_attr)
|
69 |
+
else:
|
70 |
+
edge_emb = edge_attr
|
71 |
+
|
72 |
+
m = self.propagate(edge_index, x=x, edge_attr=edge_emb)
|
73 |
+
|
74 |
+
if self.msg_norm is not None:
|
75 |
+
m = self.msg_norm(x, m)
|
76 |
+
|
77 |
+
h = x + m
|
78 |
+
out = self.mlp(h)
|
79 |
+
|
80 |
+
return out
|
81 |
+
|
82 |
+
def message(self, x_j, edge_attr=None):
|
83 |
+
|
84 |
+
if edge_attr is not None:
|
85 |
+
msg = x_j + edge_attr
|
86 |
+
else:
|
87 |
+
msg = x_j
|
88 |
+
|
89 |
+
return self.msg_encoder(msg) + self.eps
|
90 |
+
|
91 |
+
def update(self, aggr_out):
|
92 |
+
return aggr_out
|
93 |
+
|
94 |
+
|
95 |
+
class MRConv(nn.Module):
|
96 |
+
"""
|
97 |
+
Max-Relative Graph Convolution (Paper: https://arxiv.org/abs/1904.03751)
|
98 |
+
"""
|
99 |
+
def __init__(self, in_channels, out_channels, act='relu', norm=None, bias=True, aggr='max'):
|
100 |
+
super(MRConv, self).__init__()
|
101 |
+
self.nn = MLP([in_channels*2, out_channels], act, norm, bias)
|
102 |
+
self.aggr = aggr
|
103 |
+
|
104 |
+
def forward(self, x, edge_index):
|
105 |
+
""""""
|
106 |
+
x_j = tg.utils.scatter_(self.aggr, torch.index_select(x, 0, edge_index[0]) - torch.index_select(x, 0, edge_index[1]), edge_index[1], dim_size=x.shape[0])
|
107 |
+
return self.nn(torch.cat([x, x_j], dim=1))
|
108 |
+
|
109 |
+
|
110 |
+
class EdgConv(tg.nn.EdgeConv):
|
111 |
+
"""
|
112 |
+
Edge convolution layer (with activation, batch normalization)
|
113 |
+
"""
|
114 |
+
def __init__(self, in_channels, out_channels, act='relu', norm=None, bias=True, aggr='max'):
|
115 |
+
super(EdgConv, self).__init__(MLP([in_channels*2, out_channels], act, norm, bias), aggr)
|
116 |
+
|
117 |
+
def forward(self, x, edge_index):
|
118 |
+
return super(EdgConv, self).forward(x, edge_index)
|
119 |
+
|
120 |
+
|
121 |
+
class GATConv(nn.Module):
|
122 |
+
"""
|
123 |
+
Graph Attention Convolution layer (with activation, batch normalization)
|
124 |
+
"""
|
125 |
+
def __init__(self, in_channels, out_channels, act='relu', norm=None, bias=True, heads=8):
|
126 |
+
super(GATConv, self).__init__()
|
127 |
+
self.gconv = tg.nn.GATConv(in_channels, out_channels, heads, bias=bias)
|
128 |
+
m =[]
|
129 |
+
if act:
|
130 |
+
m.append(act_layer(act))
|
131 |
+
if norm:
|
132 |
+
m.append(norm_layer(norm, out_channels))
|
133 |
+
self.unlinear = nn.Sequential(*m)
|
134 |
+
|
135 |
+
def forward(self, x, edge_index):
|
136 |
+
out = self.unlinear(self.gconv(x, edge_index))
|
137 |
+
return out
|
138 |
+
|
139 |
+
|
140 |
+
class SAGEConv(tg.nn.SAGEConv):
|
141 |
+
r"""The GraphSAGE operator from the `"Inductive Representation Learning on
|
142 |
+
Large Graphs" <https://arxiv.org/abs/1706.02216>`_ paper
|
143 |
+
|
144 |
+
.. math::
|
145 |
+
\mathbf{\hat{x}}_i &= \mathbf{\Theta} \cdot
|
146 |
+
\mathrm{mean}_{j \in \mathcal{N(i) \cup \{ i \}}}(\mathbf{x}_j)
|
147 |
+
|
148 |
+
\mathbf{x}^{\prime}_i &= \frac{\mathbf{\hat{x}}_i}
|
149 |
+
{\| \mathbf{\hat{x}}_i \|_2}.
|
150 |
+
|
151 |
+
Args:
|
152 |
+
in_channels (int): Size of each input sample.
|
153 |
+
out_channels (int): Size of each output sample.
|
154 |
+
normalize (bool, optional): If set to :obj:`False`, output features
|
155 |
+
will not be :math:`\ell_2`-normalized. (default: :obj:`True`)
|
156 |
+
bias (bool, optional): If set to :obj:`False`, the layer will not learn
|
157 |
+
an additive bias. (default: :obj:`True`)
|
158 |
+
**kwargs (optional): Additional arguments of
|
159 |
+
:class:`torch_geometric.nn.conv.MessagePassing`.
|
160 |
+
"""
|
161 |
+
|
162 |
+
def __init__(self,
|
163 |
+
in_channels,
|
164 |
+
out_channels,
|
165 |
+
nn,
|
166 |
+
norm=True,
|
167 |
+
bias=True,
|
168 |
+
relative=False,
|
169 |
+
**kwargs):
|
170 |
+
self.relative = relative
|
171 |
+
if norm is not None:
|
172 |
+
super(SAGEConv, self).__init__(in_channels, out_channels, True, bias, **kwargs)
|
173 |
+
else:
|
174 |
+
super(SAGEConv, self).__init__(in_channels, out_channels, False, bias, **kwargs)
|
175 |
+
self.nn = nn
|
176 |
+
|
177 |
+
def forward(self, x, edge_index, size=None):
|
178 |
+
""""""
|
179 |
+
if size is None:
|
180 |
+
edge_index, _ = remove_self_loops(edge_index)
|
181 |
+
edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))
|
182 |
+
|
183 |
+
x = x.unsqueeze(-1) if x.dim() == 1 else x
|
184 |
+
return self.propagate(edge_index, size=size, x=x)
|
185 |
+
|
186 |
+
def message(self, x_i, x_j):
|
187 |
+
if self.relative:
|
188 |
+
x = torch.matmul(x_j - x_i, self.weight)
|
189 |
+
else:
|
190 |
+
x = torch.matmul(x_j, self.weight)
|
191 |
+
return x
|
192 |
+
|
193 |
+
def update(self, aggr_out, x):
|
194 |
+
out = self.nn(torch.cat((x, aggr_out), dim=1))
|
195 |
+
if self.bias is not None:
|
196 |
+
out = out + self.bias
|
197 |
+
if self.normalize:
|
198 |
+
out = F.normalize(out, p=2, dim=-1)
|
199 |
+
return out
|
200 |
+
|
201 |
+
|
202 |
+
class RSAGEConv(SAGEConv):
|
203 |
+
"""
|
204 |
+
Residual SAGE convolution layer (with activation, batch normalization)
|
205 |
+
"""
|
206 |
+
|
207 |
+
def __init__(self, in_channels, out_channels, act='relu', norm=None, bias=True, relative=False):
|
208 |
+
nn = MLP([out_channels + in_channels, out_channels], act, norm, bias)
|
209 |
+
super(RSAGEConv, self).__init__(in_channels, out_channels, nn, norm, bias, relative)
|
210 |
+
|
211 |
+
|
212 |
+
class SemiGCNConv(nn.Module):
|
213 |
+
"""
|
214 |
+
SemiGCN convolution layer (with activation, batch normalization)
|
215 |
+
"""
|
216 |
+
|
217 |
+
def __init__(self, in_channels, out_channels, act='relu', norm=None, bias=True):
|
218 |
+
super(SemiGCNConv, self).__init__()
|
219 |
+
self.gconv = tg.nn.GCNConv(in_channels, out_channels, bias=bias)
|
220 |
+
m = []
|
221 |
+
if act:
|
222 |
+
m.append(act_layer(act))
|
223 |
+
if norm:
|
224 |
+
m.append(norm_layer(norm, out_channels))
|
225 |
+
self.unlinear = nn.Sequential(*m)
|
226 |
+
|
227 |
+
def forward(self, x, edge_index):
|
228 |
+
out = self.unlinear(self.gconv(x, edge_index))
|
229 |
+
return out
|
230 |
+
|
231 |
+
|
232 |
+
class GinConv(tg.nn.GINConv):
|
233 |
+
"""
|
234 |
+
GINConv layer (with activation, batch normalization)
|
235 |
+
"""
|
236 |
+
def __init__(self, in_channels, out_channels, act='relu', norm=None, bias=True, aggr='add'):
|
237 |
+
super(GinConv, self).__init__(MLP([in_channels, out_channels], act, norm, bias))
|
238 |
+
|
239 |
+
def forward(self, x, edge_index):
|
240 |
+
return super(GinConv, self).forward(x, edge_index)
|
241 |
+
|
242 |
+
|
243 |
+
class GraphConv(nn.Module):
|
244 |
+
"""
|
245 |
+
Static graph convolution layer
|
246 |
+
"""
|
247 |
+
def __init__(self, in_channels, out_channels, conv='edge',
|
248 |
+
act='relu', norm=None, bias=True, heads=8):
|
249 |
+
super(GraphConv, self).__init__()
|
250 |
+
if conv.lower() == 'edge':
|
251 |
+
self.gconv = EdgConv(in_channels, out_channels, act, norm, bias)
|
252 |
+
elif conv.lower() == 'mr':
|
253 |
+
self.gconv = MRConv(in_channels, out_channels, act, norm, bias)
|
254 |
+
elif conv.lower() == 'gat':
|
255 |
+
self.gconv = GATConv(in_channels, out_channels//heads, act, norm, bias, heads)
|
256 |
+
elif conv.lower() == 'gcn':
|
257 |
+
self.gconv = SemiGCNConv(in_channels, out_channels, act, norm, bias)
|
258 |
+
elif conv.lower() == 'gin':
|
259 |
+
self.gconv = GinConv(in_channels, out_channels, act, norm, bias)
|
260 |
+
elif conv.lower() == 'sage':
|
261 |
+
self.gconv = RSAGEConv(in_channels, out_channels, act, norm, bias, False)
|
262 |
+
elif conv.lower() == 'rsage':
|
263 |
+
self.gconv = RSAGEConv(in_channels, out_channels, act, norm, bias, True)
|
264 |
+
else:
|
265 |
+
raise NotImplementedError('conv {} is not implemented'.format(conv))
|
266 |
+
|
267 |
+
def forward(self, x, edge_index):
|
268 |
+
return self.gconv(x, edge_index)
|
269 |
+
|
270 |
+
|
271 |
+
class DynConv(GraphConv):
|
272 |
+
"""
|
273 |
+
Dynamic graph convolution layer
|
274 |
+
"""
|
275 |
+
def __init__(self, in_channels, out_channels, kernel_size=9, dilation=1, conv='edge', act='relu',
|
276 |
+
norm=None, bias=True, heads=8, **kwargs):
|
277 |
+
super(DynConv, self).__init__(in_channels, out_channels, conv, act, norm, bias, heads)
|
278 |
+
self.k = kernel_size
|
279 |
+
self.d = dilation
|
280 |
+
self.dilated_knn_graph = DilatedKnnGraph(kernel_size, dilation, **kwargs)
|
281 |
+
|
282 |
+
def forward(self, x, batch=None):
|
283 |
+
edge_index = self.dilated_knn_graph(x, batch)
|
284 |
+
return super(DynConv, self).forward(x, edge_index)
|
285 |
+
|
286 |
+
|
287 |
+
class PlainDynBlock(nn.Module):
|
288 |
+
"""
|
289 |
+
Plain Dynamic graph convolution block
|
290 |
+
"""
|
291 |
+
def __init__(self, channels, kernel_size=9, dilation=1, conv='edge', act='relu', norm=None,
|
292 |
+
bias=True, res_scale=1, **kwargs):
|
293 |
+
super(PlainDynBlock, self).__init__()
|
294 |
+
self.body = DynConv(channels, channels, kernel_size, dilation, conv,
|
295 |
+
act, norm, bias, **kwargs)
|
296 |
+
self.res_scale = res_scale
|
297 |
+
|
298 |
+
def forward(self, x, batch=None):
|
299 |
+
return self.body(x, batch), batch
|
300 |
+
|
301 |
+
|
302 |
+
class ResDynBlock(nn.Module):
|
303 |
+
"""
|
304 |
+
Residual Dynamic graph convolution block
|
305 |
+
"""
|
306 |
+
def __init__(self, channels, kernel_size=9, dilation=1, conv='edge', act='relu', norm=None,
|
307 |
+
bias=True, res_scale=1, **kwargs):
|
308 |
+
super(ResDynBlock, self).__init__()
|
309 |
+
self.body = DynConv(channels, channels, kernel_size, dilation, conv,
|
310 |
+
act, norm, bias, **kwargs)
|
311 |
+
self.res_scale = res_scale
|
312 |
+
|
313 |
+
def forward(self, x, batch=None):
|
314 |
+
return self.body(x, batch) + x*self.res_scale, batch
|
315 |
+
|
316 |
+
|
317 |
+
class DenseDynBlock(nn.Module):
|
318 |
+
"""
|
319 |
+
Dense Dynamic graph convolution block
|
320 |
+
"""
|
321 |
+
def __init__(self, in_channels, out_channels=64, kernel_size=9, dilation=1, conv='edge', act='relu', norm=None, bias=True, **kwargs):
|
322 |
+
super(DenseDynBlock, self).__init__()
|
323 |
+
self.body = DynConv(in_channels, out_channels, kernel_size, dilation, conv,
|
324 |
+
act, norm, bias, **kwargs)
|
325 |
+
|
326 |
+
def forward(self, x, batch=None):
|
327 |
+
dense = self.body(x, batch)
|
328 |
+
return torch.cat((x, dense), 1), batch
|
329 |
+
|
330 |
+
|
331 |
+
class ResGraphBlock(nn.Module):
|
332 |
+
"""
|
333 |
+
Residual Static graph convolution block
|
334 |
+
"""
|
335 |
+
def __init__(self, channels, conv='edge', act='relu', norm=None, bias=True, heads=8, res_scale=1):
|
336 |
+
super(ResGraphBlock, self).__init__()
|
337 |
+
self.body = GraphConv(channels, channels, conv, act, norm, bias, heads)
|
338 |
+
self.res_scale = res_scale
|
339 |
+
|
340 |
+
def forward(self, x, edge_index):
|
341 |
+
return self.body(x, edge_index) + x*self.res_scale, edge_index
|
342 |
+
|
343 |
+
|
344 |
+
class DenseGraphBlock(nn.Module):
|
345 |
+
"""
|
346 |
+
Dense Static graph convolution block
|
347 |
+
"""
|
348 |
+
def __init__(self, in_channels, out_channels, conv='edge', act='relu', norm=None, bias=True, heads=8):
|
349 |
+
super(DenseGraphBlock, self).__init__()
|
350 |
+
self.body = GraphConv(in_channels, out_channels, conv, act, norm, bias, heads)
|
351 |
+
|
352 |
+
def forward(self, x, edge_index):
|
353 |
+
dense = self.body(x, edge_index)
|
354 |
+
return torch.cat((x, dense), 1), edge_index
|
355 |
+
|
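A hedged construction sketch for GENConv from the file above. The args object below is a hypothetical stand-in for the project's argparse namespace (only the advs field is read by this layer), and all tensor sizes are illustrative:

```python
import types
import torch
from gcn_lib.sparse.torch_vertex import GENConv

# Hypothetical minimal args; the real training script passes a full argparse.Namespace.
args = types.SimpleNamespace(advs=False)

conv = GENConv(in_dim=64, emb_dim=64, args=args,
               aggr='softmax', t=1.0, learn_t=True,
               encode_edge=True, bond_encoder=True,
               norm='batch', mlp_layers=2)

x = torch.randn(10, 64)                        # 10 node embeddings
edge_index = torch.randint(0, 10, (2, 40))     # 40 random directed edges
edge_attr = torch.randint(0, 2, (40, 3))       # 3 categorical bond features per edge
out = conv(x, edge_index, edge_attr)           # -> (10, 64) updated node embeddings
```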
gradio/title.md
ADDED
@@ -0,0 +1,19 @@
<div>
    <div>
        <div style="display: flex; justify-content: center; align-items: center; text-align: center; font-size: 40px;">
            <b>⚛️ PLA-Net</b>
        </div>
        <br>
        <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
            <a href="#"><img src="https://img.shields.io/static/v1?label=11.6&message=CUDA&color=green"></a>
            <a href="#"><img src="https://img.shields.io/static/v1?label=1.12.0&message=Pytorch&color=red"></a>
            <a href="https://github.com/juliocesar-io/PLA-Net"><img src="https://img.shields.io/static/v1?logo=github&label=Github&message=Fork"></a>
        </div>
        <br>
        <div style="justify-content: center; align-items: center; text-align: center; font-size: 14px;">
            <p>
            Run inference with PLA-Net for a single protein and multiple ligands to predict their binding affinity using Graph Neural Networks.
            </p>
        </div>
    </div>
</div>
model/__init__.py
ADDED
@@ -0,0 +1,4 @@
import sys
import os
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
sys.path.append(ROOT_DIR)
model/model.py
ADDED
@@ -0,0 +1,246 @@
1 |
+
import torch
|
2 |
+
import torch.nn.functional as F
|
3 |
+
from torch_geometric.nn import global_add_pool, global_mean_pool, global_max_pool
|
4 |
+
|
5 |
+
from gcn_lib.sparse.torch_vertex import GENConv
|
6 |
+
from gcn_lib.sparse.torch_nn import norm_layer, MLP, MM_AtomEncoder
|
7 |
+
|
8 |
+
from model.model_encoder import AtomEncoder, BondEncoder
|
9 |
+
|
10 |
+
import logging
|
11 |
+
|
12 |
+
|
13 |
+
class DeeperGCN(torch.nn.Module):
|
14 |
+
def __init__(self, args, is_prot=False, saliency=False):
|
15 |
+
super(DeeperGCN, self).__init__()
|
16 |
+
|
17 |
+
# Set PM configuration
|
18 |
+
if is_prot:
|
19 |
+
self.num_layers = args.num_layers_prot
|
20 |
+
mlp_layers = args.mlp_layers_prot
|
21 |
+
hidden_channels = args.hidden_channels_prot
|
22 |
+
self.msg_norm = args.msg_norm_prot
|
23 |
+
learn_msg_scale = args.learn_msg_scale_prot
|
24 |
+
self.conv_encode_edge = args.conv_encode_edge_prot
|
25 |
+
|
26 |
+
# Set LM configuration
|
27 |
+
else:
|
28 |
+
self.num_layers = args.num_layers
|
29 |
+
mlp_layers = args.mlp_layers
|
30 |
+
hidden_channels = args.hidden_channels
|
31 |
+
self.msg_norm = args.msg_norm
|
32 |
+
learn_msg_scale = args.learn_msg_scale
|
33 |
+
self.conv_encode_edge = args.conv_encode_edge
|
34 |
+
|
35 |
+
# Set overall model configuration
|
36 |
+
self.dropout = args.dropout
|
37 |
+
self.block = args.block
|
38 |
+
self.add_virtual_node = args.add_virtual_node
|
39 |
+
self.training = True
|
40 |
+
self.args = args
|
41 |
+
|
42 |
+
num_classes = args.nclasses
|
43 |
+
conv = args.conv
|
44 |
+
aggr = args.gcn_aggr
|
45 |
+
t = args.t
|
46 |
+
self.learn_t = args.learn_t
|
47 |
+
p = args.p
|
48 |
+
self.learn_p = args.learn_p
|
49 |
+
|
50 |
+
norm = args.norm
|
51 |
+
|
52 |
+
graph_pooling = args.graph_pooling
|
53 |
+
|
54 |
+
# Print model parameters
|
55 |
+
print(
|
56 |
+
"The number of layers {}".format(self.num_layers),
|
57 |
+
"Aggr aggregation method {}".format(aggr),
|
58 |
+
"block: {}".format(self.block),
|
59 |
+
)
|
60 |
+
if self.block == "res+":
|
61 |
+
print("LN/BN->ReLU->GraphConv->Res")
|
62 |
+
elif self.block == "res":
|
63 |
+
print("GraphConv->LN/BN->ReLU->Res")
|
64 |
+
elif self.block == "dense":
|
65 |
+
raise NotImplementedError("To be implemented")
|
66 |
+
elif self.block == "plain":
|
67 |
+
print("GraphConv->LN/BN->ReLU")
|
68 |
+
else:
|
69 |
+
raise Exception("Unknown block Type")
|
70 |
+
|
71 |
+
self.gcns = torch.nn.ModuleList()
|
72 |
+
self.norms = torch.nn.ModuleList()
|
73 |
+
|
74 |
+
if self.add_virtual_node:
|
75 |
+
self.virtualnode_embedding = torch.nn.Embedding(1, hidden_channels)
|
76 |
+
torch.nn.init.constant_(self.virtualnode_embedding.weight.data, 0)
|
77 |
+
|
78 |
+
self.mlp_virtualnode_list = torch.nn.ModuleList()
|
79 |
+
|
80 |
+
for layer in range(self.num_layers - 1):
|
81 |
+
self.mlp_virtualnode_list.append(MLP([hidden_channels] * 3, norm=norm))
|
82 |
+
|
83 |
+
# Set GCN layer configuration
|
84 |
+
for layer in range(self.num_layers):
|
85 |
+
if conv == "gen":
|
86 |
+
gcn = GENConv(
|
87 |
+
hidden_channels,
|
88 |
+
hidden_channels,
|
89 |
+
args,
|
90 |
+
aggr=aggr,
|
91 |
+
t=t,
|
92 |
+
learn_t=self.learn_t,
|
93 |
+
p=p,
|
94 |
+
learn_p=self.learn_p,
|
95 |
+
msg_norm=self.msg_norm,
|
96 |
+
learn_msg_scale=learn_msg_scale,
|
97 |
+
encode_edge=self.conv_encode_edge,
|
98 |
+
bond_encoder=True,
|
99 |
+
norm=norm,
|
100 |
+
mlp_layers=mlp_layers,
|
101 |
+
)
|
102 |
+
else:
|
103 |
+
raise Exception("Unknown Conv Type")
|
104 |
+
self.gcns.append(gcn)
|
105 |
+
self.norms.append(norm_layer(norm, hidden_channels))
|
106 |
+
|
107 |
+
# Set embedding layers
|
108 |
+
self.atom_encoder = AtomEncoder(emb_dim=hidden_channels)
|
109 |
+
|
110 |
+
if saliency:
|
111 |
+
self.atom_encoder = MM_AtomEncoder(emb_dim=hidden_channels)
|
112 |
+
else:
|
113 |
+
self.atom_encoder = AtomEncoder(emb_dim=hidden_channels)
|
114 |
+
|
115 |
+
if not self.conv_encode_edge:
|
116 |
+
self.bond_encoder = BondEncoder(emb_dim=hidden_channels)
|
117 |
+
|
118 |
+
# Set type of pooling
|
119 |
+
if graph_pooling == "sum":
|
120 |
+
self.pool = global_add_pool
|
121 |
+
elif graph_pooling == "mean":
|
122 |
+
self.pool = global_mean_pool
|
123 |
+
elif graph_pooling == "max":
|
124 |
+
self.pool = global_max_pool
|
125 |
+
else:
|
126 |
+
raise Exception("Unknown Pool Type")
|
127 |
+
|
128 |
+
# Set classification layer
|
129 |
+
self.graph_pred_linear = torch.nn.Linear(hidden_channels, num_classes)
|
130 |
+
|
131 |
+
def forward(self, input_batch, dropout=True, embeddings=False):
|
132 |
+
|
133 |
+
x = input_batch.x
|
134 |
+
edge_index = input_batch.edge_index
|
135 |
+
edge_attr = input_batch.edge_attr
|
136 |
+
batch = input_batch.batch
|
137 |
+
|
138 |
+
h = self.atom_encoder(x)
|
139 |
+
|
140 |
+
if self.add_virtual_node:
|
141 |
+
virtualnode_embedding = self.virtualnode_embedding(
|
142 |
+
torch.zeros(batch[-1].item() + 1)
|
143 |
+
.to(edge_index.dtype)
|
144 |
+
.to(edge_index.device)
|
145 |
+
)
|
146 |
+
h = h + virtualnode_embedding[batch]
|
147 |
+
|
148 |
+
if self.conv_encode_edge:
|
149 |
+
edge_emb = edge_attr
|
150 |
+
else:
|
151 |
+
edge_emb = self.bond_encoder(edge_attr)
|
152 |
+
|
153 |
+
if self.block == "res+":
|
154 |
+
|
155 |
+
h = self.gcns[0](h, edge_index, edge_emb)
|
156 |
+
|
157 |
+
for layer in range(1, self.num_layers):
|
158 |
+
h1 = self.norms[layer - 1](h)
|
159 |
+
h2 = F.relu(h1)
|
160 |
+
if dropout:
|
161 |
+
h2 = F.dropout(h2, p=self.dropout, training=self.training)
|
162 |
+
|
163 |
+
if self.add_virtual_node:
|
164 |
+
virtualnode_embedding_temp = (
|
165 |
+
global_add_pool(h2, batch) + virtualnode_embedding
|
166 |
+
)
|
167 |
+
if dropout:
|
168 |
+
virtualnode_embedding = F.dropout(
|
169 |
+
self.mlp_virtualnode_list[layer - 1](
|
170 |
+
virtualnode_embedding_temp
|
171 |
+
),
|
172 |
+
self.dropout,
|
173 |
+
training=self.training,
|
174 |
+
)
|
175 |
+
|
176 |
+
h2 = h2 + virtualnode_embedding[batch]
|
177 |
+
|
178 |
+
h = self.gcns[layer](h2, edge_index, edge_emb) + h
|
179 |
+
|
180 |
+
h = self.norms[self.num_layers - 1](h)
|
181 |
+
if dropout:
|
182 |
+
h = F.dropout(h, p=self.dropout, training=self.training)
|
183 |
+
|
184 |
+
elif self.block == "res":
|
185 |
+
|
186 |
+
h = F.relu(self.norms[0](self.gcns[0](h, edge_index, edge_emb)))
|
187 |
+
h = F.dropout(h, p=self.dropout, training=self.training)
|
188 |
+
|
189 |
+
for layer in range(1, self.num_layers):
|
190 |
+
h1 = self.gcns[layer](h, edge_index, edge_emb)
|
191 |
+
h2 = self.norms[layer](h1)
|
192 |
+
h = F.relu(h2) + h
|
193 |
+
h = F.dropout(h, p=self.dropout, training=self.training)
|
194 |
+
|
195 |
+
elif self.block == "dense":
|
196 |
+
raise NotImplementedError("To be implemented")
|
197 |
+
|
198 |
+
elif self.block == "plain":
|
199 |
+
|
200 |
+
h = F.relu(self.norms[0](self.gcns[0](h, edge_index, edge_emb)))
|
201 |
+
h = F.dropout(h, p=self.dropout, training=self.training)
|
202 |
+
|
203 |
+
for layer in range(1, self.num_layers):
|
204 |
+
h1 = self.gcns[layer](h, edge_index, edge_emb)
|
205 |
+
h2 = self.norms[layer](h1)
|
206 |
+
if layer != (self.num_layers - 1):
|
207 |
+
h = F.relu(h2)
|
208 |
+
else:
|
209 |
+
h = h2
|
210 |
+
h = F.dropout(h, p=self.dropout, training=self.training)
|
211 |
+
else:
|
212 |
+
raise Exception("Unknown block Type")
|
213 |
+
|
214 |
+
h_graph = self.pool(h, batch)
|
215 |
+
|
216 |
+
if self.args.use_prot or embeddings:
|
217 |
+
return h_graph
|
218 |
+
else:
|
219 |
+
return self.graph_pred_linear(h_graph)
|
220 |
+
|
221 |
+
def print_params(self, epoch=None, final=False):
|
222 |
+
|
223 |
+
if self.learn_t:
|
224 |
+
ts = []
|
225 |
+
for gcn in self.gcns:
|
226 |
+
ts.append(gcn.t.item())
|
227 |
+
if final:
|
228 |
+
print("Final t {}".format(ts))
|
229 |
+
else:
|
230 |
+
logging.info("Epoch {}, t {}".format(epoch, ts))
|
231 |
+
if self.learn_p:
|
232 |
+
ps = []
|
233 |
+
for gcn in self.gcns:
|
234 |
+
ps.append(gcn.p.item())
|
235 |
+
if final:
|
236 |
+
print("Final p {}".format(ps))
|
237 |
+
else:
|
238 |
+
logging.info("Epoch {}, p {}".format(epoch, ps))
|
239 |
+
if self.msg_norm:
|
240 |
+
ss = []
|
241 |
+
for gcn in self.gcns:
|
242 |
+
ss.append(gcn.msg_norm.msg_scale.item())
|
243 |
+
if final:
|
244 |
+
print("Final s {}".format(ss))
|
245 |
+
else:
|
246 |
+
logging.info("Epoch {}, s {}".format(epoch, ss))
|
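For orientation, DeeperGCN reads every hyperparameter off the args namespace used above. A minimal sketch of such a namespace, with attribute names taken from the constructor and purely illustrative values rather than the repository defaults:

from argparse import Namespace

# Illustrative values only; the real defaults come from the repo's argument parser.
args = Namespace(
    num_layers=5, mlp_layers=1, hidden_channels=128,
    msg_norm=False, learn_msg_scale=False, conv_encode_edge=True,
    dropout=0.2, block="res+", add_virtual_node=False,
    nclasses=2, conv="gen", gcn_aggr="softmax",
    t=1.0, learn_t=True, p=1.0, learn_p=False,
    norm="batch", graph_pooling="mean", use_prot=False,
)
# model = DeeperGCN(args)  # would build num_layers GENConv blocks wired as "res+"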
model/model_concatenation.py
ADDED
@@ -0,0 +1,92 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

from gcn_lib.sparse.torch_nn import MLP

from model.model import DeeperGCN

import numpy as np
import logging


class PLANet(torch.nn.Module):
    def __init__(self, args, saliency=False):
        super(PLANet, self).__init__()

        # Args
        self.args = args
        # Molecule and protein networks
        self.molecule_gcn = DeeperGCN(args, saliency=saliency)
        self.target_gcn = DeeperGCN(args, is_prot=True)

        # Individual modules' final embedding size
        output_molecule = args.hidden_channels
        output_protein = args.hidden_channels_prot
        # Concatenated embedding size
        Final_output = output_molecule + output_protein
        # Overall model's final embedding size
        hidden_channels = args.hidden_channels

        # Multiplier
        if args.multi_concat:
            self.multiplier_prot = torch.nn.Parameter(torch.zeros(hidden_channels))
            self.multiplier_ligand = torch.nn.Parameter(torch.ones(hidden_channels))
        elif self.args.MLP:
            # MLP
            hidden_channel = 64
            channels_concat = [256, hidden_channel, hidden_channel, 128]
            self.concatenation_gcn = MLP(channels_concat, norm=args.norm, last_lin=True)
            indices = np.diag_indices(hidden_channel)
            tensor_linear_layer = torch.zeros(hidden_channel, Final_output)
            tensor_linear_layer[indices[0], indices[1]] = 1
            self.concatenation_gcn[0].weight = torch.nn.Parameter(tensor_linear_layer)
            self.concatenation_gcn[0].bias = torch.nn.Parameter(
                torch.zeros(hidden_channel)
            )
        else:
            # Concatenation Layer
            self.concatenation_gcn = nn.Linear(Final_output, hidden_channels)
            indices = np.diag_indices(output_molecule)
            tensor_linear_layer = torch.zeros(hidden_channels, Final_output)
            tensor_linear_layer[indices[0], indices[1]] = 1
            self.concatenation_gcn.weight = torch.nn.Parameter(tensor_linear_layer)
            self.concatenation_gcn.bias = torch.nn.Parameter(
                torch.zeros(hidden_channels)
            )

        # Classification Layer
        num_classes = args.nclasses
        self.classification = nn.Linear(hidden_channels, num_classes)

    def forward(self, molecule, target):

        molecule_features = self.molecule_gcn(molecule)
        target_features = self.target_gcn(target)
        # Multiplier
        if self.args.multi_concat:
            All_features = (
                target_features * self.multiplier_prot
                + molecule_features * self.multiplier_ligand
            )
        else:
            # Concatenation of LM and PM modules
            All_features = torch.cat((molecule_features, target_features), dim=1)
            All_features = self.concatenation_gcn(All_features)
        # Classification
        classification = self.classification(All_features)

        return classification

    def print_params(self, epoch=None, final=False):

        logging.info("======= Molecule GCN ========")
        self.molecule_gcn.print_params(epoch)
        logging.info("======= Protein GCN ========")
        self.target_gcn.print_params(epoch)
        if self.args.multi_concat:
            sum_prot_multi = sum(self.multiplier_prot)
            sum_lig_multi = sum(self.multiplier_ligand)
            logging.info("Summed prot multi: {}".format(sum_prot_multi))
            logging.info("Summed lig multi: {}".format(sum_lig_multi))
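The identity-initialized concatenation layer above is set up so that, before any training, the fused embedding equals the ligand half of the concatenation and the protein half contributes nothing. A self-contained sketch of that initialization with made-up sizes:

import numpy as np
import torch
import torch.nn as nn

hidden, concat_dim = 4, 8          # illustrative stand-ins for hidden_channels and Final_output
layer = nn.Linear(concat_dim, hidden)
weight = torch.zeros(hidden, concat_dim)
idx = np.diag_indices(hidden)
weight[idx[0], idx[1]] = 1         # ones on the diagonal pass the first `hidden` inputs through
layer.weight = nn.Parameter(weight)
layer.bias = nn.Parameter(torch.zeros(hidden))

ligand = torch.tensor([[1.0, 2.0, 3.0, 4.0]])
protein = torch.tensor([[9.0, 9.0, 9.0, 9.0]])
fused = layer(torch.cat((ligand, protein), dim=1))
print(fused)                       # tensor([[1., 2., 3., 4.]]) -> ligand features only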
model/model_encoder.py
ADDED
@@ -0,0 +1,54 @@
import torch
from data.features import get_atom_feature_dims, get_bond_feature_dims

full_atom_feature_dims = get_atom_feature_dims()
full_bond_feature_dims = get_bond_feature_dims()


class AtomEncoder(torch.nn.Module):

    def __init__(self, emb_dim):
        super(AtomEncoder, self).__init__()

        self.atom_embedding_list = torch.nn.ModuleList()

        for i, dim in enumerate(full_atom_feature_dims):
            emb = torch.nn.Embedding(dim, emb_dim)
            torch.nn.init.xavier_uniform_(emb.weight.data)
            self.atom_embedding_list.append(emb)

    def forward(self, x):
        x_embedding = 0
        for i in range(x.shape[1]):
            x_embedding += self.atom_embedding_list[i](x[:, i])

        return x_embedding


class BondEncoder(torch.nn.Module):

    def __init__(self, emb_dim):
        super(BondEncoder, self).__init__()

        self.bond_embedding_list = torch.nn.ModuleList()

        for i, dim in enumerate(full_bond_feature_dims):
            emb = torch.nn.Embedding(dim, emb_dim)
            torch.nn.init.xavier_uniform_(emb.weight.data)
            self.bond_embedding_list.append(emb)

    def forward(self, edge_attr):
        bond_embedding = 0
        for i in range(edge_attr.shape[1]):
            bond_embedding += self.bond_embedding_list[i](edge_attr[:, i])

        return bond_embedding


if __name__ == '__main__':
    from loader import GraphClassificationPygDataset
    dataset = GraphClassificationPygDataset(name='tox21')
    atom_enc = AtomEncoder(100)
    bond_enc = BondEncoder(100)

    print(atom_enc(dataset[0].x))
    print(bond_enc(dataset[0].edge_attr))
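AtomEncoder and BondEncoder embed each integer feature column with its own table and sum the lookups. A tiny illustration with hypothetical feature dimensions (the real ones come from data.features.get_atom_feature_dims):

import torch

feature_dims = [3, 5]                               # hypothetical: two categorical atom features
tables = torch.nn.ModuleList(
    torch.nn.Embedding(dim, 8) for dim in feature_dims
)

x = torch.tensor([[0, 4], [2, 1]])                  # 2 atoms, one integer index per feature
emb = sum(tables[i](x[:, i]) for i in range(x.shape[1]))
print(emb.shape)                                    # torch.Size([2, 8])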
pretrained-models/BINARY_ada/Fold1/Best_Model.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d20694b2317f52610baf8126b11587b0732f85724a9322a7f10aaac44d1d5ca0
size 22503711
pretrained-models/BINARY_ada/Fold2/Best_Model.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:70bd05db3d218f477792f56f681f09e571f817298c1d2f94c4b8f5c850725a88
size 22506847
pretrained-models/BINARY_ada/Fold3/Best_Model.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2dbfbe976cb712448f80eb354d45670c37c4d67c6949f80cfd9ce8c2716fdc9b
size 22505567
pretrained-models/BINARY_ada/Fold4/Best_Model.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1f9d16bc2669153e692680c8e0db26c81029ef47f65755e34e1ae71f4c359526
size 22506783
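The four Best_Model.pth entries are Git LFS pointers, one checkpoint per cross-validation fold. Once the actual weights are fetched, loading would look roughly like the sketch below; the checkpoint layout is an assumption here, since it is defined by the training code rather than by this commit:

import torch

# Assumed structure: the file may hold either a bare state_dict or a dict wrapping one.
ckpt = torch.load("pretrained-models/BINARY_ada/Fold1/Best_Model.pth", map_location="cpu")
state_dict = ckpt.get("model_state_dict", ckpt) if isinstance(ckpt, dict) else ckpt
# model.load_state_dict(state_dict)  # model would be a PLANet built with matching args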
requirements.txt
ADDED
@@ -0,0 +1,9 @@
rdkit-pypi==2021.9.3
modlamp==4.3.0
ogb==1.3.6
tqdm==4.63.0
h5py==3.11.0
scipy==1.9.0
numpy==1.24.4
gradio==4.43.0
fastapi==0.112.4
scripts/__init__.py
ADDED
File without changes
scripts/model/__init__.py
ADDED
@@ -0,0 +1,4 @@
import sys
import os
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
sys.path.append(ROOT_DIR)
scripts/model/model.py
ADDED
@@ -0,0 +1,246 @@
import torch
import torch.nn.functional as F
from torch_geometric.nn import global_add_pool, global_mean_pool, global_max_pool

from gcn_lib.sparse.torch_vertex import GENConv
from gcn_lib.sparse.torch_nn import norm_layer, MLP, MM_AtomEncoder

from model.model_encoder import AtomEncoder, BondEncoder

import logging


class DeeperGCN(torch.nn.Module):
    def __init__(self, args, is_prot=False, saliency=False):
        super(DeeperGCN, self).__init__()

        # Set PM configuration
        if is_prot:
            self.num_layers = args.num_layers_prot
            mlp_layers = args.mlp_layers_prot
            hidden_channels = args.hidden_channels_prot
            self.msg_norm = args.msg_norm_prot
            learn_msg_scale = args.learn_msg_scale_prot
            self.conv_encode_edge = args.conv_encode_edge_prot

        # Set LM configuration
        else:
            self.num_layers = args.num_layers
            mlp_layers = args.mlp_layers
            hidden_channels = args.hidden_channels
            self.msg_norm = args.msg_norm
            learn_msg_scale = args.learn_msg_scale
            self.conv_encode_edge = args.conv_encode_edge

        # Set overall model configuration
        self.dropout = args.dropout
        self.block = args.block
        self.add_virtual_node = args.add_virtual_node
        self.training = True
        self.args = args

        num_classes = args.nclasses
        conv = args.conv
        aggr = args.gcn_aggr
        t = args.t
        self.learn_t = args.learn_t
        p = args.p
        self.learn_p = args.learn_p

        norm = args.norm

        graph_pooling = args.graph_pooling

        # Print model parameters
        print(
            "The number of layers {}".format(self.num_layers),
            "Aggr aggregation method {}".format(aggr),
            "block: {}".format(self.block),
        )
        if self.block == "res+":
            print("LN/BN->ReLU->GraphConv->Res")
        elif self.block == "res":
            print("GraphConv->LN/BN->ReLU->Res")
        elif self.block == "dense":
            raise NotImplementedError("To be implemented")
        elif self.block == "plain":
            print("GraphConv->LN/BN->ReLU")
        else:
            raise Exception("Unknown block Type")

        self.gcns = torch.nn.ModuleList()
        self.norms = torch.nn.ModuleList()

        if self.add_virtual_node:
            self.virtualnode_embedding = torch.nn.Embedding(1, hidden_channels)
            torch.nn.init.constant_(self.virtualnode_embedding.weight.data, 0)

            self.mlp_virtualnode_list = torch.nn.ModuleList()

            for layer in range(self.num_layers - 1):
                self.mlp_virtualnode_list.append(MLP([hidden_channels] * 3, norm=norm))

        # Set GCN layer configuration
        for layer in range(self.num_layers):
            if conv == "gen":
                gcn = GENConv(
                    hidden_channels,
                    hidden_channels,
                    args,
                    aggr=aggr,
                    t=t,
                    learn_t=self.learn_t,
                    p=p,
                    learn_p=self.learn_p,
                    msg_norm=self.msg_norm,
                    learn_msg_scale=learn_msg_scale,
                    encode_edge=self.conv_encode_edge,
                    bond_encoder=True,
                    norm=norm,
                    mlp_layers=mlp_layers,
                )
            else:
                raise Exception("Unknown Conv Type")
            self.gcns.append(gcn)
            self.norms.append(norm_layer(norm, hidden_channels))

        # Set embedding layers
        if saliency:
            self.atom_encoder = MM_AtomEncoder(emb_dim=hidden_channels)
        else:
            self.atom_encoder = AtomEncoder(emb_dim=hidden_channels)

        if not self.conv_encode_edge:
            self.bond_encoder = BondEncoder(emb_dim=hidden_channels)

        # Set type of pooling
        if graph_pooling == "sum":
            self.pool = global_add_pool
        elif graph_pooling == "mean":
            self.pool = global_mean_pool
        elif graph_pooling == "max":
            self.pool = global_max_pool
        else:
            raise Exception("Unknown Pool Type")

        # Set classification layer
        self.graph_pred_linear = torch.nn.Linear(hidden_channels, num_classes)

    def forward(self, input_batch, dropout=True, embeddings=False):

        x = input_batch.x
        edge_index = input_batch.edge_index
        edge_attr = input_batch.edge_attr
        batch = input_batch.batch

        h = self.atom_encoder(x)

        if self.add_virtual_node:
            virtualnode_embedding = self.virtualnode_embedding(
                torch.zeros(batch[-1].item() + 1)
                .to(edge_index.dtype)
                .to(edge_index.device)
            )
            h = h + virtualnode_embedding[batch]

        if self.conv_encode_edge:
            edge_emb = edge_attr
        else:
            edge_emb = self.bond_encoder(edge_attr)

        if self.block == "res+":

            h = self.gcns[0](h, edge_index, edge_emb)

            for layer in range(1, self.num_layers):
                h1 = self.norms[layer - 1](h)
                h2 = F.relu(h1)
                if dropout:
                    h2 = F.dropout(h2, p=self.dropout, training=self.training)

                if self.add_virtual_node:
                    virtualnode_embedding_temp = (
                        global_add_pool(h2, batch) + virtualnode_embedding
                    )
                    if dropout:
                        virtualnode_embedding = F.dropout(
                            self.mlp_virtualnode_list[layer - 1](
                                virtualnode_embedding_temp
                            ),
                            self.dropout,
                            training=self.training,
                        )

                    h2 = h2 + virtualnode_embedding[batch]

                h = self.gcns[layer](h2, edge_index, edge_emb) + h

            h = self.norms[self.num_layers - 1](h)
            if dropout:
                h = F.dropout(h, p=self.dropout, training=self.training)

        elif self.block == "res":

            h = F.relu(self.norms[0](self.gcns[0](h, edge_index, edge_emb)))
            h = F.dropout(h, p=self.dropout, training=self.training)

            for layer in range(1, self.num_layers):
                h1 = self.gcns[layer](h, edge_index, edge_emb)
                h2 = self.norms[layer](h1)
                h = F.relu(h2) + h
                h = F.dropout(h, p=self.dropout, training=self.training)

        elif self.block == "dense":
            raise NotImplementedError("To be implemented")

        elif self.block == "plain":

            h = F.relu(self.norms[0](self.gcns[0](h, edge_index, edge_emb)))
            h = F.dropout(h, p=self.dropout, training=self.training)

            for layer in range(1, self.num_layers):
                h1 = self.gcns[layer](h, edge_index, edge_emb)
                h2 = self.norms[layer](h1)
                if layer != (self.num_layers - 1):
                    h = F.relu(h2)
                else:
                    h = h2
                h = F.dropout(h, p=self.dropout, training=self.training)
        else:
            raise Exception("Unknown block Type")

        h_graph = self.pool(h, batch)

        if self.args.use_prot or embeddings:
            return h_graph
        else:
            return self.graph_pred_linear(h_graph)

    def print_params(self, epoch=None, final=False):

        if self.learn_t:
            ts = []
            for gcn in self.gcns:
                ts.append(gcn.t.item())
            if final:
                print("Final t {}".format(ts))
            else:
                logging.info("Epoch {}, t {}".format(epoch, ts))
        if self.learn_p:
            ps = []
            for gcn in self.gcns:
                ps.append(gcn.p.item())
            if final:
                print("Final p {}".format(ps))
            else:
                logging.info("Epoch {}, p {}".format(epoch, ps))
        if self.msg_norm:
            ss = []
            for gcn in self.gcns:
                ss.append(gcn.msg_norm.msg_scale.item())
            if final:
                print("Final s {}".format(ss))
            else:
                logging.info("Epoch {}, s {}".format(epoch, ss))
scripts/model/model_concatenation.py
ADDED
@@ -0,0 +1,92 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

from gcn_lib.sparse.torch_nn import MLP

from model.model import DeeperGCN

import numpy as np
import logging


class PLANet(torch.nn.Module):
    def __init__(self, args, saliency=False):
        super(PLANet, self).__init__()

        # Args
        self.args = args
        # Molecule and protein networks
        self.molecule_gcn = DeeperGCN(args, saliency=saliency)
        self.target_gcn = DeeperGCN(args, is_prot=True)

        # Individual modules' final embedding size
        output_molecule = args.hidden_channels
        output_protein = args.hidden_channels_prot
        # Concatenated embedding size
        Final_output = output_molecule + output_protein
        # Overall model's final embedding size
        hidden_channels = args.hidden_channels

        # Multiplier
        if args.multi_concat:
            self.multiplier_prot = torch.nn.Parameter(torch.zeros(hidden_channels))
            self.multiplier_ligand = torch.nn.Parameter(torch.ones(hidden_channels))
        elif self.args.MLP:
            # MLP
            hidden_channel = 64
            channels_concat = [256, hidden_channel, hidden_channel, 128]
            self.concatenation_gcn = MLP(channels_concat, norm=args.norm, last_lin=True)
            indices = np.diag_indices(hidden_channel)
            tensor_linear_layer = torch.zeros(hidden_channel, Final_output)
            tensor_linear_layer[indices[0], indices[1]] = 1
            self.concatenation_gcn[0].weight = torch.nn.Parameter(tensor_linear_layer)
            self.concatenation_gcn[0].bias = torch.nn.Parameter(
                torch.zeros(hidden_channel)
            )
        else:
            # Concatenation Layer
            self.concatenation_gcn = nn.Linear(Final_output, hidden_channels)
            indices = np.diag_indices(output_molecule)
            tensor_linear_layer = torch.zeros(hidden_channels, Final_output)
            tensor_linear_layer[indices[0], indices[1]] = 1
            self.concatenation_gcn.weight = torch.nn.Parameter(tensor_linear_layer)
            self.concatenation_gcn.bias = torch.nn.Parameter(
                torch.zeros(hidden_channels)
            )

        # Classification Layer
        num_classes = args.nclasses
        self.classification = nn.Linear(hidden_channels, num_classes)

    def forward(self, molecule, target):

        molecule_features = self.molecule_gcn(molecule)
        target_features = self.target_gcn(target)
        # Multiplier
        if self.args.multi_concat:
            All_features = (
                target_features * self.multiplier_prot
                + molecule_features * self.multiplier_ligand
            )
        else:
            # Concatenation of LM and PM modules
            All_features = torch.cat((molecule_features, target_features), dim=1)
            All_features = self.concatenation_gcn(All_features)
        # Classification
        classification = self.classification(All_features)

        return classification

    def print_params(self, epoch=None, final=False):

        logging.info("======= Molecule GCN ========")
        self.molecule_gcn.print_params(epoch)
        logging.info("======= Protein GCN ========")
        self.target_gcn.print_params(epoch)
        if self.args.multi_concat:
            sum_prot_multi = sum(self.multiplier_prot)
            sum_lig_multi = sum(self.multiplier_ligand)
            logging.info("Summed prot multi: {}".format(sum_prot_multi))
            logging.info("Summed lig multi: {}".format(sum_lig_multi))
scripts/model/model_encoder.py
ADDED
@@ -0,0 +1,54 @@
import torch
from data.features import get_atom_feature_dims, get_bond_feature_dims

full_atom_feature_dims = get_atom_feature_dims()
full_bond_feature_dims = get_bond_feature_dims()


class AtomEncoder(torch.nn.Module):

    def __init__(self, emb_dim):
        super(AtomEncoder, self).__init__()

        self.atom_embedding_list = torch.nn.ModuleList()

        for i, dim in enumerate(full_atom_feature_dims):
            emb = torch.nn.Embedding(dim, emb_dim)
            torch.nn.init.xavier_uniform_(emb.weight.data)
            self.atom_embedding_list.append(emb)

    def forward(self, x):
        x_embedding = 0
        for i in range(x.shape[1]):
            x_embedding += self.atom_embedding_list[i](x[:, i])

        return x_embedding


class BondEncoder(torch.nn.Module):

    def __init__(self, emb_dim):
        super(BondEncoder, self).__init__()

        self.bond_embedding_list = torch.nn.ModuleList()

        for i, dim in enumerate(full_bond_feature_dims):
            emb = torch.nn.Embedding(dim, emb_dim)
            torch.nn.init.xavier_uniform_(emb.weight.data)
            self.bond_embedding_list.append(emb)

    def forward(self, edge_attr):
        bond_embedding = 0
        for i in range(edge_attr.shape[1]):
            bond_embedding += self.bond_embedding_list[i](edge_attr[:, i])

        return bond_embedding


if __name__ == '__main__':
    from loader import GraphClassificationPygDataset
    dataset = GraphClassificationPygDataset(name='tox21')
    atom_enc = AtomEncoder(100)
    bond_enc = BondEncoder(100)

    print(atom_enc(dataset[0].x))
    print(bond_enc(dataset[0].edge_attr))
scripts/pla_net_inference.py
ADDED
@@ -0,0 +1,82 @@
import torch
import numpy as np
import pandas as pd
import time
from torch_geometric.data import DataLoader
from model.model_concatenation import PLANet
from utils.args import ArgsInit
from utils.model import get_dataset_inference, test_gcn


def main(args):

    if args.use_gpu:
        device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
    else:
        device = torch.device('cpu')

    # Numpy and torch seeds
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    if device.type == 'cuda':
        torch.cuda.manual_seed(args.seed)
    print('%s' % args)

    data_inference = pd.read_csv(
        args.input_file_smiles,
        names=["Smiles"],
        header=0
    )

    print("Data Inference: ", data_inference)

    data_target = pd.read_csv(
        args.target_list, names=["Fasta", "Target", "Label"]
    )
    data_target = data_target[data_target.Target == args.target]

    print("Data Target: ", data_target)

    test = get_dataset_inference(
        data_inference,
        use_prot=args.use_prot,
        target=data_target,
        args=args,
        advs=False,
        saliency=False,
    )

    test_loader = DataLoader(test, batch_size=args.batch_size, shuffle=False,
                             num_workers=args.num_workers)

    model = PLANet(args).to(device)

    print('Model inference in: {}'.format(args.inference_path))
    start_time = time.time()

    # Load pre-trained molecule model

    print('Evaluating...')
    test_gcn(model, device, test_loader, args)

    end_time = time.time()
    total_time = end_time - start_time
    print('Total time: {}'.format(time.strftime('%H:%M:%S', time.gmtime(total_time))))


if __name__ == "__main__":
    args = ArgsInit().args
    # Default args for inference

    args.nclasses = 2
    args.batch_size = 10
    args.use_prot = True
    args.freeze_molecule = True
    args.conv_encode_edge = True
    args.learn_t = True
    args.binary = True

    main(args)
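Note that pd.read_csv is called with header=0 plus an explicit names list, so the input SMILES file is expected to carry a header row, which gets replaced by the "Smiles" column name. A quick illustration of the expected shape, using made-up molecules:

import io
import pandas as pd

csv_text = "smiles\nCCO\nc1ccccc1\n"    # hypothetical two-molecule input file
data_inference = pd.read_csv(io.StringIO(csv_text), names=["Smiles"], header=0)
print(data_inference["Smiles"].tolist())    # ['CCO', 'c1ccccc1']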
setup.py
ADDED
@@ -0,0 +1,14 @@
# setup.py
from setuptools import setup, find_packages

with open('requirements.txt') as f:
    requirements = f.read().splitlines()

setup(
    name='pla_net',
    version='0.0.0',
    packages=find_packages(),
    install_requires=requirements,
    classifiers=[],
    python_requires='>=3.8',
)
utils/__init__.py
ADDED
@@ -0,0 +1,4 @@
import sys
import os
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
sys.path.append(ROOT_DIR)