Spaces:
Running
Running
Anton Bushuiev
committed on
Commit
•
29bd8b5
1
Parent(s):
7103bfb
Initial commit
Browse files- .gitattributes +0 -35
- LICENSE +21 -0
- README.md +6 -11
- app.py +542 -0
- assets/logos.png +0 -0
- assets/readme-dimer-close-up.png +0 -0
- requirements.txt +7 -0
.gitattributes
DELETED
@@ -1,35 +0,0 @@
|
|
1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2024 Anton Bushuiev
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
README.md
CHANGED
@@ -1,14 +1,9 @@
|
|
1 |
---
|
2 |
-
title: PPIformer
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 5.3.0
|
8 |
app_file: app.py
|
9 |
-
pinned:
|
10 |
-
|
11 |
-
short_description: Learning to design protein-protein interactions with enhance
|
12 |
-
---
|
13 |
-
|
14 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: PPIformer
|
3 |
+
emoji: 🔬
|
4 |
+
colorFrom: pink
|
5 |
+
colorTo: green
|
6 |
sdk: gradio
|
|
|
7 |
app_file: app.py
|
8 |
+
pinned: true
|
9 |
+
---
|
|
|
|
|
|
|
|
app.py
ADDED
@@ -0,0 +1,542 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# print("""
|
2 |
+
# __ __ _ ___ _ _ _____ _____ _ _ _ _ _ ____ _____
|
3 |
+
# | \/ | / \ |_ _| \ | |_ _| ____| \ | | / \ | \ | |/ ___| ____|
|
4 |
+
# | |\/| | / _ \ | || \| | | | | _| | \| | / _ \ | \| | | | _|
|
5 |
+
# | | | |/ ___ \ | || |\ | | | | |___| |\ |/ ___ \| |\ | |___| |___
|
6 |
+
# |_| |_/_/ \_\___|_| \_| |_| |_____|_| \_/_/ \_\_| \_|\____|_____|
|
7 |
+
|
8 |
+
# ____ ____ _____ _ _ __
|
9 |
+
# | __ )| _ \| ____| / \ | |/ /
|
10 |
+
# | _ \| |_) | _| / _ \ | ' /
|
11 |
+
# | |_) | _ <| |___ / ___ \| . \
|
12 |
+
# |____/|_| \_\_____/_/ \_\_|\_\
|
13 |
+
# """)
|
14 |
+
import os
|
15 |
+
# os.system("pip uninstall -y gradio")
|
16 |
+
# os.system("pip install gradio==3.50.2")
|
17 |
+
# os.system("pip uninstall -y spaces")
|
18 |
+
# os.system("pip install spaces==0.8")
|
19 |
+
os.system("pip uninstall -y torch")
|
20 |
+
os.system("pip install torch==2.0.1")
|
21 |
+
|
22 |
+
import sys
|
23 |
+
import copy
|
24 |
+
import random
|
25 |
+
import tempfile
|
26 |
+
import shutil
|
27 |
+
import logging
|
28 |
+
from pathlib import Path
|
29 |
+
from functools import partial
|
30 |
+
|
31 |
+
import spaces
|
32 |
+
import gradio as gr
|
33 |
+
import torch
|
34 |
+
import numpy as np
|
35 |
+
import pandas as pd
|
36 |
+
from Bio.PDB.Polypeptide import protein_letters_3to1
|
37 |
+
from biopandas.pdb import PandasPdb
|
38 |
+
from colour import Color
|
39 |
+
from colour import RGB_TO_COLOR_NAMES
|
40 |
+
|
41 |
+
from mutils.proteins import AMINO_ACID_CODES_1
|
42 |
+
from mutils.pdb import download_pdb
|
43 |
+
from mutils.mutations import Mutation
|
44 |
+
from ppiref.extraction import PPIExtractor
|
45 |
+
from ppiref.utils.ppi import PPIPath
|
46 |
+
from ppiref.utils.residue import Residue
|
47 |
+
from ppiformer.tasks.node import DDGPPIformer
|
48 |
+
from ppiformer.utils.api import download_from_zenodo
|
49 |
+
from ppiformer.utils.api import predict_ddg as predict_ddg_
|
50 |
+
from ppiformer.utils.torch import fill_diagonal
|
51 |
+
from ppiformer.definitions import PPIFORMER_WEIGHTS_DIR
|
52 |
+
|
53 |
+
|
54 |
+
import pkg_resources
|
55 |
+
import sys
|
56 |
+
|
57 |
+
def print_package_versions():
    """Print all installed distributions and the Python version to stdout.

    Startup diagnostic: emits one ``name==version`` line per installed
    distribution (sorted, lower-cased names), followed by ``sys.version``.
    """
    # Local imports: importlib.metadata is the standard-library replacement
    # for the deprecated pkg_resources API that was used here before.
    import re
    from importlib import metadata

    entries = set()
    for dist in metadata.distributions():
        name = dist.metadata.get("Name")
        if not name:
            # Broken or partial installs may carry no name; nothing to report.
            continue
        # Same normalization pkg_resources applied to ``pkg.key``.
        key = re.sub(r"[^A-Za-z0-9.]+", "-", name).lower()
        entries.add(f"{key}=={dist.version}")
    installed_packages = sorted(entries)

    print("Installed packages and their versions:")
    for package in installed_packages:
        print(package)

    print("\nPython version:")
    print(sys.version)
|
65 |
+
|
66 |
+
print_package_versions()
|
67 |
+
|
68 |
+
|
69 |
+
logging.basicConfig(
|
70 |
+
level=logging.INFO,
|
71 |
+
format='%(asctime)s - %(levelname)s - %(message)s',
|
72 |
+
handlers=[logging.StreamHandler(sys.stdout)]
|
73 |
+
)
|
74 |
+
|
75 |
+
random.seed(0)
|
76 |
+
|
77 |
+
|
78 |
+
@spaces.GPU
def predict_ddg(models, ppi, muts, return_attn):
    """Run the ddG predictor on whichever device is available and return CPU tensors.

    Args:
        models: iterable of models; each is moved to the selected device.
        ppi: structure input forwarded unchanged to ``predict_ddg_``.
        muts: mutations forwarded unchanged to ``predict_ddg_``.
        return_attn: when truthy, attention is returned alongside the ddG values.

    Returns:
        The detached ddG prediction on CPU, or a ``(ddg, attention)`` pair of
        detached CPU tensors when ``return_attn`` is truthy.
    """
    target = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"[INFO] Device on prediction: {target}")
    on_device = [m.to(target) for m in models]

    result = predict_ddg_(on_device, ppi, muts, return_attn=return_attn)
    if not return_attn:
        return result.detach().cpu()

    ddg_pred, attns = result
    return ddg_pred.detach().cpu(), attns.detach().cpu()
|
89 |
+
|
90 |
+
|
91 |
+
def process_inputs(inputs, temp_dir):
    """Validate the raw form inputs and prepare the files needed for prediction.

    Args:
        inputs: 5-item sequence ``(pdb_code, pdb_path, partners, muts, muts_path)``.
            ``pdb_path`` is expected to expose the uploaded file's name via
            ``.name``; ``partners`` is a comma-separated string of chain ids;
            ``muts`` is a semicolon-separated string of mutations.
        temp_dir: ``pathlib.Path`` working directory. Structures are placed under
            ``temp_dir / 'pdb'`` and the extracted interface under ``temp_dir / 'ppi'``.

    Returns:
        ``(pdb_path, ppi_path, muts, muts_on_interface)`` where ``pdb_path`` is the
        (renamed) structure file, ``ppi_path`` the extracted interface file,
        ``muts`` a list of mutation strings and ``muts_on_interface`` a parallel
        list of booleans (``True`` if the wild type is found in the interface file).

    Raises:
        gr.Error: on missing inputs, failed download or extraction, unparsable
            mutations, mutations on chains outside ``partners``, or wild types
            that are absent from the structure.
    """
    pdb_code, pdb_path, partners, muts, muts_path = inputs

    # Check inputs
    if not pdb_code and not pdb_path:
        raise gr.Error("PPI structure not specified.")

    if pdb_code and pdb_path:
        gr.Warning("Both PDB code and PDB file specified. Using PDB file.")

    if not partners:
        raise gr.Error("Partners not specified.")

    if not muts and not muts_path:
        raise gr.Error("Mutations not specified.")

    if muts and muts_path:
        gr.Warning("Both mutations and mutations file specified. Using mutations file.")

    # Prepare PDB input
    if pdb_path:
        # convert file name to PPIRef format
        new_pdb_path = temp_dir / f"pdb/{pdb_path.name.replace('_', '-')}"
        new_pdb_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(str(pdb_path), str(new_pdb_path))
        pdb_path = new_pdb_path
    else:
        try:
            pdb_code = pdb_code.strip().lower()
            pdb_path = temp_dir / f'pdb/{pdb_code}.pdb'
            download_pdb(pdb_code, path=pdb_path)
        except Exception as e:
            # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit
            # propagate, and keep the cause attached for the server log.
            raise gr.Error("PDB download failed.") from e

    # Parse partners; drop empty entries such as those from "A,,B" or a trailing comma
    partners = [p.strip() for p in partners.split(',') if p.strip()]
    if not partners:
        raise gr.Error("Partners not specified.")

    # Add partners to file name
    pdb_path = pdb_path.rename(pdb_path.with_stem(f"{pdb_path.stem}-{'-'.join(partners)}"))

    # Extract PPI into temp dir
    try:
        ppi_dir = temp_dir / 'ppi'
        extractor = PPIExtractor(out_dir=ppi_dir, nest_out_dir=True, join=True, radius=10.0)
        extractor.extract(pdb_path, partners=partners)
        ppi_path = PPIPath.construct(ppi_dir, pdb_path.stem, partners)
    except Exception as e:
        raise gr.Error("PPI extraction failed.") from e

    # Prepare mutations input
    if muts_path:
        muts_path = Path(muts_path)
        muts = muts_path.read_text()

    # Check mutations

    # Basic format (tokens are stripped so whitespace/newlines around ';' are tolerated)
    try:
        muts = [Mutation.from_str(m.strip()) for m in muts.strip().split(';') if m.strip()]
    except Exception as e:
        raise gr.Error(f'Mutations parsing failed: {e}') from e

    # An input made only of separators/whitespace parses to nothing
    if not muts:
        raise gr.Error("Mutations not specified.")

    # Partners
    for mut in muts:
        for pmut in mut.muts:
            if pmut.chain not in partners:
                raise gr.Error(f'Chain of point mutation {pmut} is not in the list of partners {partners}.')

    # Consistency with provided .pdb
    muts_on_interface = []
    for mut in muts:
        if mut.wt_in_pdb(ppi_path):
            val = True
        elif mut.wt_in_pdb(pdb_path):
            val = False
        else:
            raise gr.Error(f'Wild-type of mutation {mut} is not in the provided .pdb file.')
        muts_on_interface.append(val)

    muts = [str(m) for m in muts]

    return pdb_path, ppi_path, muts, muts_on_interface
|
174 |
+
|
175 |
+
|
176 |
+
def plot_3dmol(pdb_path, ppi_path, mut, attn, attn_mut_id=0):
    """Build an ``<iframe>`` with a 3Dmol.js view of the structure for one mutation.

    Args:
        pdb_path: path of the PDB file whose text is embedded in the viewer.
        ppi_path: path of the PDB file read with ``PandasPdb`` to obtain the
            residues that are styled (one CA row per residue).
        mut: mutation string, parsed with ``Mutation.from_str``; its wild-type
            residues are drawn as red sticks, labelled and zoomed to.
        attn: attention tensor, indexed as ``attn[:, attn_mut_id, 0, :, 0, :, :, :]``.
        attn_mut_id: index of this mutation along the second axis of ``attn``.

    Returns:
        HTML string containing an ``<iframe>`` whose ``srcdoc`` is the viewer page.
    """
    # NOTE 3DMol.js adapted from https://huggingface.co/spaces/huhlim/cg2all/blob/main/app.py
    # Local imports keep this block self-contained.
    import json
    from html import escape

    # Read PDB for 3Dmol.js
    with open(pdb_path, "r") as fp:
        mol = fp.read()
    # Replacements keep the 3-character width so fixed-width PDB columns do not shift.
    mol = mol.replace("OT1", "O  ")
    mol = mol.replace("OT2", "OXT")

    # Read PPI to customize 3Dmol.js visualization
    ppi_df = PandasPdb().read_pdb(ppi_path).df['ATOM']
    ppi_df = ppi_df.groupby(list(Residue._fields)).apply(lambda df: df[df['atom_name'] == 'CA'].iloc[0]).reset_index(drop=True)
    ppi_df['id'] = ppi_df.apply(lambda row: ':'.join([row['residue_name'], row['chain_id'], str(row['residue_number']), row['insertion']]), axis=1)
    ppi_df['id'] = ppi_df['id'].apply(lambda x: x[:-1] if x[-1] == ':' else x)
    muts_id = Mutation.from_str(mut).wt_to_graphein()  # flatten ids of all sp muts
    ppi_df['mutated'] = ppi_df.apply(lambda row: row['id'] in muts_id, axis=1)

    # Prepare attention coefficients per residue (normalized sum of direct attention from mutated residues)
    attn = torch.nan_to_num(attn, nan=1e-10)
    attn_sub = attn[:, attn_mut_id, 0, :, 0, :, :, :]  # models, layers, heads, tokens, tokens
    idx_mutated = torch.from_numpy(ppi_df.index[ppi_df['mutated']].to_numpy())
    attn_sub = fill_diagonal(attn_sub, 1e-10)
    attn_mutated = attn_sub[..., idx_mutated, :]
    attns_per_token = torch.sum(attn_mutated, dim=(0, 1, 2, 3))
    attn_min = attns_per_token.min()
    attn_range = attns_per_token.max() - attn_min
    if attn_range > 0:
        attns_per_token = (attns_per_token - attn_min) / attn_range
    else:
        # All residues received the same attention: min-max scaling would divide by zero.
        attns_per_token = torch.zeros_like(attns_per_token)
    attns_per_token += 1e-10
    ppi_df['attn'] = attns_per_token.numpy()

    chains = ppi_df.sort_values('attn', ascending=False)['chain_id'].unique()

    # Customize 3Dmol.js visualization https://3dmol.csb.pitt.edu/doc/
    styles = []
    zoom_atoms = []

    # Cartoon chains
    preferred_colors = ['LimeGreen', 'HotPink', 'RoyalBlue']
    all_colors = [c[0] for c in RGB_TO_COLOR_NAMES.values()]
    all_colors = [c for c in all_colors if c not in preferred_colors + ['Black', 'White']]
    random.shuffle(all_colors)
    all_colors = preferred_colors + all_colors
    all_colors = [Color(c) for c in all_colors]
    chain_to_color = dict(zip(chains, all_colors))
    for chain in chains:
        styles.append([{"chain": chain}, {"cartoon": {"color": chain_to_color[chain].hex_l, "opacity": 0.6}}])

    # Stick PPI and atoms for zoom
    # TODO Insertions
    for _, row in ppi_df.iterrows():
        # Plain float so the value serializes as a JS number regardless of NumPy's scalar repr.
        attn_value = float(row['attn'])
        color = copy.deepcopy(chain_to_color[row['chain_id']])
        color.saturation = attn_value
        color = color.hex_l
        if row['mutated']:
            styles.append([
                {'chain': row['chain_id'], 'resi': str(row['residue_number'])},
                {'stick': {'color': 'red', 'radius': 0.2, 'opacity': 1.0}}
            ])
            zoom_atoms.append(row['atom_number'])
        else:
            styles.append([
                {'chain': row['chain_id'], 'resi': str(row['residue_number'])},
                {'stick': {'color': color, 'radius': attn_value / 5, 'opacity': attn_value}}
            ])

    # Convert style dicts to JS lines (JSON object literals are valid JS)
    styles = ''.join(['viewer.addStyle(' + ', '.join([json.dumps(s) for s in dcts]) + ');\n' for dcts in styles])

    # Convert zoom atoms to 3DMol.js selection and add labels for mutated residues
    zoom_animation_duration = 500
    sel = '{"or": [' + ', '.join(['{"serial": ' + str(a) + '}' for a in zoom_atoms]) + ']}'
    zoom = 'viewer.zoomTo(' + sel + ',' + f'{zoom_animation_duration});'
    for atom in zoom_atoms:
        sel = '{"serial": ' + str(atom) + '}'
        row = ppi_df[ppi_df['atom_number'] == atom].iloc[0]
        label = protein_letters_3to1[row['residue_name']] + row['chain_id'] + str(row['residue_number']) + row['insertion']
        styles += 'viewer.addLabel(' + f'"{label}",' + '{fontSize:16, fontColor:"red", backgroundOpacity: 0.0},' + sel + ');\n'

    # Construct 3Dmol.js visualization script embedded in HTML
    page = (
        """<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8" />
<style>
body{
font-family:sans-serif
}
.mol-container {
width: 100%;
height: 600px;
position: relative;
}
.mol-container select{
background-image:None;
}
</style>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js" integrity="sha512-STof4xm1wgkfm7heWqFJVn58Hm3EtS31XFaagaa8VMReCXAkQnJZ+jEy8PCC/iT18dFy95WcExNHFTqLyp72eQ==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
<script src="https://3Dmol.csb.pitt.edu/build/3Dmol-min.js"></script>
</head>
<body>
<div id="container" class="mol-container"></div>

<script>
let pdb = `"""
        + mol
        + """`

$(document).ready(function () {
let element = $("#container");
let config = { backgroundColor: "white" };
let viewer = $3Dmol.createViewer(element, config);
viewer.addModel(pdb, "pdb");
viewer.setStyle({"model": 0}, {"ray_opaque_background": "off"}, {"stick": {"color": "lightgrey", "opacity": 0.5}});
"""
        + styles
        + zoom
        + """
viewer.render();
})
</script>
</body></html>"""
    )

    # The page is embedded in a single-quoted attribute, so it must be HTML-escaped:
    # a raw apostrophe (e.g. in nucleotide atom names such as O5') would otherwise
    # terminate srcdoc early. The browser decodes the entities back to the same page.
    srcdoc = escape(page, quote=True)

    return f"""<iframe style="width: 100%; height: 600px" name="result" allow="midi; geolocation; microphone; camera;
display-capture; encrypted-media;" sandbox="allow-modals allow-forms
allow-scripts allow-same-origin allow-popups
allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
allowpaymentrequest="" frameborder="0" srcdoc='{srcdoc}'></iframe>"""
|
307 |
+
|
308 |
+
|
309 |
+
def predict(models, temp_dir, *inputs):
    """Run the full prediction pipeline for one button click.

    Args:
        models: models forwarded to ``predict_ddg``.
        temp_dir: working directory forwarded to ``process_inputs``.
        *inputs: raw form values, forwarded as a tuple to ``process_inputs``.

    Returns:
        ``(df, path, dropdown, dropdown_choices_to_plot_args)``: a ``gr.Dataframe``
        with the predictions, the path of the CSV file written with the same
        table, a ``gr.Dropdown`` listing the mutations, and a dict mapping each
        mutation string to the positional arguments for ``plot_3dmol``.

    Raises:
        gr.Error: if input processing or the model prediction fails.
    """
    logging.info('Starting prediction')

    # Process input
    pdb_path, ppi_path, muts, muts_on_interface = process_inputs(inputs, temp_dir)

    # Create dataframe
    df = pd.DataFrame({
        'Mutation': muts,
        'ddG [kcal/mol]': len(muts) * [np.nan],
        '10A Interface': muts_on_interface,
        'Attn Id': len(muts) * [np.nan],
    })

    # Show warning if some mutations are not on the interface
    muts_not_on_interface = df[~df['10A Interface']]['Mutation'].tolist()
    n_muts_not_on_interface = len(muts_not_on_interface)
    if n_muts_not_on_interface:
        n_muts_warn = 5
        muts_not_on_interface = ';'.join(muts_not_on_interface[:n_muts_warn])
        if n_muts_not_on_interface > n_muts_warn:
            muts_not_on_interface += f'... (and {n_muts_not_on_interface - n_muts_warn} more)'
        gr.Warning((
            f"{muts_not_on_interface} {'is' if n_muts_not_on_interface == 1 else 'are'} not on the interface. "
            f"The model will predict the effect{'s' if n_muts_not_on_interface > 1 else ''} of "
            f"mutation{'s' if n_muts_not_on_interface > 1 else ''} on the whole complex. "
            f"This may lead to less accurate predictions."
        ))

    logging.info('Inputs processed')

    # Predict using interface for mutations on the interface and using the whole complex otherwise
    attn_ppi, attn_pdb = None, None
    for df_sub, path in [
        [df[df['10A Interface']], ppi_path],
        [df[~df['10A Interface']], pdb_path]
    ]:
        if not len(df_sub):
            continue

        # Predict
        try:
            ddg, attn = predict_ddg(models, path, df_sub['Mutation'].tolist(), return_attn=True)
        except Exception as e:
            # Log through the configured logger (with traceback) and keep the cause chained.
            logging.exception('Prediction failed.')
            raise gr.Error(f"Prediction failed. {str(e)}") from e
        ddg = ddg.detach().numpy().tolist()

        logging.info(f'Predictions made for {path}')

        # Update dataframe and attention tensor
        idx = df_sub.index
        df.loc[idx, 'ddG [kcal/mol]'] = ddg
        # Position of each mutation within the attention tensor of its own subset
        df.loc[idx, 'Attn Id'] = np.arange(len(idx))

        if path == ppi_path:
            attn_ppi = attn
        else:
            attn_pdb = attn
    df['Attn Id'] = df['Attn Id'].astype(int)

    # Round ddG values
    df['ddG [kcal/mol]'] = df['ddG [kcal/mol]'].round(3)

    # Create PPI-specific dropdown
    dropdown = gr.Dropdown(
        df['Mutation'].tolist(), value=df['Mutation'].iloc[0],
        interactive=True, visible=True, label="Mutation to visualize",
    )

    # Predefine plot arguments for all dropdown choices (single pass over the rows)
    dropdown_choices_to_plot_args = {}
    for mut, on_interface, attn_id in zip(df['Mutation'], df['10A Interface'], df['Attn Id']):
        if mut in dropdown_choices_to_plot_args:
            # A repeated mutation string keeps the arguments of its first row.
            continue
        dropdown_choices_to_plot_args[mut] = (
            pdb_path,
            ppi_path if on_interface else pdb_path,
            mut,
            attn_ppi if on_interface else attn_pdb,
            attn_id,
        )

    # Create dataframe file; the interface column is shown only when it is informative
    path = 'ppiformer_ddg_predictions.csv'
    columns = ['Mutation', 'ddG [kcal/mol]']
    datatype = ['str', 'number']
    if n_muts_not_on_interface:
        columns.append('10A Interface')
        datatype.append('bool')
    df = df[columns]
    df.to_csv(path, index=False)
    df = gr.Dataframe(
        value=df,
        headers=columns,
        datatype=datatype,
        col_count=(len(columns), 'fixed'),
    )

    logging.info('Prediction results prepared')

    return df, path, dropdown, dropdown_choices_to_plot_args
|
415 |
+
|
416 |
+
|
417 |
+
def update_plot(dropdown, dropdown_choices_to_plot_args):
    """Render the 3D view for the mutation currently selected in the dropdown.

    Looks up the positional arguments stored for ``dropdown`` in
    ``dropdown_choices_to_plot_args`` and passes them to ``plot_3dmol``.
    """
    plot_args = dropdown_choices_to_plot_args[dropdown]
    return plot_3dmol(*plot_args)
|
419 |
+
|
420 |
+
|
421 |
+
app = gr.Blocks(theme=gr.themes.Default(primary_hue="green", secondary_hue="pink"))
|
422 |
+
with app:
|
423 |
+
|
424 |
+
# Input GUI
|
425 |
+
gr.Markdown(value="""
|
426 |
+
# PPIformer Web
|
427 |
+
### Computational Design of Protein-Protein Interactions
|
428 |
+
""")
|
429 |
+
gr.Image("assets/readme-dimer-close-up.png")
|
430 |
+
gr.Markdown(value="""
|
431 |
+
[PPIformer](https://github.com/anton-bushuiev/PPIformer/tree/main) is a state-of-the-art predictor of the effects of mutations
|
432 |
+
on protein-protein interactions (PPIs), as quantified by the binding free energy changes (ddG). PPIformer was shown to successfully
|
433 |
+
identify known favourable mutations of the [staphylokinase thrombolytics](https://pubmed.ncbi.nlm.nih.gov/10942387/)
|
434 |
+
and a [human antibody](https://www.pnas.org/doi/10.1073/pnas.2122954119) against the SARS-CoV-2 spike protein. The model was pre-trained
|
435 |
+
on the [PPIRef](https://github.com/anton-bushuiev/PPIRef)
|
436 |
+
dataset via a coarse-grained structural masked modeling and fine-tuned on the [SKEMPI v2.0](https://life.bsc.es/pid/skempi2) dataset via log odds.
|
437 |
+
Please see more details in [our ICLR 2024 paper](https://arxiv.org/abs/2310.18515).
|
438 |
+
|
439 |
+
**Inputs.** To use PPIformer on your data, please specify the PPI structure (PDB code or .pdb file), interacting proteins of interest
|
440 |
+
(chain codes in the file) and mutations (semicolon-separated list or file with mutations in the
|
441 |
+
[standard format](https://foldxsuite.crg.eu/parameter/mutant-file): wild-type residue, chain, residue number, mutant residue).
|
442 |
+
For inspiration, you can use one of the examples below: click on one of the rows to pre-fill the inputs. After specifying the inputs,
|
443 |
+
press the button to predict the effects of mutations on the PPI. Currently the model runs on CPU, so the predictions may take a few minutes.
|
444 |
+
|
445 |
+
**Outputs.** After making a prediction with the model, you will see binding free energy changes for each mutation (ddG values in kcal/mol).
|
446 |
+
A more negative value indicates an improvement in affinity, whereas a more positive value means a reduction in affinity.
|
447 |
+
Below you will also see a 3D visualization of the PPI with wild types of mutated residues highlighted in red. The visualization additionally shows
|
448 |
+
the attention coefficients of the model for the nearest neighboring residues, which quantifies the contribution of the residues
|
449 |
+
to the predicted ddG value. The brighter and thicker a residue is, the more attention the model paid to it.
|
450 |
+
""")
|
451 |
+
|
452 |
+
with gr.Row(equal_height=True):
|
453 |
+
with gr.Column():
|
454 |
+
gr.Markdown("## PPI structure")
|
455 |
+
with gr.Row(equal_height=True):
|
456 |
+
pdb_code = gr.Textbox(placeholder="1BUI", label="PDB code", info="Protein Data Bank identifier for the structure (https://www.rcsb.org/)")
|
457 |
+
partners = gr.Textbox(placeholder="A,B,C", label="Partners", info="Protein chain identifiers in the PDB file forming the PPI interface (two or more)")
|
458 |
+
pdb_path = gr.File(file_count="single", label="Or .pdb file instead of PDB code (your structure will only be used for this prediction and not stored anywhere)")
|
459 |
+
|
460 |
+
with gr.Column():
|
461 |
+
gr.Markdown("## Mutations")
|
462 |
+
muts = gr.Textbox(placeholder="SC16A;FC47A;SC16A,FC47A", label="List of (multi-point) mutations", info="SC16A;FC47A;SC16A,FC47A for three mutations: serine to alanine at position 16 in chain C, phenylalanine to alanine at position 47 in chain C, and their double-point combination")
|
463 |
+
muts_path = gr.File(file_count="single", label="Or file with mutations")
|
464 |
+
|
465 |
+
examples = gr.Examples(
|
466 |
+
examples=[
|
467 |
+
["1BUI", "A,B,C", "SC16A,FC47A;SC16A;FC47A"],
|
468 |
+
["3QIB", "A,B,P,C,D", "YP7F,TP12S;YP7F;TP12S"],
|
469 |
+
["1KNE", "A,P", ';'.join([f"TP6{a}" for a in AMINO_ACID_CODES_1])]
|
470 |
+
],
|
471 |
+
inputs=[pdb_code, partners, muts],
|
472 |
+
label="Examples (click on a line to pre-fill the inputs)",
|
473 |
+
cache_examples=False
|
474 |
+
)
|
475 |
+
|
476 |
+
# Predict GUI
|
477 |
+
predict_button = gr.Button(value="Predict effects of mutations on PPI", variant="primary")
|
478 |
+
|
479 |
+
# Output GUI
|
480 |
+
gr.Markdown("## Predictions")
|
481 |
+
df_file = gr.File(label="Download predictions as .csv", interactive=False, visible=True)
|
482 |
+
df = gr.Dataframe(
|
483 |
+
headers=["Mutation", "ddG [kcal/mol]"],
|
484 |
+
datatype=["str", "number"],
|
485 |
+
col_count=(2, "fixed"),
|
486 |
+
)
|
487 |
+
dropdown = gr.Dropdown(interactive=True, visible=False)
|
488 |
+
dropdown_choices_to_plot_args = gr.State([])
|
489 |
+
plot = gr.HTML()
|
490 |
+
|
491 |
+
# Bottom info box
|
492 |
+
gr.Markdown(value="""
|
493 |
+
<br/>
|
494 |
+
|
495 |
+
## About this web
|
496 |
+
|
497 |
+
**Use cases**. The predictor can be used in: (i) Drug Discovery for the development of novel drugs and vaccines for various diseases such as cancer,
|
498 |
+
neurodegenerative disorders, and infectious diseases, (ii) Biotechnological Applications to develop new biocatalysts for biofuels,
|
499 |
+
industrial chemicals, and pharmaceuticals (iii) Therapeutic Protein Design to develop therapeutic proteins with enhanced stability,
|
500 |
+
specificity, and efficacy, and (iv) Mechanistic Studies to gain insights into fundamental biological processes, such as signal transduction,
|
501 |
+
gene regulation, and immune response.
|
502 |
+
|
503 |
+
**Acknowledgement**. Please, use the following citation to acknowledge the use of our service. The web server is provided free of charge for non-commercial use.
|
504 |
+
> Bushuiev, Anton, Roman Bushuiev, Petr Kouba, Anatolii Filkin, Marketa Gabrielova, Michal Gabriel, Jiri Sedlar, Tomas Pluskal, Jiri Damborsky, Stanislav Mazurenko, Josef Sivic.
|
505 |
+
> "Learning to design protein-protein interactions with enhanced generalization". The Twelfth International Conference on Learning Representations (ICLR 2024).
|
506 |
+
> [https://arxiv.org/abs/2310.18515](https://arxiv.org/abs/2310.18515).
|
507 |
+
|
508 |
+
**Contact**. Please share your feedback or report any bugs through [GitHub Issues](https://github.com/anton-bushuiev/PPIformer/issues/new), or feel free to contact us directly at [anton.bushuiev@cvut.cz](mailto:anton.bushuiev@cvut.cz).
|
509 |
+
""")
|
510 |
+
gr.Image("assets/logos.png")
|
511 |
+
|
512 |
+
# Download weights from Zenodo
|
513 |
+
download_from_zenodo('weights.zip')
|
514 |
+
|
515 |
+
# Set device
|
516 |
+
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
517 |
+
print(f"[INFO] Device on start: {device}")
|
518 |
+
|
519 |
+
# Load models
|
520 |
+
models = [
|
521 |
+
DDGPPIformer.load_from_checkpoint(
|
522 |
+
PPIFORMER_WEIGHTS_DIR / f'ddg_regression/{i}.ckpt',
|
523 |
+
map_location=torch.device('cpu')
|
524 |
+
).eval()
|
525 |
+
for i in range(3)
|
526 |
+
]
|
527 |
+
models = [model.to(device) for model in models]
|
528 |
+
|
529 |
+
# Create temporary directory for storing downloaded PDBs and extracted PPIs
|
530 |
+
temp_dir_obj = tempfile.TemporaryDirectory()
|
531 |
+
temp_dir = Path(temp_dir_obj.name)
|
532 |
+
|
533 |
+
# Main logic
|
534 |
+
inputs = [pdb_code, pdb_path, partners, muts, muts_path]
|
535 |
+
outputs = [df, df_file, dropdown, dropdown_choices_to_plot_args]
|
536 |
+
predict = partial(predict, models, temp_dir)
|
537 |
+
predict_button.click(predict, inputs=inputs, outputs=outputs)
|
538 |
+
|
539 |
+
# Update plot on dropdown change
|
540 |
+
dropdown.change(update_plot, inputs=[dropdown, dropdown_choices_to_plot_args], outputs=[plot])
|
541 |
+
|
542 |
+
app.launch(allowed_paths=['./assets'])
|
assets/logos.png
ADDED
assets/readme-dimer-close-up.png
ADDED
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ppiformer @ git+https://github.com/anton-bushuiev/ppiformer.git@main
|
2 |
+
# gradio==3.50.2
|
3 |
+
# spaces
|
4 |
+
# typing_extensions==4.7.1
|
5 |
+
# gradio[oauth]==5.3.0
|
6 |
+
# uvicorn>=0.14.0
|
7 |
+
# spaces==0.30.4
|