Anton Bushuiev committed on
Commit
29bd8b5
1 Parent(s): 7103bfb

Initial commit

Browse files
Files changed (7) hide show
  1. .gitattributes +0 -35
  2. LICENSE +21 -0
  3. README.md +6 -11
  4. app.py +542 -0
  5. assets/logos.png +0 -0
  6. assets/readme-dimer-close-up.png +0 -0
  7. requirements.txt +7 -0
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Anton Bushuiev
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,14 +1,9 @@
1
  ---
2
- title: PPIformer CPU
3
- emoji: 🐢
4
- colorFrom: blue
5
- colorTo: blue
6
  sdk: gradio
7
- sdk_version: 5.3.0
8
  app_file: app.py
9
- pinned: false
10
- license: mit
11
- short_description: Learning to design protein-protein interactions with enhance
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: PPIformer
3
+ emoji: 🔬
4
+ colorFrom: pink
5
+ colorTo: green
6
  sdk: gradio
 
7
  app_file: app.py
8
+ pinned: true
9
+ ---
 
 
 
 
app.py ADDED
@@ -0,0 +1,542 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # print("""
2
+ # __ __ _ ___ _ _ _____ _____ _ _ _ _ _ ____ _____
3
+ # | \/ | / \ |_ _| \ | |_ _| ____| \ | | / \ | \ | |/ ___| ____|
4
+ # | |\/| | / _ \ | || \| | | | | _| | \| | / _ \ | \| | | | _|
5
+ # | | | |/ ___ \ | || |\ | | | | |___| |\ |/ ___ \| |\ | |___| |___
6
+ # |_| |_/_/ \_\___|_| \_| |_| |_____|_| \_/_/ \_\_| \_|\____|_____|
7
+
8
+ # ____ ____ _____ _ _ __
9
+ # | __ )| _ \| ____| / \ | |/ /
10
+ # | _ \| |_) | _| / _ \ | ' /
11
+ # | |_) | _ <| |___ / ___ \| . \
12
+ # |____/|_| \_\_____/_/ \_\_|\_\
13
+ # """)
14
import os
import subprocess
import sys


def _pip(*args):
    """Run ``pip`` for the interpreter executing this script.

    Invoking ``python -m pip`` through ``sys.executable`` guarantees that the
    package set being modified is the one this process imports from, which a
    bare ``pip`` looked up on PATH does not. The call stays best-effort: a
    failure is reported on stderr but does not abort start-up.

    Args:
        *args: Command-line arguments passed to pip verbatim.

    Returns:
        The exit code of the pip process.
    """
    completed = subprocess.run([sys.executable, "-m", "pip", *args], check=False)
    if completed.returncode != 0:
        print(
            f"[WARNING] pip {' '.join(args)} exited with code {completed.returncode}",
            file=sys.stderr,
        )
    return completed.returncode


# Earlier attempts at pinning other packages, kept for reference:
# os.system("pip uninstall -y gradio")
# os.system("pip install gradio==3.50.2")
# os.system("pip uninstall -y spaces")
# os.system("pip install spaces==0.8")

# Force a specific torch build before torch is imported below.
_pip("uninstall", "-y", "torch")
_pip("install", "torch==2.0.1")
21
+
22
+ import sys
23
+ import copy
24
+ import random
25
+ import tempfile
26
+ import shutil
27
+ import logging
28
+ from pathlib import Path
29
+ from functools import partial
30
+
31
+ import spaces
32
+ import gradio as gr
33
+ import torch
34
+ import numpy as np
35
+ import pandas as pd
36
+ from Bio.PDB.Polypeptide import protein_letters_3to1
37
+ from biopandas.pdb import PandasPdb
38
+ from colour import Color
39
+ from colour import RGB_TO_COLOR_NAMES
40
+
41
+ from mutils.proteins import AMINO_ACID_CODES_1
42
+ from mutils.pdb import download_pdb
43
+ from mutils.mutations import Mutation
44
+ from ppiref.extraction import PPIExtractor
45
+ from ppiref.utils.ppi import PPIPath
46
+ from ppiref.utils.residue import Residue
47
+ from ppiformer.tasks.node import DDGPPIformer
48
+ from ppiformer.utils.api import download_from_zenodo
49
+ from ppiformer.utils.api import predict_ddg as predict_ddg_
50
+ from ppiformer.utils.torch import fill_diagonal
51
+ from ppiformer.definitions import PPIFORMER_WEIGHTS_DIR
52
+
53
+
54
+ import pkg_resources
55
+ import sys
56
+
57
def print_package_versions():
    """Print every installed distribution as ``name==version`` and the Python version.

    Uses ``importlib.metadata`` from the standard library instead of the
    deprecated ``pkg_resources`` API. Names are lower-cased and the lines are
    sorted so that the listing is easy to scan in the start-up log.
    """
    from importlib import metadata

    installed_packages = sorted(
        f"{dist.metadata['Name'].lower()}=={dist.version}"
        for dist in metadata.distributions()
        # Distributions with broken metadata may lack a name; skip them.
        if dist.metadata['Name']
    )
    print("Installed packages and their versions:")
    for package in installed_packages:
        print(package)

    print("\nPython version:")
    print(sys.version)


print_package_versions()
67
+
68
+
69
# Route timestamped log records of level INFO and above to stdout.
_stdout_handler = logging.StreamHandler(sys.stdout)
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[_stdout_handler],
    level=logging.INFO,
)

# Seed Python's RNG so that the shuffled chain colours are reproducible.
random.seed(0)
76
+
77
+
78
@spaces.GPU
def predict_ddg(models, ppi, muts, return_attn):
    """Run the model ensemble on one structure and return results on the CPU.

    Args:
        models: Iterable of models; each is moved to the selected device.
        ppi: Structure passed through to ``ppiformer.utils.api.predict_ddg``.
        muts: Mutations passed through to ``ppiformer.utils.api.predict_ddg``.
        return_attn: Whether attention values are requested as well.

    Returns:
        The detached ddG prediction on the CPU, or a pair
        ``(ddg_pred, attns)`` of detached CPU tensors when ``return_attn``
        is truthy.
    """
    # The device is chosen per call, independently of the one used at start-up.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"[INFO] Device on prediction: {device}")
    models = [m.to(device) for m in models]

    output = predict_ddg_(models, ppi, muts, return_attn=return_attn)
    if not return_attn:
        return output.detach().cpu()

    ddg_pred, attns = output
    return ddg_pred.detach().cpu(), attns.detach().cpu()
89
+
90
+
91
def process_inputs(inputs, temp_dir):
    """Validate the raw GUI inputs and prepare the files needed for prediction.

    Args:
        inputs: Sequence ``(pdb_code, pdb_path, partners, muts, muts_path)``
            in the order of the Gradio input components.
        temp_dir: ``pathlib.Path`` of a writable directory. The structure is
            stored under ``temp_dir / 'pdb'`` and the extracted interface
            under ``temp_dir / 'ppi'``.

    Returns:
        Tuple ``(pdb_path, ppi_path, muts, muts_on_interface)``: the path of
        the (renamed) structure file, the path of the extracted interface,
        the mutations as strings, and one boolean per mutation telling
        whether its wild type was found in the extracted interface.

    Raises:
        gr.Error: If a required input is missing, the structure cannot be
            downloaded, the interface cannot be extracted, the mutations
            cannot be parsed, or they are inconsistent with the partners or
            with the structure.
    """
    pdb_code, pdb_path, partners, muts, muts_path = inputs

    # Check inputs
    if not pdb_code and not pdb_path:
        raise gr.Error("PPI structure not specified.")

    if pdb_code and pdb_path:
        gr.Warning("Both PDB code and PDB file specified. Using PDB file.")

    if not partners:
        raise gr.Error("Partners not specified.")

    if not muts and not muts_path:
        raise gr.Error("Mutations not specified.")

    if muts and muts_path:
        gr.Warning("Both mutations and mutations file specified. Using mutations file.")

    # Prepare PDB input
    if pdb_path:
        # convert file name to PPIRef format
        new_pdb_path = temp_dir / f"pdb/{pdb_path.name.replace('_', '-')}"
        new_pdb_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(str(pdb_path), str(new_pdb_path))
        pdb_path = new_pdb_path
    else:
        try:
            pdb_code = pdb_code.strip().lower()
            pdb_path = temp_dir / f'pdb/{pdb_code}.pdb'
            download_pdb(pdb_code, path=pdb_path)
        except Exception as e:
            # Keep the cause in the server log; the user only sees the short message.
            logging.exception('PDB download failed')
            raise gr.Error("PDB download failed.") from e

    # Parse partners, ignoring blank entries such as the one in "A,B,"
    partners = [p.strip() for p in partners.split(',') if p.strip()]
    if not partners:
        raise gr.Error("Partners not specified.")

    # Add partners to file name
    pdb_path = pdb_path.rename(pdb_path.with_stem(f"{pdb_path.stem}-{'-'.join(partners)}"))

    # Extract PPI into temp dir
    try:
        ppi_dir = temp_dir / 'ppi'
        extractor = PPIExtractor(out_dir=ppi_dir, nest_out_dir=True, join=True, radius=10.0)
        extractor.extract(pdb_path, partners=partners)
        ppi_path = PPIPath.construct(ppi_dir, pdb_path.stem, partners)
    except Exception as e:
        logging.exception('PPI extraction failed')
        raise gr.Error("PPI extraction failed.") from e

    # Prepare mutations input (the file takes precedence over the text box)
    if muts_path:
        muts_path = Path(muts_path)
        muts = muts_path.read_text()

    # Check mutations

    # Basic format
    try:
        muts = [Mutation.from_str(m) for m in muts.strip().split(';') if m.strip()]
    except Exception as e:
        raise gr.Error(f'Mutations parsing failed: {e}') from e

    # Input made only of separators/whitespace parses to nothing
    if not muts:
        raise gr.Error("Mutations not specified.")

    # Partners
    for mut in muts:
        for pmut in mut.muts:
            if pmut.chain not in partners:
                raise gr.Error(f'Chain of point mutation {pmut} is not in the list of partners {partners}.')

    # Consistency with provided .pdb
    muts_on_interface = []
    for mut in muts:
        if mut.wt_in_pdb(ppi_path):
            on_interface = True
        elif mut.wt_in_pdb(pdb_path):
            on_interface = False
        else:
            raise gr.Error(f'Wild-type of mutation {mut} is not in the provided .pdb file.')
        muts_on_interface.append(on_interface)

    muts = [str(m) for m in muts]

    return pdb_path, ppi_path, muts, muts_on_interface
174
+
175
+
176
def plot_3dmol(pdb_path, ppi_path, mut, attn, attn_mut_id=0):
    """Build an ``<iframe>`` showing the complex in 3Dmol.js with attention highlights.

    The whole structure from ``pdb_path`` is drawn; residues listed in
    ``ppi_path`` are drawn as sticks. Wild-type residues of ``mut`` are red
    and labelled, the remaining ones are coloured, sized and faded by the
    normalised attention they receive from the mutated residues.

    Args:
        pdb_path: Path of the PDB file to display.
        ppi_path: Path of the PDB file whose residues correspond to the
            token axes of ``attn``.
        mut: Mutation string understood by ``Mutation.from_str``.
        attn: Attention tensor indexed as ``attn[:, attn_mut_id, 0, :, 0, :, :, :]``
            (the original comment names the remaining axes models, layers,
            heads, tokens, tokens).
        attn_mut_id: Index of ``mut`` along the second axis of ``attn``.

    Returns:
        HTML string with an ``<iframe>`` whose ``srcdoc`` holds the viewer page.
    """
    # Local imports: the function needs them regardless of the file-level import block.
    import html as html_lib
    import json

    # NOTE 3DMol.js adapted from https://huggingface.co/spaces/huhlim/cg2all/blob/main/app.py

    # Read PDB for 3Dmol.js
    with open(pdb_path, "r") as fp:
        mol = fp.read()
    # PDB is a fixed-width format: keep the atom-name field three characters wide.
    mol = mol.replace("OT1", "O  ")
    mol = mol.replace("OT2", "OXT")
    # The text is embedded in a JavaScript template literal below; neutralise
    # the characters that are special inside one.
    mol = mol.replace("\\", "\\\\").replace("`", "\\`").replace("${", "\\${")

    # Read PPI to customize 3Dmol.js visualization (one row per residue, taken from its CA atom)
    ppi_df = PandasPdb().read_pdb(ppi_path).df['ATOM']
    ppi_df = ppi_df.groupby(list(Residue._fields)).apply(lambda df: df[df['atom_name'] == 'CA'].iloc[0]).reset_index(drop=True)
    ppi_df['id'] = ppi_df.apply(lambda row: ':'.join([row['residue_name'], row['chain_id'], str(row['residue_number']), row['insertion']]), axis=1)
    # Drop the trailing separator left by an empty insertion code
    ppi_df['id'] = ppi_df['id'].apply(lambda x: x[:-1] if x[-1] == ':' else x)
    muts_id = Mutation.from_str(mut).wt_to_graphein()  # flatten ids of all sp muts
    ppi_df['mutated'] = ppi_df.apply(lambda row: row['id'] in muts_id, axis=1)

    # Prepare attention coefficients per residue (normalized sum of direct attention from mutated residues)
    attn = torch.nan_to_num(attn, nan=1e-10)
    attn_sub = attn[:, attn_mut_id, 0, :, 0, :, :, :]  # models, layers, heads, tokens, tokens
    idx_mutated = torch.from_numpy(ppi_df.index[ppi_df['mutated']].to_numpy())
    attn_sub = fill_diagonal(attn_sub, 1e-10)
    attn_mutated = attn_sub[..., idx_mutated, :]
    attns_per_token = torch.sum(attn_mutated, dim=(0, 1, 2, 3))
    attn_min, attn_max = attns_per_token.min(), attns_per_token.max()
    if attn_max > attn_min:
        attns_per_token = (attns_per_token - attn_min) / (attn_max - attn_min)
    else:
        # All residues received the same attention: avoid 0/0 = NaN
        attns_per_token = torch.zeros_like(attns_per_token)
    attns_per_token = attns_per_token + 1e-10
    ppi_df['attn'] = attns_per_token.numpy()

    # Chains ordered by their most attended residue, so they get the preferred colours first
    chains = ppi_df.sort_values('attn', ascending=False)['chain_id'].unique()

    # Customize 3Dmol.js visualization https://3dmol.csb.pitt.edu/doc/
    style_calls = []  # pairs (selection, style) for viewer.addStyle
    labelled_atoms = []  # pairs (atom serial, label) of mutated residues

    # Cartoon chains
    preferred_colors = ['LimeGreen', 'HotPink', 'RoyalBlue']
    excluded_colors = preferred_colors + ['Black', 'White']
    other_colors = [names[0] for names in RGB_TO_COLOR_NAMES.values()]
    other_colors = [c for c in other_colors if c not in excluded_colors]
    random.shuffle(other_colors)
    palette = [Color(c) for c in preferred_colors + other_colors]
    chain_to_color = dict(zip(chains, palette))
    for chain in chains:
        style_calls.append((
            {"chain": str(chain)},
            {"cartoon": {"color": chain_to_color[chain].hex_l, "opacity": 0.6}},
        ))

    # Stick PPI and atoms for zoom
    # TODO Insertions
    for _, row in ppi_df.iterrows():
        selection = {'chain': str(row['chain_id']), 'resi': str(row['residue_number'])}
        if row['mutated']:
            stick = {'color': 'red', 'radius': 0.2, 'opacity': 1.0}
            label = protein_letters_3to1[row['residue_name']] + row['chain_id'] + str(row['residue_number']) + row['insertion']
            labelled_atoms.append((int(row['atom_number']), label))
        else:
            # Plain float: NumPy scalars are not JSON-serialisable
            weight = float(row['attn'])
            color = copy.deepcopy(chain_to_color[row['chain_id']])
            color.saturation = weight
            stick = {'color': color.hex_l, 'radius': weight / 5, 'opacity': weight}
        style_calls.append((selection, {'stick': stick}))

    # Convert style dicts to JS lines (JSON is valid JavaScript object syntax)
    styles = ''.join(
        'viewer.addStyle(' + ', '.join(json.dumps(part) for part in call) + ');\n'
        for call in style_calls
    )

    # Convert zoom atoms to 3DMol.js selection and add labels for mutated residues
    zoom_animation_duration = 500
    zoom_selection = json.dumps({'or': [{'serial': atom} for atom, _ in labelled_atoms]})
    zoom = f'viewer.zoomTo({zoom_selection},{zoom_animation_duration});'
    for atom, label in labelled_atoms:
        styles += (
            'viewer.addLabel(' + json.dumps(label)
            + ',{fontSize:16, fontColor:"red", backgroundOpacity: 0.0},'
            + json.dumps({'serial': atom}) + ');\n'
        )

    # Construct 3Dmol.js visualization script embedded in HTML
    page = (
        """<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8" />
<style>
body{
    font-family:sans-serif
}
.mol-container {
    width: 100%;
    height: 600px;
    position: relative;
}
.mol-container select{
    background-image:None;
}
</style>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js" integrity="sha512-STof4xm1wgkfm7heWqFJVn58Hm3EtS31XFaagaa8VMReCXAkQnJZ+jEy8PCC/iT18dFy95WcExNHFTqLyp72eQ==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
<script src="https://3Dmol.csb.pitt.edu/build/3Dmol-min.js"></script>
</head>
<body>
<div id="container" class="mol-container"></div>

<script>
let pdb = `"""
        + mol
        + """`

$(document).ready(function () {
    let element = $("#container");
    let config = { backgroundColor: "white" };
    let viewer = $3Dmol.createViewer(element, config);
    viewer.addModel(pdb, "pdb");
    viewer.setStyle({"model": 0}, {"ray_opaque_background": "off"}, {"stick": {"color": "lightgrey", "opacity": 0.5}});
"""
        + styles
        + zoom
        + """
    viewer.render();
})
</script>
</body></html>"""
    )

    # The page becomes an attribute value, so it must be HTML-escaped; the
    # browser decodes the entities again before parsing the srcdoc document.
    return (
        '<iframe style="width: 100%; height: 600px" name="result" '
        'allow="midi; geolocation; microphone; camera; display-capture; encrypted-media;" '
        'sandbox="allow-modals allow-forms allow-scripts allow-same-origin allow-popups '
        'allow-top-navigation-by-user-activation allow-downloads" allowfullscreen="" '
        f'allowpaymentrequest="" frameborder="0" srcdoc="{html_lib.escape(page, quote=True)}"></iframe>'
    )
307
+
308
+
309
def predict(models, temp_dir, *inputs):
    """Predict ddG for all requested mutations and prepare the GUI outputs.

    Mutations whose wild type lies in the extracted interface are predicted
    on the interface file, all others on the complete structure file.

    Args:
        models: Models forwarded to ``predict_ddg``.
        temp_dir: Working directory forwarded to ``process_inputs``.
        *inputs: Raw values of the input components, forwarded to
            ``process_inputs``.

    Returns:
        Tuple ``(df, path, dropdown, dropdown_choices_to_plot_args)``: a
        ``gr.Dataframe`` with the predictions, the path of the CSV file with
        the same table, a ``gr.Dropdown`` listing the mutations, and a dict
        mapping each mutation to the positional arguments of ``plot_3dmol``.

    Raises:
        gr.Error: If input processing or the prediction fails.
    """
    logging.info('Starting prediction')

    # Process input
    pdb_path, ppi_path, muts, muts_on_interface = process_inputs(inputs, temp_dir)
    if not muts:
        # Nothing to predict; the code below assumes at least one row.
        raise gr.Error("Mutations not specified.")

    # Create dataframe
    df = pd.DataFrame({
        'Mutation': muts,
        'ddG [kcal/mol]': len(muts) * [np.nan],
        '10A Interface': muts_on_interface,
        'Attn Id': len(muts) * [np.nan],
    })

    # Show warning if some mutations are not on the interface
    muts_not_on_interface = df[~df['10A Interface']]['Mutation'].tolist()
    n_muts_not_on_interface = len(muts_not_on_interface)
    if n_muts_not_on_interface:
        n_muts_warn = 5  # list at most this many mutations in the message
        muts_not_on_interface = ';'.join(muts_not_on_interface[:n_muts_warn])
        if n_muts_not_on_interface > n_muts_warn:
            muts_not_on_interface += f'... (and {n_muts_not_on_interface - n_muts_warn} more)'
        gr.Warning((
            f"{muts_not_on_interface} {'is' if n_muts_not_on_interface == 1 else 'are'} not on the interface. "
            f"The model will predict the effect{'s' if n_muts_not_on_interface > 1 else ''} of "
            f"mutation{'s' if n_muts_not_on_interface > 1 else ''} on the whole complex. "
            f"This may lead to less accurate predictions."
        ))

    logging.info('Inputs processed')

    # Predict using interface for mutations on the interface and using the whole complex otherwise
    attn_ppi, attn_pdb = None, None
    for on_interface, structure_path in [(True, ppi_path), (False, pdb_path)]:
        df_sub = df[df['10A Interface']] if on_interface else df[~df['10A Interface']]
        if df_sub.empty:
            continue

        # Predict
        try:
            ddg, attn = predict_ddg(models, structure_path, df_sub['Mutation'].tolist(), return_attn=True)
        except Exception as e:
            logging.exception('Prediction failed')
            raise gr.Error(f"Prediction failed. {str(e)}") from e
        ddg = ddg.detach().numpy().tolist()

        logging.info(f'Predictions made for {structure_path}')

        # Update dataframe and attention tensor; 'Attn Id' is the position of
        # the mutation within the batch that produced its attention tensor
        idx = df_sub.index
        df.loc[idx, 'ddG [kcal/mol]'] = ddg
        df.loc[idx, 'Attn Id'] = np.arange(len(idx))

        if on_interface:
            attn_ppi = attn
        else:
            attn_pdb = attn
    # Every row belongs to exactly one of the two subsets, so no NaN is left here
    df['Attn Id'] = df['Attn Id'].astype(int)

    # Round ddG values
    df['ddG [kcal/mol]'] = df['ddG [kcal/mol]'].round(3)

    # Create PPI-specific dropdown
    dropdown = gr.Dropdown(
        df['Mutation'].tolist(), value=df['Mutation'].iloc[0],
        interactive=True, visible=True, label="Mutation to visualize",
    )

    # Predefine plot arguments for all dropdown choices (single pass over the rows)
    dropdown_choices_to_plot_args = {}
    for mut, on_interface, attn_id in zip(df['Mutation'], df['10A Interface'], df['Attn Id']):
        if mut in dropdown_choices_to_plot_args:
            # Repeated mutation: keep the arguments of its first occurrence
            continue
        dropdown_choices_to_plot_args[mut] = (
            pdb_path,
            ppi_path if on_interface else pdb_path,
            mut,
            attn_ppi if on_interface else attn_pdb,
            int(attn_id),
        )

    # Create dataframe file; the interface column is shown only when it is informative
    path = 'ppiformer_ddg_predictions.csv'
    columns = ['Mutation', 'ddG [kcal/mol]']
    datatypes = ['str', 'number']
    if n_muts_not_on_interface:
        columns.append('10A Interface')
        datatypes.append('bool')
    df = df[columns]
    df.to_csv(path, index=False)
    df = gr.Dataframe(
        value=df,
        headers=columns,
        datatype=datatypes,
        col_count=(len(columns), 'fixed'),
    )

    logging.info('Prediction results prepared')

    return df, path, dropdown, dropdown_choices_to_plot_args
415
+
416
+
417
def update_plot(dropdown, dropdown_choices_to_plot_args):
    """Return the 3D view for the mutation currently selected in the dropdown.

    Args:
        dropdown: Selected mutation, or ``None`` when nothing is selected.
        dropdown_choices_to_plot_args: Mapping from mutation to the positional
            arguments of ``plot_3dmol``; may be empty before the first
            prediction.

    Returns:
        The HTML produced by ``plot_3dmol``, or an empty string when there is
        nothing to plot for the current selection.
    """
    if not dropdown_choices_to_plot_args or dropdown not in dropdown_choices_to_plot_args:
        return ""
    return plot_3dmol(*dropdown_choices_to_plot_args[dropdown])
419
+
420
+
421
+ app = gr.Blocks(theme=gr.themes.Default(primary_hue="green", secondary_hue="pink"))
422
+ with app:
423
+
424
+ # Input GUI
425
+ gr.Markdown(value="""
426
+ # PPIformer Web
427
+ ### Computational Design of Protein-Protein Interactions
428
+ """)
429
+ gr.Image("assets/readme-dimer-close-up.png")
430
+ gr.Markdown(value="""
431
+ [PPIformer](https://github.com/anton-bushuiev/PPIformer/tree/main) is a state-of-the-art predictor of the effects of mutations
432
+ on protein-protein interactions (PPIs), as quantified by the binding free energy changes (ddG). PPIformer was shown to successfully
433
+ identify known favourable mutations of the [staphylokinase thrombolytics](https://pubmed.ncbi.nlm.nih.gov/10942387/)
434
+ and a [human antibody](https://www.pnas.org/doi/10.1073/pnas.2122954119) against the SARS-CoV-2 spike protein. The model was pre-trained
435
+ on the [PPIRef](https://github.com/anton-bushuiev/PPIRef)
436
+ dataset via a coarse-grained structural masked modeling and fine-tuned on the [SKEMPI v2.0](https://life.bsc.es/pid/skempi2) dataset via log odds.
437
+ Please see more details in [our ICLR 2024 paper](https://arxiv.org/abs/2310.18515).
438
+
439
+ **Inputs.** To use PPIformer on your data, please specify the PPI structure (PDB code or .pdb file), interacting proteins of interest
440
+ (chain codes in the file) and mutations (semicolon-separated list or file with mutations in the
441
+ [standard format](https://foldxsuite.crg.eu/parameter/mutant-file): wild-type residue, chain, residue number, mutant residue).
442
+ For inspiration, you can use one of the examples below: click on one of the rows to pre-fill the inputs. After specifying the inputs,
443
+ press the button to predict the effects of mutations on the PPI. Currently the model runs on CPU, so the predictions may take a few minutes.
444
+
445
+ **Outputs.** After making a prediction with the model, you will see binding free energy changes for each mutation (ddG values in kcal/mol).
446
+ A more negative value indicates an improvement in affinity, whereas a more positive value means a reduction in affinity.
447
+ Below you will also see a 3D visualization of the PPI with wild types of mutated residues highlighted in red. The visualization additionally shows
448
+ the attention coefficients of the model for the nearest neighboring residues, which quantifies the contribution of the residues
449
+ to the predicted ddG value. The brighter and thicker a residue is, the more attention the model paid to it.
450
+ """)
451
+
452
+ with gr.Row(equal_height=True):
453
+ with gr.Column():
454
+ gr.Markdown("## PPI structure")
455
+ with gr.Row(equal_height=True):
456
+ pdb_code = gr.Textbox(placeholder="1BUI", label="PDB code", info="Protein Data Bank identifier for the structure (https://www.rcsb.org/)")
457
+ partners = gr.Textbox(placeholder="A,B,C", label="Partners", info="Protein chain identifiers in the PDB file forming the PPI interface (two or more)")
458
+ pdb_path = gr.File(file_count="single", label="Or .pdb file instead of PDB code (your structure will only be used for this prediction and not stored anywhere)")
459
+
460
+ with gr.Column():
461
+ gr.Markdown("## Mutations")
462
+ muts = gr.Textbox(placeholder="SC16A;FC47A;SC16A,FC47A", label="List of (multi-point) mutations", info="SC16A;FC47A;SC16A,FC47A for three mutations: serine to alanine at position 16 in chain C, phenylalanine to alanine at position 47 in chain C, and their double-point combination")
463
+ muts_path = gr.File(file_count="single", label="Or file with mutations")
464
+
465
+ examples = gr.Examples(
466
+ examples=[
467
+ ["1BUI", "A,B,C", "SC16A,FC47A;SC16A;FC47A"],
468
+ ["3QIB", "A,B,P,C,D", "YP7F,TP12S;YP7F;TP12S"],
469
+ ["1KNE", "A,P", ';'.join([f"TP6{a}" for a in AMINO_ACID_CODES_1])]
470
+ ],
471
+ inputs=[pdb_code, partners, muts],
472
+ label="Examples (click on a line to pre-fill the inputs)",
473
+ cache_examples=False
474
+ )
475
+
476
+ # Predict GUI
477
+ predict_button = gr.Button(value="Predict effects of mutations on PPI", variant="primary")
478
+
479
+ # Output GUI
480
+ gr.Markdown("## Predictions")
481
+ df_file = gr.File(label="Download predictions as .csv", interactive=False, visible=True)
482
+ df = gr.Dataframe(
483
+ headers=["Mutation", "ddG [kcal/mol]"],
484
+ datatype=["str", "number"],
485
+ col_count=(2, "fixed"),
486
+ )
487
+ dropdown = gr.Dropdown(interactive=True, visible=False)
488
+ dropdown_choices_to_plot_args = gr.State([])
489
+ plot = gr.HTML()
490
+
491
+ # Bottom info box
492
+ gr.Markdown(value="""
493
+ <br/>
494
+
495
+ ## About this web
496
+
497
+ **Use cases**. The predictor can be used in: (i) Drug Discovery for the development of novel drugs and vaccines for various diseases such as cancer,
498
+ neurodegenerative disorders, and infectious diseases, (ii) Biotechnological Applications to develop new biocatalysts for biofuels,
499
+ industrial chemicals, and pharmaceuticals (iii) Therapeutic Protein Design to develop therapeutic proteins with enhanced stability,
500
+ specificity, and efficacy, and (iv) Mechanistic Studies to gain insights into fundamental biological processes, such as signal transduction,
501
+ gene regulation, and immune response.
502
+
503
+ **Acknowledgement**. Please, use the following citation to acknowledge the use of our service. The web server is provided free of charge for non-commercial use.
504
+ > Bushuiev, Anton, Roman Bushuiev, Petr Kouba, Anatolii Filkin, Marketa Gabrielova, Michal Gabriel, Jiri Sedlar, Tomas Pluskal, Jiri Damborsky, Stanislav Mazurenko, Josef Sivic.
505
+ > "Learning to design protein-protein interactions with enhanced generalization". The Twelfth International Conference on Learning Representations (ICLR 2024).
506
+ > [https://arxiv.org/abs/2310.18515](https://arxiv.org/abs/2310.18515).
507
+
508
+ **Contact**. Please share your feedback or report any bugs through [GitHub Issues](https://github.com/anton-bushuiev/PPIformer/issues/new), or feel free to contact us directly at [anton.bushuiev@cvut.cz](mailto:anton.bushuiev@cvut.cz).
509
+ """)
510
+ gr.Image("assets/logos.png")
511
+
512
+ # Download weights from Zenodo
513
+ download_from_zenodo('weights.zip')
514
+
515
+ # Set device
516
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
517
+ print(f"[INFO] Device on start: {device}")
518
+
519
+ # Load models
520
+ models = [
521
+ DDGPPIformer.load_from_checkpoint(
522
+ PPIFORMER_WEIGHTS_DIR / f'ddg_regression/{i}.ckpt',
523
+ map_location=torch.device('cpu')
524
+ ).eval()
525
+ for i in range(3)
526
+ ]
527
+ models = [model.to(device) for model in models]
528
+
529
+ # Create temporary directory for storing downloaded PDBs and extracted PPIs
530
+ temp_dir_obj = tempfile.TemporaryDirectory()
531
+ temp_dir = Path(temp_dir_obj.name)
532
+
533
+ # Main logic
534
+ inputs = [pdb_code, pdb_path, partners, muts, muts_path]
535
+ outputs = [df, df_file, dropdown, dropdown_choices_to_plot_args]
536
+ predict = partial(predict, models, temp_dir)
537
+ predict_button.click(predict, inputs=inputs, outputs=outputs)
538
+
539
+ # Update plot on dropdown change
540
+ dropdown.change(update_plot, inputs=[dropdown, dropdown_choices_to_plot_args], outputs=[plot])
541
+
542
+ app.launch(allowed_paths=['./assets'])
assets/logos.png ADDED
assets/readme-dimer-close-up.png ADDED
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ ppiformer @ git+https://github.com/anton-bushuiev/ppiformer.git@main
2
+ # gradio==3.50.2
3
+ # spaces
4
+ # typing_extensions==4.7.1
5
+ # gradio[oauth]==5.3.0
6
+ # uvicorn>=0.14.0
7
+ # spaces==0.30.4