jannisborn committed on
Commit
e83e5dc
1 Parent(s): 5984d9a
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: GT4SD - PaccMannRL
3
  emoji: 💡
4
  colorFrom: green
5
  colorTo: blue
 
1
  ---
2
+ title: GT4SD - PaccMannGP
3
  emoji: 💡
4
  colorFrom: green
5
  colorTo: blue
app.py CHANGED
@@ -3,14 +3,15 @@ import pathlib
3
  from typing import List
4
 
5
  import gradio as gr
6
- import numpy as np
7
  import pandas as pd
8
- from gt4sd.algorithms.conditional_generation.paccmann_rl import (
9
- PaccMannRL,
10
- PaccMannRLOmicBasedGenerator,
11
- PaccMannRLProteinBasedGenerator,
12
  )
13
- from gt4sd.algorithms.generation.paccmann_vae import PaccMannVAE, PaccMannVAEGenerator
 
 
 
14
  from gt4sd.algorithms.registry import ApplicationsRegistry
15
 
16
  from utils import draw_grid_generate
@@ -19,53 +20,57 @@ logger = logging.getLogger(__name__)
19
  logger.addHandler(logging.NullHandler())
20
 
21
 
 
 
 
22
  def run_inference(
23
  algorithm_version: str,
24
- inference_type: str,
25
  protein_target: str,
26
- omics_target: str,
27
  temperature: float,
28
  length: float,
29
  number_of_samples: int,
 
 
 
 
 
 
 
 
30
  ):
31
- if inference_type == "Unbiased":
32
- algorithm_class = PaccMannVAEGenerator
33
- model_class = PaccMannVAE
34
- target = None
35
- elif inference_type == "Conditional":
36
- if "Protein" in algorithm_version:
37
- algorithm_class = PaccMannRLProteinBasedGenerator
38
- target = protein_target
39
- elif "Omic" in algorithm_version:
40
- algorithm_class = PaccMannRLOmicBasedGenerator
41
- try:
42
- test_target = [float(x) for x in omics_target.split(" ")]
43
- except Exception:
44
- raise ValueError(
45
- f"Expected 2128 space-separated omics values, got {omics_target}"
46
- )
47
- if len(test_target) != 2128:
48
- raise ValueError(
49
- f"Expected 2128 omics values, got {len(target)}: {target}"
50
- )
51
- target = f"[{omics_target.replace(' ', ',')}]"
52
- else:
53
- raise ValueError(f"Unknown algorithm version {algorithm_version}")
54
- model_class = PaccMannRL
55
- else:
56
- raise ValueError(f"Unknown inference type {inference_type}")
57
 
58
- config = algorithm_class(
59
- algorithm_version.split("_")[-1],
 
60
  temperature=temperature,
61
  generated_length=length,
 
 
 
 
 
 
 
 
 
 
62
  )
63
- print("Target is ", target)
64
- print(type(target), len(target))
65
- model = model_class(config, target=target)
 
 
 
 
66
  samples = list(model.sample(number_of_samples))
67
 
68
- return draw_grid_generate(samples=samples, n_cols=5)
 
 
 
 
 
69
 
70
 
71
  if __name__ == "__main__":
@@ -73,18 +78,17 @@ if __name__ == "__main__":
73
  # Preparation (retrieve all available algorithms)
74
  all_algos = ApplicationsRegistry.list_available()
75
  algos = [
76
- x["algorithm_application"].split("Based")[0].split("PaccMannRL")[-1]
77
- + "_"
78
- + x["algorithm_version"]
79
  for x in list(filter(lambda x: "PaccMannRL" in x["algorithm_name"], all_algos))
80
  ]
81
 
82
  # Load metadata
83
  metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")
84
 
85
- examples = pd.read_csv(metadata_root.joinpath("examples.csv"), header=None).fillna(
86
- ""
87
- )
 
88
 
89
  with open(metadata_root.joinpath("article.md"), "r") as f:
90
  article = f.read()
@@ -93,24 +97,20 @@ if __name__ == "__main__":
93
 
94
  demo = gr.Interface(
95
  fn=run_inference,
96
- title="PaccMannRL",
97
  inputs=[
98
- gr.Dropdown(algos, label="Algorithm version", value="Protein_v0"),
99
- gr.Radio(
100
- choices=["Conditional", "Unbiased"],
101
- label="Inference type",
102
- value="Conditional",
 
103
  ),
104
  gr.Textbox(
105
  label="Protein target",
106
  placeholder="MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTT",
107
  lines=1,
108
  ),
109
- gr.Textbox(
110
- label="Gene expression target",
111
- placeholder=f"{' '.join(map(str, np.round(np.random.rand(2128), 2)))}",
112
- lines=1,
113
- ),
114
  gr.Slider(minimum=0.5, maximum=2, value=1, label="Decoding temperature"),
115
  gr.Slider(
116
  minimum=5,
@@ -122,6 +122,34 @@ if __name__ == "__main__":
122
  gr.Slider(
123
  minimum=1, maximum=50, value=10, label="Number of samples", step=1
124
  ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  ],
126
  outputs=gr.HTML(label="Output"),
127
  article=article,
 
3
  from typing import List
4
 
5
  import gradio as gr
 
6
  import pandas as pd
7
+ from gt4sd.algorithms.controlled_sampling.paccmann_gp import (
8
+ PaccMannGPGenerator,
9
+ PaccMannGP,
 
10
  )
11
+ from gt4sd.algorithms.controlled_sampling.paccmann_gp.implementation import (
12
+ MINIMIZATION_FUNCTIONS,
13
+ )
14
+
15
  from gt4sd.algorithms.registry import ApplicationsRegistry
16
 
17
  from utils import draw_grid_generate
 
20
  logger.addHandler(logging.NullHandler())
21
 
22
 
23
+ MINIMIZATION_FUNCTIONS.pop("callable", None)
24
+
25
+
26
  def run_inference(
27
  algorithm_version: str,
28
+ targets: List[str],
29
  protein_target: str,
 
30
  temperature: float,
31
  length: float,
32
  number_of_samples: int,
33
+ limit: int,
34
+ number_of_steps: int,
35
+ number_of_initial_points: int,
36
+ number_of_optimization_rounds: int,
37
+ sampling_variance: float,
38
+ samples_for_evaluation: int,
39
+ maximum_number_of_sampling_steps: int,
40
+ seed: int,
41
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
+ config = PaccMannGPGenerator(
44
+ algorithm_version=algorithm_version.split("_")[-1],
45
+ batch_size=32,
46
  temperature=temperature,
47
  generated_length=length,
48
+ limit=limit,
49
+ acquisition_function="EI",
50
+ number_of_steps=number_of_steps,
51
+ number_of_initial_points=number_of_initial_points,
52
+ initial_point_generator="random",
53
+ number_of_optimization_rounds=number_of_optimization_rounds,
54
+ sampling_variance=sampling_variance,
55
+ samples_for_evaluation=samples_for_evaluation,
56
+ maximum_number_of_sampling_steps=maximum_number_of_sampling_steps,
57
+ seed=seed,
58
  )
59
+ target = {i: {} for i in targets}
60
+ if "affinity" in targets:
61
+ target["affinity"]["protein"] = protein_target
62
+ else:
63
+ protein_target = ""
64
+
65
+ model = PaccMannGP(config, target=target)
66
  samples = list(model.sample(number_of_samples))
67
 
68
+ return draw_grid_generate(
69
+ samples=samples,
70
+ n_cols=5,
71
+ properties=set(target.keys()),
72
+ protein_target=protein_target,
73
+ )
74
 
75
 
76
  if __name__ == "__main__":
 
78
  # Preparation (retrieve all available algorithms)
79
  all_algos = ApplicationsRegistry.list_available()
80
  algos = [
81
+ x["algorithm_version"]
 
 
82
  for x in list(filter(lambda x: "PaccMannRL" in x["algorithm_name"], all_algos))
83
  ]
84
 
85
  # Load metadata
86
  metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")
87
 
88
+ examples = pd.read_csv(
89
+ metadata_root.joinpath("examples.csv"), header=None, sep="|"
90
+ ).fillna("")
91
+ examples[1] = examples[1].apply(eval)
92
 
93
  with open(metadata_root.joinpath("article.md"), "r") as f:
94
  article = f.read()
 
97
 
98
  demo = gr.Interface(
99
  fn=run_inference,
100
+ title="PaccMannGP",
101
  inputs=[
102
+ gr.Dropdown(algos, label="Algorithm version", value="v0"),
103
+ gr.CheckboxGroup(
104
+ choices=list(MINIMIZATION_FUNCTIONS.keys()),
105
+ value=["qed"],
106
+ multiselect=True,
107
+ label="Property goals",
108
  ),
109
  gr.Textbox(
110
  label="Protein target",
111
  placeholder="MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTT",
112
  lines=1,
113
  ),
 
 
 
 
 
114
  gr.Slider(minimum=0.5, maximum=2, value=1, label="Decoding temperature"),
115
  gr.Slider(
116
  minimum=5,
 
122
  gr.Slider(
123
  minimum=1, maximum=50, value=10, label="Number of samples", step=1
124
  ),
125
+ gr.Slider(minimum=1, maximum=8, value=4.0, label="Limit"),
126
+ gr.Slider(minimum=1, maximum=32, value=8, label="Number of steps", step=1),
127
+ gr.Slider(
128
+ minimum=1, maximum=32, value=4, label="Number of initial points", step=1
129
+ ),
130
+ gr.Slider(
131
+ minimum=1,
132
+ maximum=4,
133
+ value=1,
134
+ label="Number of optimization rounds",
135
+ step=1,
136
+ ),
137
+ gr.Slider(minimum=0.01, maximum=1, value=0.1, label="Sampling variance"),
138
+ gr.Slider(
139
+ minimum=1,
140
+ maximum=10,
141
+ value=1,
142
+ label="Samples used for evaluation",
143
+ step=1,
144
+ ),
145
+ gr.Slider(
146
+ minimum=1,
147
+ maximum=64,
148
+ value=4,
149
+ label="Maximum number of sampling steps",
150
+ step=1,
151
+ ),
152
+ gr.Number(value=42, label="Seed", precision=0),
153
  ],
154
  outputs=gr.HTML(label="Output"),
155
  article=article,
model_cards/article.md CHANGED
@@ -1,12 +1,10 @@
1
  # Model documentation & parameters
2
 
3
- **Algorithm Version**: Which model version (either protein-target-driven or gene-expression-profile-driven) to use and which checkpoint to rely on.
4
 
5
- **Inference type**: Whether the model should be conditioned on the target (default) or whether the model is used in an `Unbiased` manner.
6
 
7
- **Protein target**: An AAS of a protein target used for conditioning. Only use if `Inference type` is `Conditional` and if the `Algorithm version` is a Protein model.
8
-
9
- **Gene expression target**: A list of 2128 floats, representing the embedding of gene expression profile to be used for conditioning. Only use if `Inference type` is `Conditional` and if the `Algorithm version` is a Omic model.
10
 
11
  **Decoding temperature**: The temperature parameter in the SMILES/SELFIES decoder. Higher values lead to more explorative choices, smaller values culminate in mode collapse.
12
 
@@ -14,30 +12,43 @@
14
 
15
  **Number of samples**: How many samples should be generated (between 1 and 50).
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- # Model card -- PaccMannRL
19
 
20
- **Model Details**: PaccMannRL is a language model for conditional molecular design. It consists of a domain-specific encoder (for protein targets or gene expression profiles) and a generic molecular decoder. Both components are finetuned together using RL to convert the context representation into a molecule with high affinity toward the context (i.e., binding affinity to the protein or high inhibitory effect for the cell profile).
 
 
 
 
21
 
22
  **Developers**: Jannis Born, Matteo Manica and colleagues from IBM Research.
23
 
24
  **Distributors**: Original authors' code wrapped and distributed by GT4SD Team (2023) from IBM Research.
25
 
26
- **Model date**: Published in 2021.
27
 
28
- **Model version**: Models trained and distribuetd by the original authors.
29
- - **Protein_v0**: Molecular decoder pretrained on 1.5M molecules from ChEMBL. Protein encoder pretrained on 404k proteins from UniProt. Encoder and decoder finetuned on 41 SARS-CoV-2-related protein targets with a binding affinity predictor trained on BindingDB.
30
- - **Omic_v0**: Molecular decoder pretrained on 1.5M molecules from ChEMBL. Gene expression encoder pretrained on 12k gene expression profiles from TCGA. Encoder and decoder finetuned on a few hundred cancer cell profiles from GDSC with a IC50 predictor trained on GDSC.
31
 
32
- **Model type**: A language-based molecular generative model that can be optimized with RL to generate molecules with high affinity toward a context.
33
 
34
  **Information about training algorithms, parameters, fairness constraints or other applied approaches, and features**:
35
- - **Protein**: Parameters as provided on [(GitHub repo)](https://github.com/PaccMann/paccmann_sarscov2).
36
- - **Omics**: Parameters as provided on [(GitHub repo)](https://github.com/PaccMann/paccmann_rl).
37
 
38
  **Paper or other resource for more information**:
39
- - **Protein**: [PaccMannRL: De novo generation of hit-like anticancer molecules from transcriptomic data via reinforcement learning (2021; *iScience*)](https://www.cell.com/iscience/fulltext/S2589-0042(21)00237-6).
40
- - **Omics**: [Data-driven molecular design for discovery and synthesis of novel ligands: a case study on SARS-CoV-2 (2021; *Machine Learning: Science and Technology*)](https://iopscience.iop.org/article/10.1088/2632-2153/abe808/meta).
41
 
42
  **License**: MIT
43
 
@@ -51,9 +62,9 @@
51
 
52
  **Factors**: Not applicable.
53
 
54
- **Metrics**: High reward on generating molecules with high affinity toward context.
55
 
56
- **Datasets**: ChEMBL, UniProt, GDSC and BindingDB (see above).
57
 
58
  **Ethical Considerations**: Unclear, please consult with original authors in case of questions.
59
 
@@ -62,35 +73,17 @@
62
  Model card prototype inspired by [Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)
63
 
64
  ## Citation
65
-
66
- **Omics**:
67
- ```bib
68
- @article{born2021paccmannrl,
69
- title = {PaccMann\textsuperscript{RL}: De novo generation of hit-like anticancer molecules from transcriptomic data via reinforcement learning},
70
- journal = {iScience},
71
- volume = {24},
72
- number = {4},
73
- pages = {102269},
74
- year = {2021},
75
- issn = {2589-0042},
76
- doi = {https://doi.org/10.1016/j.isci.2021.102269},
77
- url = {https://www.cell.com/iscience/fulltext/S2589-0042(21)00237-6},
78
- author = {Born, Jannis and Manica, Matteo and Oskooei, Ali and Cadow, Joris and Markert, Greta and {Rodr{\'{i}}guez Mart{\'{i}}nez}, Mar{\'{i}}a}
79
- }
80
- ```
81
-
82
- **Proteins**:
83
  ```bib
84
- @article{born2021datadriven,
85
- author = {Born, Jannis and Manica, Matteo and Cadow, Joris and Markert, Greta and Mill, Nil Adell and Filipavicius, Modestas and Janakarajan, Nikita and Cardinale, Antonio and Laino, Teodoro and {Rodr{\'{i}}guez Mart{\'{i}}nez}, Mar{\'{i}}a},
86
- doi = {10.1088/2632-2153/abe808},
87
- issn = {2632-2153},
88
- journal = {Machine Learning: Science and Technology},
89
- number = {2},
90
- pages = {025024},
91
- title = {{Data-driven molecular design for discovery and synthesis of novel ligands: a case study on SARS-CoV-2}},
92
- url = {https://iopscience.iop.org/article/10.1088/2632-2153/abe808},
93
- volume = {2},
94
- year = {2021}
95
  }
96
  ```
 
1
  # Model documentation & parameters
2
 
3
+ **Algorithm Version**: Which model version to use.
4
 
5
+ **Property goals**: One or multiple properties that will be optimized.
6
 
7
+ **Protein target**: An amino acid sequence (AAS) of a protein target used for conditioning. Leave blank unless you use `affinity` as a `property goal`.
 
 
8
 
9
  **Decoding temperature**: The temperature parameter in the SMILES/SELFIES decoder. Higher values lead to more explorative choices, smaller values culminate in mode collapse.
10
 
 
12
 
13
  **Number of samples**: How many samples should be generated (between 1 and 50).
14
 
15
+ **Limit**: Hypercube limits in the latent space.
16
+
17
+ **Number of steps**: Number of steps for a GP optimization round. Higher values make the run slower. Has to be at least `Number of initial points`.
18
+
19
+ **Number of initial points**: Number of initial points evaluated. Higher values make the run slower.
20
+
21
+ **Number of optimization rounds**: Maximum number of optimization rounds.
22
+
23
+ **Sampling variance**: Variance of the Gaussian noise applied during sampling from the optimal point.
24
+
25
+ **Samples for evaluation**: Number of samples averaged for each minimization function evaluation.
26
+
27
+ **Max. sampling steps**: Maximum number of sampling steps in an optimization round.
28
 
29
+ **Seed**: The random seed used for initialization.
30
 
31
+
32
+
33
+ # Model card -- PaccMannGP
34
+
35
+ **Model Details**: [PaccMann<sup>GP</sup>](https://github.com/PaccMann/paccmann_gp) is a language-based Variational Autoencoder that is coupled with a Gaussian Process for controlled sampling. This model systematically explores the latent space of a trained molecular VAE.
36
 
37
  **Developers**: Jannis Born, Matteo Manica and colleagues from IBM Research.
38
 
39
  **Distributors**: Original authors' code wrapped and distributed by GT4SD Team (2023) from IBM Research.
40
 
41
+ **Model date**: Published in 2022.
42
 
43
+ **Model version**: A molecular VAE trained on 1.5M molecules from ChEMBL.
 
 
44
 
45
+ **Model type**: A language-based molecular generative model that can be explored with Gaussian Processes to generate molecules with desired properties.
46
 
47
  **Information about training algorithms, parameters, fairness constraints or other applied approaches, and features**:
48
+ Described in the [original paper](https://pubs.acs.org/doi/10.1021/acs.jcim.1c00889).
 
49
 
50
  **Paper or other resource for more information**:
51
+ [Active Site Sequence Representations of Human Kinases Outperform Full Sequence Representations for Affinity Prediction and Inhibitor Generation: 3D Effects in a 1D Model (2022; *Journal of Chemical Information & Modeling*)](https://pubs.acs.org/doi/10.1021/acs.jcim.1c00889).
 
52
 
53
  **License**: MIT
54
 
 
62
 
63
  **Factors**: Not applicable.
64
 
65
+ **Metrics**: High reward on generating molecules with desired properties.
66
 
67
+ **Datasets**: ChEMBL.
68
 
69
  **Ethical Considerations**: Unclear, please consult with original authors in case of questions.
70
 
 
73
  Model card prototype inspired by [Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)
74
 
75
  ## Citation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  ```bib
77
+ @article{born2022active,
78
+ author = {Born, Jannis and Huynh, Tien and Stroobants, Astrid and Cornell, Wendy D. and Manica, Matteo},
79
+ title = {Active Site Sequence Representations of Human Kinases Outperform Full Sequence Representations for Affinity Prediction and Inhibitor Generation: 3D Effects in a 1D Model},
80
+ journal = {Journal of Chemical Information and Modeling},
81
+ volume = {62},
82
+ number = {2},
83
+ pages = {240-257},
84
+ year = {2022},
85
+ doi = {10.1021/acs.jcim.1c00889},
86
+ note ={PMID: 34905358},
87
+ URL = {https://doi.org/10.1021/acs.jcim.1c00889}
88
  }
89
  ```
model_cards/description.md CHANGED
@@ -1,9 +1,6 @@
1
  <img align="right" src="https://raw.githubusercontent.com/GT4SD/gt4sd-core/main/docs/_static/gt4sd_logo.png" alt="logo" width="120" >
2
 
3
- [PaccMann<sup>RL</sup>](https://github.com/PaccMann/paccmann_rl) is a language-based molecular generative model that can be conditioned (primed) on protein targets or gene expression profiles and produces molecules with high affinity toward the context vector. This model has been developed at IBM Research and is distributed by the **GT4SD** (Generative Toolkit for Scientific Discovery) team. For details please see the two publications:
4
- - [Born et al., (2021), *iScience*](https://www.cell.com/iscience/fulltext/S2589-0042(21)00237-6) for the model conditionable on gene expression profiles.
5
- - [Born et al., (2021), *Machine Learning: Science & Technology*](https://iopscience.iop.org/article/10.1088/2632-2153/abe808/meta) for the model conditionable on protein targets.
6
-
7
 
8
  For **examples** and **documentation** of the model parameters, please see below.
9
  Moreover, we provide a **model card** ([Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)) at the bottom of this page.
 
1
  <img align="right" src="https://raw.githubusercontent.com/GT4SD/gt4sd-core/main/docs/_static/gt4sd_logo.png" alt="logo" width="120" >
2
 
3
+ [PaccMann<sup>GP</sup>](https://github.com/PaccMann/paccmann_gp) is a language-based Variational Autoencoder that is coupled with a Gaussian Process for controlled sampling. For details of the methodology, please see [Born et al., (2022), *Journal of Chemical Information & Modeling*](https://pubs.acs.org/doi/10.1021/acs.jcim.1c00889).
 
 
 
4
 
5
  For **examples** and **documentation** of the model parameters, please see below.
6
  Moreover, we provide a **model card** ([Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)) at the bottom of this page.
model_cards/examples.csv CHANGED
@@ -1,3 +1 @@
1
- Protein_v0,Conditional,MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTT,,1.2,100,10
2
- Protein_v0,Unbiased,,,1.4,250,10
3
- Omic_v0,Conditional,,0.08 0.9 0.47 0.91 0.7 0.88 0.95 0.37 0.72 0.42 0.63 0.77 0.65 0.83 0.48 0.31 0.36 0.33 0.64 0.33 1.0 0.82 0.49 0.98 0.96 0.86 0.1 0.92 0.13 0.41 0.88 0.79 0.88 0.01 0.3 0.98 0.91 0.83 0.06 0.77 0.56 0.87 0.78 0.27 0.97 0.14 0.71 0.1 0.08 0.63 0.53 0.6 0.66 0.04 0.46 0.6 0.59 0.36 0.65 0.57 0.96 0.42 0.37 0.18 0.71 0.5 0.54 0.22 0.21 0.53 0.66 0.9 0.4 0.95 0.48 0.81 0.47 0.27 0.56 0.77 0.32 0.66 0.01 0.82 0.29 0.81 0.7 0.77 0.65 0.36 0.78 0.31 0.85 0.69 0.12 0.04 0.39 0.11 0.13 0.15 0.35 0.97 0.66 0.35 0.78 0.33 0.48 0.8 0.26 0.05 0.69 0.07 0.92 0.22 0.35 0.13 0.22 0.94 0.73 0.81 0.29 0.3 0.13 0.06 0.9 0.62 0.19 0.69 0.72 0.55 0.34 0.26 0.72 0.95 0.81 0.78 0.5 0.47 0.67 0.49 0.48 0.75 0.52 0.91 0.42 0.62 0.8 0.17 1.0 0.35 0.63 0.02 0.79 0.67 0.99 0.86 0.71 0.15 0.13 0.54 0.19 0.81 0.56 0.98 0.16 0.15 0.69 0.17 0.66 0.74 0.65 0.9 0.73 0.61 0.69 0.19 0.04 0.72 0.41 0.35 0.93 0.91 0.34 0.35 0.92 0.45 0.34 0.52 0.73 0.39 0.54 0.83 0.99 0.68 0.16 0.6 0.48 0.18 0.96 0.7 0.18 0.77 0.6 0.07 0.99 0.97 0.41 0.25 0.98 0.85 0.95 0.59 0.77 0.18 0.22 0.39 0.33 0.46 0.07 0.16 0.81 0.0 0.53 0.49 0.9 0.57 0.03 0.26 0.24 0.57 0.63 0.88 0.57 0.73 0.6 0.71 0.29 0.25 0.94 0.23 0.93 0.07 0.35 0.59 0.66 0.51 0.25 0.51 0.47 0.04 0.85 0.15 0.4 0.51 0.0 0.29 0.29 0.07 0.14 0.77 0.1 0.31 0.95 0.52 0.48 0.24 0.71 0.27 0.93 0.77 0.04 0.92 0.08 0.92 0.68 0.32 0.15 0.77 0.63 0.73 0.14 0.83 0.76 0.96 0.72 0.57 0.92 0.35 0.62 0.21 0.46 0.66 0.89 0.52 0.35 0.71 0.0 0.78 0.51 0.34 0.05 0.57 0.34 0.54 0.57 0.81 0.88 0.61 0.53 0.98 0.26 0.34 0.57 0.94 0.09 0.94 0.15 0.81 0.15 0.83 0.83 0.73 0.33 0.69 0.89 0.46 0.96 0.12 0.82 0.89 0.45 0.26 0.84 0.48 0.51 0.43 0.12 0.74 0.32 0.19 0.8 0.04 0.61 0.63 0.23 0.22 0.7 0.14 0.63 0.35 0.89 0.4 0.1 0.1 0.56 0.98 0.7 0.41 0.78 0.14 0.04 0.97 0.32 0.66 0.54 0.66 0.8 0.86 0.36 0.99 0.01 0.41 0.62 0.81 0.14 0.84 0.49 0.3 0.4 0.13 0.2 0.05 0.29 0.11 0.75 0.87 0.71 0.25 0.43 0.67 0.49 0.2 0.77 0.85 0.32 0.94 0.51 0.95 0.54 0.22 0.7 0.97 0.71 
0.24 0.88 0.9 0.61 0.99 0.57 0.25 0.01 0.09 0.83 0.83 0.89 0.58 0.95 0.86 0.06 0.88 0.27 0.12 0.7 0.17 0.23 0.43 0.61 0.51 0.65 0.02 0.19 0.61 0.69 0.14 0.89 0.3 0.86 0.55 0.06 0.46 0.78 0.82 0.34 0.63 0.38 0.12 0.15 0.45 0.93 0.08 0.54 0.94 0.64 0.74 0.4 0.23 0.18 0.27 0.44 0.6 0.82 0.19 0.13 0.48 0.19 0.99 0.66 0.69 0.86 0.47 0.15 0.94 0.53 0.07 0.61 0.44 0.62 0.85 0.16 0.66 0.58 0.63 0.55 0.38 0.02 0.68 0.91 0.89 0.63 0.25 0.58 0.93 0.52 0.7 0.64 0.81 0.47 0.21 0.18 0.17 0.78 0.46 0.31 0.2 0.31 0.37 0.66 0.46 0.11 1.0 0.21 0.39 0.12 0.36 0.83 0.52 0.76 0.23 0.62 0.17 0.21 0.07 0.78 0.12 0.59 0.76 0.33 0.49 0.13 0.67 0.44 0.92 0.84 0.18 0.73 0.81 0.68 0.27 0.28 0.14 0.23 0.98 0.07 0.34 0.2 0.78 0.44 0.27 0.7 0.88 0.28 0.96 0.07 0.33 0.65 0.9 0.99 0.75 0.32 0.68 0.54 0.57 0.28 0.57 0.96 0.91 0.0 0.0 0.32 0.66 0.08 0.7 0.14 0.88 0.91 0.85 0.17 0.91 0.31 0.47 0.69 0.41 0.8 0.08 0.59 0.66 0.79 0.82 0.28 0.11 0.05 0.11 0.61 0.66 0.25 0.32 0.53 0.8 0.11 0.5 0.6 0.73 0.31 0.11 0.2 1.0 0.79 0.88 0.77 0.37 0.51 0.25 0.89 0.79 0.8 0.79 0.96 0.45 0.36 0.14 0.64 0.85 0.75 0.23 0.64 0.23 0.64 0.41 0.76 0.78 0.13 0.37 0.48 0.61 0.32 0.58 0.98 0.58 0.27 0.06 0.78 0.05 0.56 0.14 0.57 0.2 0.68 0.61 0.58 0.36 0.39 0.99 0.63 0.12 0.82 0.05 0.54 0.96 0.27 0.2 0.94 0.03 0.55 0.9 0.47 0.61 0.83 0.72 0.9 0.94 0.53 0.11 0.57 0.96 0.64 0.35 0.81 0.72 0.59 0.45 0.85 0.98 0.44 0.08 0.12 0.5 0.17 0.31 0.8 0.49 0.13 0.63 0.83 0.32 0.22 0.13 0.76 0.18 0.4 0.81 0.65 0.02 0.94 0.39 0.0 0.58 0.96 0.93 0.33 0.22 0.12 0.78 0.22 0.65 0.82 0.83 0.79 0.09 0.86 0.55 0.16 0.95 0.76 0.22 0.06 0.21 0.58 0.63 0.31 0.21 0.99 0.19 0.13 0.68 0.33 0.82 0.91 0.42 0.37 0.55 0.66 0.29 0.36 0.75 0.62 1.0 0.71 0.21 0.17 0.73 0.23 0.6 0.99 0.85 0.22 0.58 0.4 0.97 0.46 0.69 0.19 0.78 0.26 0.0 0.74 0.43 0.17 0.05 0.74 0.46 0.23 0.64 0.13 0.47 0.14 0.54 0.48 0.88 0.64 0.23 0.48 0.82 0.81 0.56 0.99 0.07 0.07 0.53 0.74 0.67 0.52 0.66 0.14 0.52 0.46 0.85 0.44 0.05 0.13 0.56 0.38 0.57 0.15 0.84 0.99 0.97 0.0 0.12 0.07 
0.79 0.29 0.02 0.54 0.39 0.26 0.28 0.44 0.88 0.62 0.63 0.16 0.67 0.66 0.03 0.97 0.83 0.95 0.84 0.95 0.56 0.67 0.38 0.71 0.16 0.43 0.29 0.34 0.71 0.44 0.63 0.7 0.11 0.72 0.23 0.94 0.02 0.33 0.33 0.92 0.35 0.31 0.17 0.36 0.91 0.75 0.1 0.65 0.83 0.79 0.58 0.43 0.8 0.19 0.64 0.3 0.57 0.01 0.41 0.9 0.46 0.31 0.88 0.19 0.02 0.75 0.07 0.45 0.18 0.25 0.01 0.97 0.75 0.64 0.23 0.34 0.07 0.21 0.22 0.02 0.92 0.02 0.69 0.1 0.86 0.05 0.02 0.81 0.96 0.85 0.13 0.55 0.99 0.49 0.89 0.13 0.52 0.91 0.69 0.97 0.95 0.81 0.12 0.92 0.44 0.89 0.57 0.47 0.47 0.78 0.12 0.26 0.24 0.44 0.74 0.43 0.06 0.32 0.89 0.03 0.64 0.18 0.22 0.25 0.14 0.24 0.72 0.96 0.72 0.96 0.52 0.7 0.66 0.88 0.25 0.91 0.14 0.52 0.7 0.56 0.59 0.43 0.21 0.8 0.67 0.33 0.63 0.55 0.55 0.92 0.16 0.31 0.61 0.29 0.9 0.06 0.69 0.89 0.12 0.58 0.74 0.83 0.8 0.14 0.04 0.69 0.28 0.62 0.77 0.11 0.62 0.18 0.59 0.17 0.58 0.1 0.08 0.61 0.46 0.2 0.6 0.94 0.65 0.1 0.47 0.35 0.51 0.8 0.2 0.06 0.86 1.0 0.73 0.43 0.41 0.88 0.46 0.83 0.5 0.15 0.22 0.85 0.79 0.5 0.67 0.99 0.89 0.75 0.82 0.07 0.45 0.54 0.82 0.34 0.01 0.97 0.41 0.53 0.18 0.56 0.02 0.63 0.64 0.21 0.84 0.25 0.41 0.46 0.73 0.91 0.71 0.16 0.01 0.09 0.95 0.7 0.45 0.86 0.9 0.04 0.98 0.66 0.93 0.58 0.37 0.62 0.73 0.37 0.3 0.71 0.95 0.41 0.79 0.45 0.71 0.57 0.24 0.43 0.07 0.85 0.53 0.57 0.58 0.45 0.82 0.92 0.17 0.23 0.29 0.62 0.03 0.36 0.68 0.5 0.69 0.07 0.07 0.36 0.94 0.06 0.4 0.93 0.48 0.17 0.78 0.66 0.45 0.82 0.93 0.99 0.51 0.19 0.32 0.47 0.69 0.19 0.35 0.19 0.62 0.34 0.52 0.42 0.76 0.05 0.9 0.53 0.59 0.52 0.43 0.73 0.43 0.37 0.09 0.47 0.59 0.78 0.83 0.85 0.21 0.95 0.47 0.87 0.43 0.95 0.18 0.13 0.95 0.79 0.62 0.02 0.79 0.28 0.87 0.71 0.13 0.53 0.02 0.73 0.6 0.13 0.75 0.07 0.02 0.34 0.58 0.55 0.4 0.42 0.46 0.43 0.98 0.86 0.31 0.77 0.64 0.97 0.6 0.91 0.94 0.9 0.34 0.78 0.0 0.49 0.17 0.86 0.47 0.3 0.62 0.33 0.86 0.62 0.65 0.36 0.4 0.08 0.67 0.92 0.76 0.87 0.61 0.41 0.3 0.65 0.25 0.37 0.3 0.57 0.77 0.64 0.1 0.3 0.6 0.52 0.45 0.1 0.02 0.83 0.57 0.41 0.46 0.55 0.41 0.77 0.39 0.03 0.0 0.9 
0.42 0.22 0.73 0.48 0.94 0.15 0.14 0.32 0.65 0.6 0.03 0.64 0.15 0.42 0.96 0.41 0.53 0.43 0.3 0.76 0.93 0.32 0.53 0.62 0.31 0.54 0.2 0.66 0.68 0.39 0.01 0.99 0.25 0.71 0.19 0.52 0.93 0.96 0.68 1.0 0.4 0.66 0.64 0.09 0.28 0.47 0.01 0.99 0.36 0.09 0.57 0.79 0.41 0.35 0.3 0.5 0.28 0.71 0.27 0.13 0.06 0.46 0.39 0.37 0.88 0.99 0.3 0.09 0.01 0.98 0.74 0.12 0.01 0.15 0.64 0.68 0.27 0.09 0.89 0.3 0.64 0.34 0.44 0.71 0.01 0.0 0.33 0.12 0.05 0.74 0.81 0.49 0.45 0.94 0.86 0.58 0.56 0.07 0.91 0.54 0.64 0.82 0.17 0.69 0.7 0.99 0.35 0.62 0.6 0.93 0.38 0.32 0.01 0.79 0.62 0.97 0.74 0.71 0.54 0.08 0.01 0.09 0.95 0.53 0.52 0.15 0.18 0.38 0.71 0.57 0.2 0.87 1.0 0.43 0.93 0.49 0.65 0.42 0.29 0.63 0.53 0.34 0.84 0.23 0.38 0.51 0.88 0.07 0.17 0.9 0.13 0.83 0.54 0.54 0.07 0.49 0.83 0.94 0.04 0.79 0.18 0.46 0.51 0.73 0.68 0.04 0.89 0.4 0.16 0.9 0.36 0.73 0.36 0.39 0.42 0.03 0.6 0.85 0.2 0.88 0.64 0.07 0.04 0.58 0.11 0.36 0.19 0.12 0.74 0.54 0.65 0.37 0.31 0.78 0.94 0.02 0.56 0.72 0.18 0.03 0.12 0.3 0.55 0.74 0.22 0.14 0.42 0.23 0.71 0.78 0.66 0.82 0.12 0.83 0.73 0.7 0.22 0.89 0.81 0.34 0.61 0.2 0.68 0.22 0.84 0.03 0.99 0.06 0.23 0.68 0.71 0.41 0.97 0.04 0.78 0.88 0.8 0.72 0.63 0.68 0.94 0.58 0.07 0.53 0.51 0.04 0.45 0.19 0.05 0.23 0.67 0.13 0.41 0.62 0.18 0.01 0.34 0.91 0.88 0.21 0.71 0.47 0.61 0.51 0.65 0.95 0.33 0.0 0.16 0.56 0.21 0.06 0.06 0.06 0.8 0.39 0.83 0.29 0.04 0.74 0.27 0.25 0.35 0.78 0.44 0.23 0.95 0.97 0.89 0.83 0.85 0.41 0.95 0.69 0.09 0.91 0.63 0.96 0.76 0.16 0.75 0.41 0.83 0.63 0.83 0.86 0.82 0.04 0.32 0.3 0.21 0.39 0.48 0.8 0.21 0.4 0.96 0.71 0.63 0.54 0.95 0.81 0.11 0.83 0.63 0.41 0.33 0.32 0.58 0.72 0.82 0.73 0.01 0.5 0.93 0.69 0.91 0.44 0.18 0.28 0.61 0.5 0.98 0.93 0.91 0.72 0.59 0.63 0.03 0.82 0.62 0.07 0.51 0.53 0.89 0.47 0.04 0.08 0.17 0.2 0.88 0.78 0.93 0.71 0.24 0.22 0.32 0.87 0.03 0.01 0.85 0.77 0.82 0.64 0.2 0.83 0.88 0.23 0.44 0.72 0.2 0.98 0.11 0.46 0.59 0.3 0.82 0.01 0.66 0.8 0.91 0.0 0.86 0.84 0.56 0.49 0.22 0.27 0.02 0.62 0.55 0.62 0.79 0.94 0.89 0.56 0.87 
0.96 0.43 0.58 0.63 0.22 0.37 0.44 0.85 0.28 0.25 0.4 0.34 0.14 0.8 0.84 0.89 0.06 0.45 0.02 0.07 0.85 0.43 0.13 0.21 0.21 0.05 0.23 0.85 0.44 0.8 0.52 0.39 0.65 0.67 0.64 0.79 0.3 0.01 0.3 0.11 0.02 0.96 0.05 0.44 0.06 0.01 0.77 0.19 0.06 0.31 0.48 0.97 0.64 0.92 0.76 0.07 0.77 0.95 0.98 0.63 0.25 0.27 0.76 0.96 0.24 0.18 0.8 0.0 0.96 0.24 0.52 0.59 0.65 0.17 0.32 0.55 0.59 0.62 0.82 0.59 0.29 0.42 0.12 0.24 0.02 0.66 0.59 0.78 0.37 0.19 0.96 0.18 0.2 0.99 0.76 0.58 0.35 0.54 0.89 0.14 0.58 0.1 0.97 0.38 0.82 0.48 0.06 0.83 1.0 0.99 0.77 0.41 0.08 0.87 0.75 0.13 0.52 0.58 0.68 0.03 0.92 0.55 0.04 0.56 0.63 0.28 0.8 0.39 0.68 0.58 0.01 0.23 0.28 0.98 0.96 0.05 0.28 0.44 0.31 0.91 0.81 0.18 0.65 0.53 0.02 0.41 0.98 0.09 0.12 0.84 0.6 0.17 0.2 0.58 0.35 0.25 0.74 0.83 0.55 0.18 0.8 0.33 0.04 0.56 0.85 0.22 0.83 0.48 0.53 0.54 0.51 0.06 0.76 0.1 0.43 0.21 0.46 0.97 0.48 0.77 0.11 0.36 0.9 0.52 0.06 0.23 0.8 0.09 0.11 0.57 0.59 0.76 0.44 0.15 0.46 0.07 0.86 0.01 0.49 0.05 0.54 0.14 0.29 0.01 0.81 0.45 0.45 0.12 0.82 0.47 0.93 0.51 0.04 0.26 0.14 0.5 0.06 0.25 0.62 0.95 0.07 0.28 0.32 0.03 0.28 0.45 0.86 0.24 0.22 0.78 0.63 0.4 0.33 0.56 0.26 0.41 0.63 0.73 0.73 0.35 0.44 0.67 0.03 0.07 0.68 0.86 0.35 0.58 0.75 0.16 0.37 0.87 0.66 0.59 0.67 0.46 0.64 0.78 0.97 0.45 0.98 0.64 0.41 0.58 0.51 0.97 0.95 0.9 0.34 0.1 0.76 0.37 0.05 0.57 0.72 0.91 0.4 0.43 0.78 0.78 0.39 0.3 0.21 0.88 0.36 0.54 0.87 0.84 0.19 0.22 0.89 0.89 0.85 0.77 0.86 0.46 0.5 0.88 0.18 0.4 0.61 0.07 0.06 0.65 0.05 0.31 0.55 0.87 0.05 0.54 0.28 0.28 0.35 0.1 0.55 0.82 0.86 0.12 0.17 0.69 0.74 0.13 0.08 0.6 0.4 0.97 0.32 0.81 0.14 0.97 0.65 0.72 0.32 0.57 0.69 0.74 0.65 0.75 0.37 0.88 0.97 0.88 0.7 0.98 0.36 0.1 0.35 0.15 0.23 0.09 0.3 1.0 0.21 0.99 0.44 0.23 0.21 0.15 0.43 0.77 0.17 0.32 0.55 0.8 0.08 0.72 0.49 0.31 0.39 0.48 0.29 0.78 0.64 0.04 0.11 0.69 0.76 0.9 0.79 0.32 0.03 0.68 0.67 0.35 0.55 0.01 0.03 0.22 0.31 0.3 0.28 0.14 0.01 0.73 0.86 0.67 0.06 0.45 0.32 0.78 0.22 0.84 0.19 0.29 0.8 0.61 0.23 
0.71 0.94 0.04 0.86 0.87 0.88 0.65 0.04 0.93 0.1 0.73 0.38 0.88 0.8 0.54 0.62 0.2 0.76 0.66 0.46 0.0 0.32 0.38 0.92 0.85 0.84 0.9 0.85 0.08 0.32 0.98 0.57 0.72 0.48 0.86 0.23 1.0 0.56 0.48 0.13 0.61 0.46 0.38 0.58 0.06 0.95 0.37 0.94 0.11 0.44 0.53 0.26 0.98 0.67 0.28 0.65 0.28 0.48 0.52 0.58 0.01 0.1 0.03 0.29 0.14 0.33 0.5 0.98 0.99 0.68 0.28 0.12 0.6 0.65 0.77 0.69 0.66 0.5 0.76 0.79 0.79 0.64 0.67 0.35 0.78 0.71 0.47 0.5 0.79 0.69 0.13 0.18 0.89 0.29 0.79 0.92 0.54,1.2,100,10
 
1
+ v0|["qed"]||1.2|100|10|4|8|4|1|0.1|3|4|42
 
 
utils.py CHANGED
@@ -1,6 +1,9 @@
1
  import logging
2
  from collections import defaultdict
3
- from typing import List
 
 
 
4
 
5
  import mols2grid
6
  import pandas as pd
@@ -9,9 +12,23 @@ logger = logging.getLogger(__name__)
9
  logger.addHandler(logging.NullHandler())
10
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def draw_grid_generate(
13
  samples: List[str],
14
- seeds: List[str] = [],
 
15
  n_cols: int = 3,
16
  size=(140, 200),
17
  ) -> str:
@@ -27,14 +44,22 @@ def draw_grid_generate(
27
  HTML to display
28
  """
29
 
 
 
 
30
  result = defaultdict(list)
31
  result.update(
32
- {
33
- "SMILES": seeds + samples,
34
- "Name": [f"Seed_{i}" for i in range(len(seeds))]
35
- + [f"Generated_{i}" for i in range(len(samples))],
36
- },
37
  )
 
 
 
 
 
 
 
 
 
38
 
39
  result_df = pd.DataFrame(result)
40
  obj = mols2grid.display(
 
1
  import logging
2
  from collections import defaultdict
3
+ from typing import List, Callable
4
+ from gt4sd.properties import PropertyPredictorRegistry
5
+ from gt4sd.algorithms.prediction.paccmann.core import PaccMann, AffinityPredictor
6
+
7
 
8
  import mols2grid
9
  import pandas as pd
 
12
  logger.addHandler(logging.NullHandler())
13
 
14
 
15
+ def get_affinity_function(target: str) -> Callable:
16
+ return lambda mols: PaccMann(
17
+ AffinityPredictor(protein_targets=[target] * len(mols), ligands=mols)
18
+ ).sample(len(mols))
19
+
20
+
21
+ EVAL_DICT = {
22
+ "qed": PropertyPredictorRegistry.get_property_predictor("qed"),
23
+ "sas": PropertyPredictorRegistry.get_property_predictor("sas"),
24
+ "molwt": PropertyPredictorRegistry.get_property_predictor("molecular_weight"),
25
+ }
26
+
27
+
28
  def draw_grid_generate(
29
  samples: List[str],
30
+ properties: List[str],
31
+ protein_target: str,
32
  n_cols: int = 3,
33
  size=(140, 200),
34
  ) -> str:
 
44
  HTML to display
45
  """
46
 
47
+ if protein_target != "":
48
+ EVAL_DICT.update({"affinity": get_affinity_function(protein_target)})
49
+
50
  result = defaultdict(list)
51
  result.update(
52
+ {"SMILES": samples, "Name": [f"Generated_{i}" for i in range(len(samples))]},
 
 
 
 
53
  )
54
+ if "affinity" in properties:
55
+ properties.remove("affinity")
56
+ vals = EVAL_DICT["affinity"](samples)
57
+ result["affinity"] = vals
58
+ # Fill properties
59
+ for sample in samples:
60
+ for prop in properties:
61
+ value = EVAL_DICT[prop](sample)
62
+ result[prop].append(f"{prop} = {value}")
63
 
64
  result_df = pd.DataFrame(result)
65
  obj = mols2grid.display(