paccmann_gp

Sleeping

App Files Files Community

jannisborn committed on Jan 8, 2023

Commit

e83e5dc

•

1 Parent(s): 5984d9a

update

Browse files

Files changed (6) hide show

README.md +1 -1
app.py +85 -57
model_cards/article.md +40 -47
model_cards/description.md +1 -4
model_cards/examples.csv +1 -3
utils.py +32 -7

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: GT4SD - PaccMannRL
 emoji: 💡
 colorFrom: green
 colorTo: blue

 ---
+title: GT4SD - PaccMannGP
 emoji: 💡
 colorFrom: green
 colorTo: blue

app.py CHANGED Viewed

@@ -3,14 +3,15 @@ import pathlib
 from typing import List
 import gradio as gr
-import numpy as np
 import pandas as pd
-from gt4sd.algorithms.conditional_generation.paccmann_rl import (
-    PaccMannRL,
-    PaccMannRLOmicBasedGenerator,
-    PaccMannRLProteinBasedGenerator,
 )
-from gt4sd.algorithms.generation.paccmann_vae import PaccMannVAE, PaccMannVAEGenerator
 from gt4sd.algorithms.registry import ApplicationsRegistry
 from utils import draw_grid_generate
@@ -19,53 +20,57 @@ logger = logging.getLogger(__name__)
 logger.addHandler(logging.NullHandler())
 def run_inference(
     algorithm_version: str,
-    inference_type: str,
     protein_target: str,
-    omics_target: str,
     temperature: float,
     length: float,
     number_of_samples: int,
 ):
-    if inference_type == "Unbiased":
-        algorithm_class = PaccMannVAEGenerator
-        model_class = PaccMannVAE
-        target = None
-    elif inference_type == "Conditional":
-        if "Protein" in algorithm_version:
-            algorithm_class = PaccMannRLProteinBasedGenerator
-            target = protein_target
-        elif "Omic" in algorithm_version:
-            algorithm_class = PaccMannRLOmicBasedGenerator
-            try:
-                test_target = [float(x) for x in omics_target.split(" ")]
-            except Exception:
-                raise ValueError(
-                    f"Expected 2128 space-separated omics values, got {omics_target}"
-                )
-            if len(test_target) != 2128:
-                raise ValueError(
-                    f"Expected 2128 omics values, got {len(target)}: {target}"
-                )
-            target = f"[{omics_target.replace(' ', ',')}]"
-        else:
-            raise ValueError(f"Unknown algorithm version {algorithm_version}")
-        model_class = PaccMannRL
-    else:
-        raise ValueError(f"Unknown inference type {inference_type}")
-    config = algorithm_class(
-        algorithm_version.split("_")[-1],
         temperature=temperature,
         generated_length=length,
     )
-    print("Target is ", target)
-    print(type(target), len(target))
-    model = model_class(config, target=target)
     samples = list(model.sample(number_of_samples))
-    return draw_grid_generate(samples=samples, n_cols=5)
 if __name__ == "__main__":
@@ -73,18 +78,17 @@ if __name__ == "__main__":
     # Preparation (retrieve all available algorithms)
     all_algos = ApplicationsRegistry.list_available()
     algos = [
-        x["algorithm_application"].split("Based")[0].split("PaccMannRL")[-1]
-        + "_"
-        + x["algorithm_version"]
         for x in list(filter(lambda x: "PaccMannRL" in x["algorithm_name"], all_algos))
     ]
     # Load metadata
     metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")
-    examples = pd.read_csv(metadata_root.joinpath("examples.csv"), header=None).fillna(
-        ""
-    )
     with open(metadata_root.joinpath("article.md"), "r") as f:
         article = f.read()
@@ -93,24 +97,20 @@ if __name__ == "__main__":
     demo = gr.Interface(
         fn=run_inference,
-        title="PaccMannRL",
         inputs=[
-            gr.Dropdown(algos, label="Algorithm version", value="Protein_v0"),
-            gr.Radio(
-                choices=["Conditional", "Unbiased"],
-                label="Inference type",
-                value="Conditional",
             ),
             gr.Textbox(
                 label="Protein target",
                 placeholder="MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTT",
                 lines=1,
             ),
-            gr.Textbox(
-                label="Gene expression target",
-                placeholder=f"{' '.join(map(str, np.round(np.random.rand(2128), 2)))}",
-                lines=1,
-            ),
             gr.Slider(minimum=0.5, maximum=2, value=1, label="Decoding temperature"),
             gr.Slider(
                 minimum=5,
@@ -122,6 +122,34 @@ if __name__ == "__main__":
             gr.Slider(
                 minimum=1, maximum=50, value=10, label="Number of samples", step=1
             ),
         ],
         outputs=gr.HTML(label="Output"),
         article=article,

 from typing import List
 import gradio as gr
 import pandas as pd
+from gt4sd.algorithms.controlled_sampling.paccmann_gp import (
+    PaccMannGPGenerator,
+    PaccMannGP,
 )
+from gt4sd.algorithms.controlled_sampling.paccmann_gp.implementation import (
+    MINIMIZATION_FUNCTIONS,
+)
 from gt4sd.algorithms.registry import ApplicationsRegistry
 from utils import draw_grid_generate
 logger.addHandler(logging.NullHandler())
+MINIMIZATION_FUNCTIONS.pop("callable", None)
 def run_inference(
     algorithm_version: str,
+    targets: List[str],
     protein_target: str,
     temperature: float,
     length: float,
     number_of_samples: int,
+    limit: int,
+    number_of_steps: int,
+    number_of_initial_points: int,
+    number_of_optimization_rounds: int,
+    sampling_variance: float,
+    samples_for_evaluation: int,
+    maximum_number_of_sampling_steps: int,
+    seed: int,
 ):
+    config = PaccMannGPGenerator(
+        algorithm_version=algorithm_version.split("_")[-1],
+        batch_size=32,
         temperature=temperature,
         generated_length=length,
+        limit=limit,
+        acquisition_function="EI",
+        number_of_steps=number_of_steps,
+        number_of_initial_points=number_of_initial_points,
+        initial_point_generator="random",
+        number_of_optimization_rounds=number_of_optimization_rounds,
+        sampling_variance=sampling_variance,
+        samples_for_evaluation=samples_for_evaluation,
+        maximum_number_of_sampling_steps=maximum_number_of_sampling_steps,
+        seed=seed,
     )
+    target = {i: {} for i in targets}
+    if "affinity" in targets:
+        target["affinity"]["protein"] = protein_target
+    else:
+        protein_target = ""
+    model = PaccMannGP(config, target=target)
     samples = list(model.sample(number_of_samples))
+    return draw_grid_generate(
+        samples=samples,
+        n_cols=5,
+        properties=set(target.keys()),
+        protein_target=protein_target,
+    )
 if __name__ == "__main__":
     # Preparation (retrieve all available algorithms)
     all_algos = ApplicationsRegistry.list_available()
     algos = [
+        x["algorithm_version"]
         for x in list(filter(lambda x: "PaccMannRL" in x["algorithm_name"], all_algos))
     ]
     # Load metadata
     metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")
+    examples = pd.read_csv(
+        metadata_root.joinpath("examples.csv"), header=None, sep="|"
+    ).fillna("")
+    examples[1] = examples[1].apply(eval)
     with open(metadata_root.joinpath("article.md"), "r") as f:
         article = f.read()
     demo = gr.Interface(
         fn=run_inference,
+        title="PaccMannGP",
         inputs=[
+            gr.Dropdown(algos, label="Algorithm version", value="v0"),
+            gr.CheckboxGroup(
+                choices=list(MINIMIZATION_FUNCTIONS.keys()),
+                value=["qed"],
+                multiselect=True,
+                label="Property goals",
             ),
             gr.Textbox(
                 label="Protein target",
                 placeholder="MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTT",
                 lines=1,
             ),
             gr.Slider(minimum=0.5, maximum=2, value=1, label="Decoding temperature"),
             gr.Slider(
                 minimum=5,
             gr.Slider(
                 minimum=1, maximum=50, value=10, label="Number of samples", step=1
             ),
+            gr.Slider(minimum=1, maximum=8, value=4.0, label="Limit"),
+            gr.Slider(minimum=1, maximum=32, value=8, label="Number of steps", step=1),
+            gr.Slider(
+                minimum=1, maximum=32, value=4, label="Number of initial points", step=1
+            ),
+            gr.Slider(
+                minimum=1,
+                maximum=4,
+                value=1,
+                label="Number of optimization rounds",
+                step=1,
+            ),
+            gr.Slider(minimum=0.01, maximum=1, value=0.1, label="Sampling variance"),
+            gr.Slider(
+                minimum=1,
+                maximum=10,
+                value=1,
+                label="Samples used for evaluation",
+                step=1,
+            ),
+            gr.Slider(
+                minimum=1,
+                maximum=64,
+                value=4,
+                label="Maximum number of sampling steps",
+                step=1,
+            ),
+            gr.Number(value=42, label="Seed", precision=0),
         ],
         outputs=gr.HTML(label="Output"),
         article=article,

model_cards/article.md CHANGED Viewed

@@ -1,12 +1,10 @@
 # Model documentation & parameters
-**Algorithm Version**: Which model version (either protein-target-driven or gene-expression-profile-driven) to use and which checkpoint to rely on.
-**Inference type**: Whether the model should be conditioned on the target (default) or whether the model is used in an  `Unbiased` manner.
-**Protein target**: An AAS of a protein target used for conditioning. Only use if `Inference type` is `Conditional` and if the `Algorithm version` is a Protein model.
-**Gene expression target**: A list of 2128 floats, representing the embedding of gene expression profile to be used for conditioning. Only use if `Inference type` is `Conditional` and if the `Algorithm version` is a Omic model.
 **Decoding temperature**: The temperature parameter in the SMILES/SELFIES decoder. Higher values lead to more explorative choices, smaller values culminate in mode collapse.
@@ -14,30 +12,43 @@
 **Number of samples**: How many samples should be generated (between 1 and 50).
-# Model card -- PaccMannRL
-**Model Details**: PaccMannRL is a language model for conditional molecular design. It consists of a domain-specific encoder (for protein targets or gene expression profiles) and a generic molecular decoder. Both components are finetuned together using RL to convert the context representation into a molecule with high affinity toward the context (i.e., binding affinity to the protein or high inhibitory effect for the cell profile).
 **Developers**: Jannis Born, Matteo Manica and colleagues from IBM Research.
 **Distributors**: Original authors' code wrapped and distributed by GT4SD Team (2023) from IBM Research.
-**Model date**: Published in 2021.
-**Model version**: Models trained and distribuetd by the original authors.
-- **Protein_v0**: Molecular decoder pretrained on 1.5M molecules from ChEMBL. Protein encoder pretrained on 404k proteins from UniProt. Encoder and decoder finetuned on 41 SARS-CoV-2-related protein targets with a binding affinity predictor trained on BindingDB.
-- **Omic_v0**: Molecular decoder pretrained on 1.5M molecules from ChEMBL. Gene expression encoder pretrained on 12k gene expression profiles from TCGA. Encoder and decoder finetuned on a few hundred cancer cell profiles from GDSC with a IC50 predictor trained on GDSC.
-**Model type**: A language-based molecular generative model that can be optimized with RL to generate molecules with high affinity toward a context.
 **Information about training algorithms, parameters, fairness constraints or other applied approaches, and features**:
-- **Protein**: Parameters as provided on [(GitHub repo)](https://github.com/PaccMann/paccmann_sarscov2).
-- **Omics**: Parameters as provided on [(GitHub repo)](https://github.com/PaccMann/paccmann_rl).
 **Paper or other resource for more information**:
-- **Protein**: [PaccMannRL: De novo generation of hit-like anticancer molecules from transcriptomic data via reinforcement learning (2021; *iScience*)](https://www.cell.com/iscience/fulltext/S2589-0042(21)00237-6).
-- **Omics**: [Data-driven molecular design for discovery and synthesis of novel ligands: a case study on SARS-CoV-2 (2021; *Machine Learning: Science and Technology*)](https://iopscience.iop.org/article/10.1088/2632-2153/abe808/meta).
 **License**: MIT
@@ -51,9 +62,9 @@
 **Factors**: Not applicable.
-**Metrics**: High reward on generating molecules with high affinity toward context.
-**Datasets**: ChEMBL, UniProt, GDSC and BindingDB (see above).
 **Ethical Considerations**: Unclear, please consult with original authors in case of questions.
@@ -62,35 +73,17 @@
 Model card prototype inspired by [Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)
 ## Citation
-**Omics**:
-```bib
-@article{born2021paccmannrl,
-  title = {PaccMann\textsuperscript{RL}: De novo generation of hit-like anticancer molecules from transcriptomic data via reinforcement learning},
-  journal = {iScience},
-  volume = {24},
-  number = {4},
-  pages = {102269},
-  year = {2021},
-  issn = {2589-0042},
-  doi = {https://doi.org/10.1016/j.isci.2021.102269},
-  url = {https://www.cell.com/iscience/fulltext/S2589-0042(21)00237-6},
-  author = {Born, Jannis and Manica, Matteo and Oskooei, Ali and Cadow, Joris and Markert, Greta and {Rodr{\'{i}}guez Mart{\'{i}}nez}, Mar{\'{i}}a}
-}
-```
-**Proteins**:
 ```bib
-@article{born2021datadriven,
-  author = {Born, Jannis and Manica, Matteo and Cadow, Joris and Markert, Greta and Mill, Nil Adell and Filipavicius, Modestas and Janakarajan, Nikita and Cardinale, Antonio and Laino, Teodoro and {Rodr{\'{i}}guez Mart{\'{i}}nez}, Mar{\'{i}}a},
-  doi = {10.1088/2632-2153/abe808},
-  issn = {2632-2153},
-  journal = {Machine Learning: Science and Technology},
-  number = {2},
-  pages = {025024},
-  title = {{Data-driven molecular design for discovery and synthesis of novel ligands: a case study on SARS-CoV-2}},
-  url = {https://iopscience.iop.org/article/10.1088/2632-2153/abe808},
-  volume = {2},
-  year = {2021}
 }
 ```

 # Model documentation & parameters
+**Algorithm Version**: Which model version to use.
+**Property goals**: One or multiple properties that will be optimized.
+**Protein target**: The amino acid sequence (AAS) of a protein target used for conditioning. Leave blank unless you use `affinity` as a `property goal`.
 **Decoding temperature**: The temperature parameter in the SMILES/SELFIES decoder. Higher values lead to more explorative choices, smaller values culminate in mode collapse.
 **Number of samples**: How many samples should be generated (between 1 and 50).
+**Limit**: Hypercube limits in the latent space.
+**Number of steps**: Number of steps for a GP optimization round. Larger values take longer. Has to be at least `Number of initial points`.
+**Number of initial points**: Number of initial points evaluated. Larger values take longer.
+**Number of optimization rounds**: Maximum number of optimization rounds.
+**Sampling variance**: Variance of the Gaussian noise applied during sampling from the optimal point.
+**Samples used for evaluation**: Number of samples averaged for each minimization function evaluation.
+**Maximum number of sampling steps**: Maximum number of sampling steps in an optimization round.
+**Seed**: The random seed used for initialization.
+# Model card -- PaccMannGP
+**Model Details**: [PaccMann<sup>GP</sup>](https://github.com/PaccMann/paccmann_gp) is a language-based Variational Autoencoder that is coupled with a Gaussian process for controlled sampling. This model systematically explores the latent space of a trained molecular VAE.
 **Developers**: Jannis Born, Matteo Manica and colleagues from IBM Research.
 **Distributors**: Original authors' code wrapped and distributed by GT4SD Team (2023) from IBM Research.
+**Model date**: Published in 2022.
+**Model version**: A molecular VAE trained on 1.5M molecules from ChEMBL.
+**Model type**: A language-based molecular generative model that can be explored with Gaussian Processes to generate molecules with desired properties.
 **Information about training algorithms, parameters, fairness constraints or other applied approaches, and features**:
+Described in the [original paper](https://pubs.acs.org/doi/10.1021/acs.jcim.1c00889).
 **Paper or other resource for more information**:
+[Active Site Sequence Representations of Human Kinases Outperform Full Sequence Representations for Affinity Prediction and Inhibitor Generation: 3D Effects in a 1D Model (2022; *Journal of Chemical Information & Modeling*)](https://pubs.acs.org/doi/10.1021/acs.jcim.1c00889).
 **License**: MIT
 **Factors**: Not applicable.
+**Metrics**: High reward on generating molecules with desired properties.
+**Datasets**: ChEMBL.
 **Ethical Considerations**: Unclear, please consult with original authors in case of questions.
 Model card prototype inspired by [Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)
 ## Citation
 ```bib
+@article{born2022active,
+	author = {Born, Jannis and Huynh, Tien and Stroobants, Astrid and Cornell, Wendy D. and Manica, Matteo},
+	title = {Active Site Sequence Representations of Human Kinases Outperform Full Sequence Representations for Affinity Prediction and Inhibitor Generation: 3D Effects in a 1D Model},
+	journal = {Journal of Chemical Information and Modeling},
+	volume = {62},
+	number = {2},
+	pages = {240-257},
+	year = {2022},
+	doi = {10.1021/acs.jcim.1c00889},
+	note ={PMID: 34905358},
+	URL = {https://doi.org/10.1021/acs.jcim.1c00889}
 }
 ```

model_cards/description.md CHANGED Viewed

@@ -1,9 +1,6 @@
 <img align="right" src="https://raw.githubusercontent.com/GT4SD/gt4sd-core/main/docs/_static/gt4sd_logo.png" alt="logo" width="120" >
-[PaccMann<sup>RL</sup>](https://github.com/PaccMann/paccmann_rl) is a language-based molecular generative model that can be conditioned (primed) on protein targets or gene expression profiles and produces molecules with high affinity toward the context vector. This model has been developed at IBM Research and is distributed by the **GT4SD** (Generative Toolkit for Scientific Discovery) team. For details please see the two publications:
-- [Born et al., (2021), *iScience*](https://www.cell.com/iscience/fulltext/S2589-0042(21)00237-6) for the model conditionable on gene expression profiles.
-- [Born et al., (2021), *Machine Learning: Science & Technology*](https://iopscience.iop.org/article/10.1088/2632-2153/abe808/meta) for the model conditionable on protein targets.
 For **examples** and **documentation** of the model parameters, please see below.
 Moreover, we provide a **model card** ([Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)) at the bottom of this page.

 <img align="right" src="https://raw.githubusercontent.com/GT4SD/gt4sd-core/main/docs/_static/gt4sd_logo.png" alt="logo" width="120" >
+[PaccMann<sup>GP</sup>](https://github.com/PaccMann/paccmann_gp) is a language-based Variational Autoencoder that is coupled with a Gaussian process for controlled sampling. For details of the methodology, please see [Born et al., (2022), *Journal of Chemical Information & Modeling*](https://pubs.acs.org/doi/10.1021/acs.jcim.1c00889).
 For **examples** and **documentation** of the model parameters, please see below.
 Moreover, we provide a **model card** ([Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)) at the bottom of this page.

model_cards/examples.csv CHANGED Viewed

@@ -1,3 +1 @@
-Protein_v0,Conditional,MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTT,,1.2,100,10
-Protein_v0,Unbiased,,,1.4,250,10
-Omic_v0,Conditional,,0.08 0.9 0.47 0.91 0.7 0.88 0.95 0.37 0.72 0.42 0.63 0.77 0.65 0.83 0.48 0.31 0.36 0.33 0.64 0.33 1.0 0.82 0.49 0.98 0.96 0.86 0.1 0.92 0.13 0.41 0.88 0.79 0.88 0.01 0.3 0.98 0.91 0.83 0.06 0.77 0.56 0.87 0.78 0.27 0.97 0.14 0.71 0.1 0.08 0.63 0.53 0.6 0.66 0.04 0.46 0.6 0.59 0.36 0.65 0.57 0.96 0.42 0.37 0.18 0.71 0.5 0.54 0.22 0.21 0.53 0.66 0.9 0.4 0.95 0.48 0.81 0.47 0.27 0.56 0.77 0.32 0.66 0.01 0.82 0.29 0.81 0.7 0.77 0.65 0.36 0.78 0.31 0.85 0.69 0.12 0.04 0.39 0.11 0.13 0.15 0.35 0.97 0.66 0.35 0.78 0.33 0.48 0.8 0.26 0.05 0.69 0.07 0.92 0.22 0.35 0.13 0.22 0.94 0.73 0.81 0.29 0.3 0.13 0.06 0.9 0.62 0.19 0.69 0.72 0.55 0.34 0.26 0.72 0.95 0.81 0.78 0.5 0.47 0.67 0.49 0.48 0.75 0.52 0.91 0.42 0.62 0.8 0.17 1.0 0.35 0.63 0.02 0.79 0.67 0.99 0.86 0.71 0.15 0.13 0.54 0.19 0.81 0.56 0.98 0.16 0.15 0.69 0.17 0.66 0.74 0.65 0.9 0.73 0.61 0.69 0.19 0.04 0.72 0.41 0.35 0.93 0.91 0.34 0.35 0.92 0.45 0.34 0.52 0.73 0.39 0.54 0.83 0.99 0.68 0.16 0.6 0.48 0.18 0.96 0.7 0.18 0.77 0.6 0.07 0.99 0.97 0.41 0.25 0.98 0.85 0.95 0.59 0.77 0.18 0.22 0.39 0.33 0.46 0.07 0.16 0.81 0.0 0.53 0.49 0.9 0.57 0.03 0.26 0.24 0.57 0.63 0.88 0.57 0.73 0.6 0.71 0.29 0.25 0.94 0.23 0.93 0.07 0.35 0.59 0.66 0.51 0.25 0.51 0.47 0.04 0.85 0.15 0.4 0.51 0.0 0.29 0.29 0.07 0.14 0.77 0.1 0.31 0.95 0.52 0.48 0.24 0.71 0.27 0.93 0.77 0.04 0.92 0.08 0.92 0.68 0.32 0.15 0.77 0.63 0.73 0.14 0.83 0.76 0.96 0.72 0.57 0.92 0.35 0.62 0.21 0.46 0.66 0.89 0.52 0.35 0.71 0.0 0.78 0.51 0.34 0.05 0.57 0.34 0.54 0.57 0.81 0.88 0.61 0.53 0.98 0.26 0.34 0.57 0.94 0.09 0.94 0.15 0.81 0.15 0.83 0.83 0.73 0.33 0.69 0.89 0.46 0.96 0.12 0.82 0.89 0.45 0.26 0.84 0.48 0.51 0.43 0.12 0.74 0.32 0.19 0.8 0.04 0.61 0.63 0.23 0.22 0.7 0.14 0.63 0.35 0.89 0.4 0.1 0.1 0.56 0.98 0.7 0.41 0.78 0.14 0.04 0.97 0.32 0.66 0.54 0.66 0.8 0.86 0.36 0.99 0.01 0.41 0.62 0.81 0.14 0.84 0.49 0.3 0.4 0.13 0.2 0.05 0.29 0.11 0.75 0.87 0.71 0.25 0.43 0.67 0.49 0.2 0.77 0.85 0.32 0.94 0.51 0.95 0.54 0.22 0.7 0.97 0.71 
0.24 0.88 0.9 0.61 0.99 0.57 0.25 0.01 0.09 0.83 0.83 0.89 0.58 0.95 0.86 0.06 0.88 0.27 0.12 0.7 0.17 0.23 0.43 0.61 0.51 0.65 0.02 0.19 0.61 0.69 0.14 0.89 0.3 0.86 0.55 0.06 0.46 0.78 0.82 0.34 0.63 0.38 0.12 0.15 0.45 0.93 0.08 0.54 0.94 0.64 0.74 0.4 0.23 0.18 0.27 0.44 0.6 0.82 0.19 0.13 0.48 0.19 0.99 0.66 0.69 0.86 0.47 0.15 0.94 0.53 0.07 0.61 0.44 0.62 0.85 0.16 0.66 0.58 0.63 0.55 0.38 0.02 0.68 0.91 0.89 0.63 0.25 0.58 0.93 0.52 0.7 0.64 0.81 0.47 0.21 0.18 0.17 0.78 0.46 0.31 0.2 0.31 0.37 0.66 0.46 0.11 1.0 0.21 0.39 0.12 0.36 0.83 0.52 0.76 0.23 0.62 0.17 0.21 0.07 0.78 0.12 0.59 0.76 0.33 0.49 0.13 0.67 0.44 0.92 0.84 0.18 0.73 0.81 0.68 0.27 0.28 0.14 0.23 0.98 0.07 0.34 0.2 0.78 0.44 0.27 0.7 0.88 0.28 0.96 0.07 0.33 0.65 0.9 0.99 0.75 0.32 0.68 0.54 0.57 0.28 0.57 0.96 0.91 0.0 0.0 0.32 0.66 0.08 0.7 0.14 0.88 0.91 0.85 0.17 0.91 0.31 0.47 0.69 0.41 0.8 0.08 0.59 0.66 0.79 0.82 0.28 0.11 0.05 0.11 0.61 0.66 0.25 0.32 0.53 0.8 0.11 0.5 0.6 0.73 0.31 0.11 0.2 1.0 0.79 0.88 0.77 0.37 0.51 0.25 0.89 0.79 0.8 0.79 0.96 0.45 0.36 0.14 0.64 0.85 0.75 0.23 0.64 0.23 0.64 0.41 0.76 0.78 0.13 0.37 0.48 0.61 0.32 0.58 0.98 0.58 0.27 0.06 0.78 0.05 0.56 0.14 0.57 0.2 0.68 0.61 0.58 0.36 0.39 0.99 0.63 0.12 0.82 0.05 0.54 0.96 0.27 0.2 0.94 0.03 0.55 0.9 0.47 0.61 0.83 0.72 0.9 0.94 0.53 0.11 0.57 0.96 0.64 0.35 0.81 0.72 0.59 0.45 0.85 0.98 0.44 0.08 0.12 0.5 0.17 0.31 0.8 0.49 0.13 0.63 0.83 0.32 0.22 0.13 0.76 0.18 0.4 0.81 0.65 0.02 0.94 0.39 0.0 0.58 0.96 0.93 0.33 0.22 0.12 0.78 0.22 0.65 0.82 0.83 0.79 0.09 0.86 0.55 0.16 0.95 0.76 0.22 0.06 0.21 0.58 0.63 0.31 0.21 0.99 0.19 0.13 0.68 0.33 0.82 0.91 0.42 0.37 0.55 0.66 0.29 0.36 0.75 0.62 1.0 0.71 0.21 0.17 0.73 0.23 0.6 0.99 0.85 0.22 0.58 0.4 0.97 0.46 0.69 0.19 0.78 0.26 0.0 0.74 0.43 0.17 0.05 0.74 0.46 0.23 0.64 0.13 0.47 0.14 0.54 0.48 0.88 0.64 0.23 0.48 0.82 0.81 0.56 0.99 0.07 0.07 0.53 0.74 0.67 0.52 0.66 0.14 0.52 0.46 0.85 0.44 0.05 0.13 0.56 0.38 0.57 0.15 0.84 0.99 0.97 0.0 0.12 0.07 
0.79 0.29 0.02 0.54 0.39 0.26 0.28 0.44 0.88 0.62 0.63 0.16 0.67 0.66 0.03 0.97 0.83 0.95 0.84 0.95 0.56 0.67 0.38 0.71 0.16 0.43 0.29 0.34 0.71 0.44 0.63 0.7 0.11 0.72 0.23 0.94 0.02 0.33 0.33 0.92 0.35 0.31 0.17 0.36 0.91 0.75 0.1 0.65 0.83 0.79 0.58 0.43 0.8 0.19 0.64 0.3 0.57 0.01 0.41 0.9 0.46 0.31 0.88 0.19 0.02 0.75 0.07 0.45 0.18 0.25 0.01 0.97 0.75 0.64 0.23 0.34 0.07 0.21 0.22 0.02 0.92 0.02 0.69 0.1 0.86 0.05 0.02 0.81 0.96 0.85 0.13 0.55 0.99 0.49 0.89 0.13 0.52 0.91 0.69 0.97 0.95 0.81 0.12 0.92 0.44 0.89 0.57 0.47 0.47 0.78 0.12 0.26 0.24 0.44 0.74 0.43 0.06 0.32 0.89 0.03 0.64 0.18 0.22 0.25 0.14 0.24 0.72 0.96 0.72 0.96 0.52 0.7 0.66 0.88 0.25 0.91 0.14 0.52 0.7 0.56 0.59 0.43 0.21 0.8 0.67 0.33 0.63 0.55 0.55 0.92 0.16 0.31 0.61 0.29 0.9 0.06 0.69 0.89 0.12 0.58 0.74 0.83 0.8 0.14 0.04 0.69 0.28 0.62 0.77 0.11 0.62 0.18 0.59 0.17 0.58 0.1 0.08 0.61 0.46 0.2 0.6 0.94 0.65 0.1 0.47 0.35 0.51 0.8 0.2 0.06 0.86 1.0 0.73 0.43 0.41 0.88 0.46 0.83 0.5 0.15 0.22 0.85 0.79 0.5 0.67 0.99 0.89 0.75 0.82 0.07 0.45 0.54 0.82 0.34 0.01 0.97 0.41 0.53 0.18 0.56 0.02 0.63 0.64 0.21 0.84 0.25 0.41 0.46 0.73 0.91 0.71 0.16 0.01 0.09 0.95 0.7 0.45 0.86 0.9 0.04 0.98 0.66 0.93 0.58 0.37 0.62 0.73 0.37 0.3 0.71 0.95 0.41 0.79 0.45 0.71 0.57 0.24 0.43 0.07 0.85 0.53 0.57 0.58 0.45 0.82 0.92 0.17 0.23 0.29 0.62 0.03 0.36 0.68 0.5 0.69 0.07 0.07 0.36 0.94 0.06 0.4 0.93 0.48 0.17 0.78 0.66 0.45 0.82 0.93 0.99 0.51 0.19 0.32 0.47 0.69 0.19 0.35 0.19 0.62 0.34 0.52 0.42 0.76 0.05 0.9 0.53 0.59 0.52 0.43 0.73 0.43 0.37 0.09 0.47 0.59 0.78 0.83 0.85 0.21 0.95 0.47 0.87 0.43 0.95 0.18 0.13 0.95 0.79 0.62 0.02 0.79 0.28 0.87 0.71 0.13 0.53 0.02 0.73 0.6 0.13 0.75 0.07 0.02 0.34 0.58 0.55 0.4 0.42 0.46 0.43 0.98 0.86 0.31 0.77 0.64 0.97 0.6 0.91 0.94 0.9 0.34 0.78 0.0 0.49 0.17 0.86 0.47 0.3 0.62 0.33 0.86 0.62 0.65 0.36 0.4 0.08 0.67 0.92 0.76 0.87 0.61 0.41 0.3 0.65 0.25 0.37 0.3 0.57 0.77 0.64 0.1 0.3 0.6 0.52 0.45 0.1 0.02 0.83 0.57 0.41 0.46 0.55 0.41 0.77 0.39 0.03 0.0 0.9 
0.42 0.22 0.73 0.48 0.94 0.15 0.14 0.32 0.65 0.6 0.03 0.64 0.15 0.42 0.96 0.41 0.53 0.43 0.3 0.76 0.93 0.32 0.53 0.62 0.31 0.54 0.2 0.66 0.68 0.39 0.01 0.99 0.25 0.71 0.19 0.52 0.93 0.96 0.68 1.0 0.4 0.66 0.64 0.09 0.28 0.47 0.01 0.99 0.36 0.09 0.57 0.79 0.41 0.35 0.3 0.5 0.28 0.71 0.27 0.13 0.06 0.46 0.39 0.37 0.88 0.99 0.3 0.09 0.01 0.98 0.74 0.12 0.01 0.15 0.64 0.68 0.27 0.09 0.89 0.3 0.64 0.34 0.44 0.71 0.01 0.0 0.33 0.12 0.05 0.74 0.81 0.49 0.45 0.94 0.86 0.58 0.56 0.07 0.91 0.54 0.64 0.82 0.17 0.69 0.7 0.99 0.35 0.62 0.6 0.93 0.38 0.32 0.01 0.79 0.62 0.97 0.74 0.71 0.54 0.08 0.01 0.09 0.95 0.53 0.52 0.15 0.18 0.38 0.71 0.57 0.2 0.87 1.0 0.43 0.93 0.49 0.65 0.42 0.29 0.63 0.53 0.34 0.84 0.23 0.38 0.51 0.88 0.07 0.17 0.9 0.13 0.83 0.54 0.54 0.07 0.49 0.83 0.94 0.04 0.79 0.18 0.46 0.51 0.73 0.68 0.04 0.89 0.4 0.16 0.9 0.36 0.73 0.36 0.39 0.42 0.03 0.6 0.85 0.2 0.88 0.64 0.07 0.04 0.58 0.11 0.36 0.19 0.12 0.74 0.54 0.65 0.37 0.31 0.78 0.94 0.02 0.56 0.72 0.18 0.03 0.12 0.3 0.55 0.74 0.22 0.14 0.42 0.23 0.71 0.78 0.66 0.82 0.12 0.83 0.73 0.7 0.22 0.89 0.81 0.34 0.61 0.2 0.68 0.22 0.84 0.03 0.99 0.06 0.23 0.68 0.71 0.41 0.97 0.04 0.78 0.88 0.8 0.72 0.63 0.68 0.94 0.58 0.07 0.53 0.51 0.04 0.45 0.19 0.05 0.23 0.67 0.13 0.41 0.62 0.18 0.01 0.34 0.91 0.88 0.21 0.71 0.47 0.61 0.51 0.65 0.95 0.33 0.0 0.16 0.56 0.21 0.06 0.06 0.06 0.8 0.39 0.83 0.29 0.04 0.74 0.27 0.25 0.35 0.78 0.44 0.23 0.95 0.97 0.89 0.83 0.85 0.41 0.95 0.69 0.09 0.91 0.63 0.96 0.76 0.16 0.75 0.41 0.83 0.63 0.83 0.86 0.82 0.04 0.32 0.3 0.21 0.39 0.48 0.8 0.21 0.4 0.96 0.71 0.63 0.54 0.95 0.81 0.11 0.83 0.63 0.41 0.33 0.32 0.58 0.72 0.82 0.73 0.01 0.5 0.93 0.69 0.91 0.44 0.18 0.28 0.61 0.5 0.98 0.93 0.91 0.72 0.59 0.63 0.03 0.82 0.62 0.07 0.51 0.53 0.89 0.47 0.04 0.08 0.17 0.2 0.88 0.78 0.93 0.71 0.24 0.22 0.32 0.87 0.03 0.01 0.85 0.77 0.82 0.64 0.2 0.83 0.88 0.23 0.44 0.72 0.2 0.98 0.11 0.46 0.59 0.3 0.82 0.01 0.66 0.8 0.91 0.0 0.86 0.84 0.56 0.49 0.22 0.27 0.02 0.62 0.55 0.62 0.79 0.94 0.89 0.56 0.87 
0.96 0.43 0.58 0.63 0.22 0.37 0.44 0.85 0.28 0.25 0.4 0.34 0.14 0.8 0.84 0.89 0.06 0.45 0.02 0.07 0.85 0.43 0.13 0.21 0.21 0.05 0.23 0.85 0.44 0.8 0.52 0.39 0.65 0.67 0.64 0.79 0.3 0.01 0.3 0.11 0.02 0.96 0.05 0.44 0.06 0.01 0.77 0.19 0.06 0.31 0.48 0.97 0.64 0.92 0.76 0.07 0.77 0.95 0.98 0.63 0.25 0.27 0.76 0.96 0.24 0.18 0.8 0.0 0.96 0.24 0.52 0.59 0.65 0.17 0.32 0.55 0.59 0.62 0.82 0.59 0.29 0.42 0.12 0.24 0.02 0.66 0.59 0.78 0.37 0.19 0.96 0.18 0.2 0.99 0.76 0.58 0.35 0.54 0.89 0.14 0.58 0.1 0.97 0.38 0.82 0.48 0.06 0.83 1.0 0.99 0.77 0.41 0.08 0.87 0.75 0.13 0.52 0.58 0.68 0.03 0.92 0.55 0.04 0.56 0.63 0.28 0.8 0.39 0.68 0.58 0.01 0.23 0.28 0.98 0.96 0.05 0.28 0.44 0.31 0.91 0.81 0.18 0.65 0.53 0.02 0.41 0.98 0.09 0.12 0.84 0.6 0.17 0.2 0.58 0.35 0.25 0.74 0.83 0.55 0.18 0.8 0.33 0.04 0.56 0.85 0.22 0.83 0.48 0.53 0.54 0.51 0.06 0.76 0.1 0.43 0.21 0.46 0.97 0.48 0.77 0.11 0.36 0.9 0.52 0.06 0.23 0.8 0.09 0.11 0.57 0.59 0.76 0.44 0.15 0.46 0.07 0.86 0.01 0.49 0.05 0.54 0.14 0.29 0.01 0.81 0.45 0.45 0.12 0.82 0.47 0.93 0.51 0.04 0.26 0.14 0.5 0.06 0.25 0.62 0.95 0.07 0.28 0.32 0.03 0.28 0.45 0.86 0.24 0.22 0.78 0.63 0.4 0.33 0.56 0.26 0.41 0.63 0.73 0.73 0.35 0.44 0.67 0.03 0.07 0.68 0.86 0.35 0.58 0.75 0.16 0.37 0.87 0.66 0.59 0.67 0.46 0.64 0.78 0.97 0.45 0.98 0.64 0.41 0.58 0.51 0.97 0.95 0.9 0.34 0.1 0.76 0.37 0.05 0.57 0.72 0.91 0.4 0.43 0.78 0.78 0.39 0.3 0.21 0.88 0.36 0.54 0.87 0.84 0.19 0.22 0.89 0.89 0.85 0.77 0.86 0.46 0.5 0.88 0.18 0.4 0.61 0.07 0.06 0.65 0.05 0.31 0.55 0.87 0.05 0.54 0.28 0.28 0.35 0.1 0.55 0.82 0.86 0.12 0.17 0.69 0.74 0.13 0.08 0.6 0.4 0.97 0.32 0.81 0.14 0.97 0.65 0.72 0.32 0.57 0.69 0.74 0.65 0.75 0.37 0.88 0.97 0.88 0.7 0.98 0.36 0.1 0.35 0.15 0.23 0.09 0.3 1.0 0.21 0.99 0.44 0.23 0.21 0.15 0.43 0.77 0.17 0.32 0.55 0.8 0.08 0.72 0.49 0.31 0.39 0.48 0.29 0.78 0.64 0.04 0.11 0.69 0.76 0.9 0.79 0.32 0.03 0.68 0.67 0.35 0.55 0.01 0.03 0.22 0.31 0.3 0.28 0.14 0.01 0.73 0.86 0.67 0.06 0.45 0.32 0.78 0.22 0.84 0.19 0.29 0.8 0.61 0.23 
0.71 0.94 0.04 0.86 0.87 0.88 0.65 0.04 0.93 0.1 0.73 0.38 0.88 0.8 0.54 0.62 0.2 0.76 0.66 0.46 0.0 0.32 0.38 0.92 0.85 0.84 0.9 0.85 0.08 0.32 0.98 0.57 0.72 0.48 0.86 0.23 1.0 0.56 0.48 0.13 0.61 0.46 0.38 0.58 0.06 0.95 0.37 0.94 0.11 0.44 0.53 0.26 0.98 0.67 0.28 0.65 0.28 0.48 0.52 0.58 0.01 0.1 0.03 0.29 0.14 0.33 0.5 0.98 0.99 0.68 0.28 0.12 0.6 0.65 0.77 0.69 0.66 0.5 0.76 0.79 0.79 0.64 0.67 0.35 0.78 0.71 0.47 0.5 0.79 0.69 0.13 0.18 0.89 0.29 0.79 0.92 0.54,1.2,100,10


+v0|["qed"]||1.2|100|10|4|8|4|1|0.1|3|4|42

utils.py CHANGED Viewed

@@ -1,6 +1,9 @@
 import logging
 from collections import defaultdict
-from typing import List
 import mols2grid
 import pandas as pd
@@ -9,9 +12,23 @@ logger = logging.getLogger(__name__)
 logger.addHandler(logging.NullHandler())
 def draw_grid_generate(
     samples: List[str],
-    seeds: List[str] = [],
     n_cols: int = 3,
     size=(140, 200),
 ) -> str:
@@ -27,14 +44,22 @@ def draw_grid_generate(
         HTML to display
     """
     result = defaultdict(list)
     result.update(
-        {
-            "SMILES": seeds + samples,
-            "Name": [f"Seed_{i}" for i in range(len(seeds))]
-            + [f"Generated_{i}" for i in range(len(samples))],
-        },
     )
     result_df = pd.DataFrame(result)
     obj = mols2grid.display(

 import logging
 from collections import defaultdict
+from typing import List, Callable
+from gt4sd.properties import PropertyPredictorRegistry
+from gt4sd.algorithms.prediction.paccmann.core import PaccMann, AffinityPredictor
 import mols2grid
 import pandas as pd
 logger.addHandler(logging.NullHandler())
+def get_affinity_function(target: str) -> Callable:
+    return lambda mols: PaccMann(
+        AffinityPredictor(protein_targets=[target] * len(mols), ligands=mols)
+    ).sample(len(mols))
+EVAL_DICT = {
+    "qed": PropertyPredictorRegistry.get_property_predictor("qed"),
+    "sas": PropertyPredictorRegistry.get_property_predictor("sas"),
+    "molwt": PropertyPredictorRegistry.get_property_predictor("molecular_weight"),
+}
 def draw_grid_generate(
     samples: List[str],
+    properties: List[str],
+    protein_target: str,
     n_cols: int = 3,
     size=(140, 200),
 ) -> str:
         HTML to display
     """
+    if protein_target != "":
+        EVAL_DICT.update({"affinity": get_affinity_function(protein_target)})
     result = defaultdict(list)
     result.update(
+        {"SMILES": samples, "Name": [f"Generated_{i}" for i in range(len(samples))]},
     )
+    if "affinity" in properties:
+        properties.remove("affinity")
+        vals = EVAL_DICT["affinity"](samples)
+        result["affinity"] = vals
+    # Fill properties
+    for sample in samples:
+        for prop in properties:
+            value = EVAL_DICT[prop](sample)
+            result[prop].append(f"{prop} = {value}")
     result_df = pd.DataFrame(result)
     obj = mols2grid.display(