paccmann_gp

Running

App Files Files Community

jannisborn commited on Jan 8, 2023

Commit

5984d9a

0 Parent(s):

Duplicate from jannisborn/gt4sd-paccmann-rl

Browse files

Files changed (10) hide show

.gitattributes +34 -0
.gitignore +1 -0
LICENSE +21 -0
README.md +15 -0
app.py +131 -0
model_cards/article.md +96 -0
model_cards/description.md +9 -0
model_cards/examples.csv +3 -0
requirements.txt +29 -0
utils.py +48 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,34 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ __pycache__/

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2022 Generative Toolkit 4 Scientific Discovery
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md ADDED Viewed

	@@ -0,0 +1,15 @@

+---
+title: GT4SD - PaccMannRL
+emoji: 💡
+colorFrom: green
+colorTo: blue
+sdk: gradio
+sdk_version: 3.9.1
+app_file: app.py
+pinned: false
+python_version: 3.8.13
+pypi_version: 20.2.4
+duplicated_from: jannisborn/gt4sd-paccmann-rl
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,131 @@

+import logging
+import pathlib
+from typing import List
+import gradio as gr
+import numpy as np
+import pandas as pd
+from gt4sd.algorithms.conditional_generation.paccmann_rl import (
+    PaccMannRL,
+    PaccMannRLOmicBasedGenerator,
+    PaccMannRLProteinBasedGenerator,
+)
+from gt4sd.algorithms.generation.paccmann_vae import PaccMannVAE, PaccMannVAEGenerator
+from gt4sd.algorithms.registry import ApplicationsRegistry
+from utils import draw_grid_generate
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
+def run_inference(
+    algorithm_version: str,
+    inference_type: str,
+    protein_target: str,
+    omics_target: str,
+    temperature: float,
+    length: float,
+    number_of_samples: int,
+):
+    if inference_type == "Unbiased":
+        algorithm_class = PaccMannVAEGenerator
+        model_class = PaccMannVAE
+        target = None
+    elif inference_type == "Conditional":
+        if "Protein" in algorithm_version:
+            algorithm_class = PaccMannRLProteinBasedGenerator
+            target = protein_target
+        elif "Omic" in algorithm_version:
+            algorithm_class = PaccMannRLOmicBasedGenerator
+            try:
+                test_target = [float(x) for x in omics_target.split(" ")]
+            except Exception:
+                raise ValueError(
+                    f"Expected 2128 space-separated omics values, got {omics_target}"
+                )
+            if len(test_target) != 2128:
+                raise ValueError(
+                    f"Expected 2128 omics values, got {len(target)}: {target}"
+                )
+            target = f"[{omics_target.replace(' ', ',')}]"
+        else:
+            raise ValueError(f"Unknown algorithm version {algorithm_version}")
+        model_class = PaccMannRL
+    else:
+        raise ValueError(f"Unknown inference type {inference_type}")
+    config = algorithm_class(
+        algorithm_version.split("_")[-1],
+        temperature=temperature,
+        generated_length=length,
+    )
+    print("Target is ", target)
+    print(type(target), len(target))
+    model = model_class(config, target=target)
+    samples = list(model.sample(number_of_samples))
+    return draw_grid_generate(samples=samples, n_cols=5)
+if __name__ == "__main__":
+    # Preparation (retrieve all available algorithms)
+    all_algos = ApplicationsRegistry.list_available()
+    algos = [
+        x["algorithm_application"].split("Based")[0].split("PaccMannRL")[-1]
+        + "_"
+        + x["algorithm_version"]
+        for x in list(filter(lambda x: "PaccMannRL" in x["algorithm_name"], all_algos))
+    ]
+    # Load metadata
+    metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")
+    examples = pd.read_csv(metadata_root.joinpath("examples.csv"), header=None).fillna(
+        ""
+    )
+    with open(metadata_root.joinpath("article.md"), "r") as f:
+        article = f.read()
+    with open(metadata_root.joinpath("description.md"), "r") as f:
+        description = f.read()
+    demo = gr.Interface(
+        fn=run_inference,
+        title="PaccMannRL",
+        inputs=[
+            gr.Dropdown(algos, label="Algorithm version", value="Protein_v0"),
+            gr.Radio(
+                choices=["Conditional", "Unbiased"],
+                label="Inference type",
+                value="Conditional",
+            ),
+            gr.Textbox(
+                label="Protein target",
+                placeholder="MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTT",
+                lines=1,
+            ),
+            gr.Textbox(
+                label="Gene expression target",
+                placeholder=f"{' '.join(map(str, np.round(np.random.rand(2128), 2)))}",
+                lines=1,
+            ),
+            gr.Slider(minimum=0.5, maximum=2, value=1, label="Decoding temperature"),
+            gr.Slider(
+                minimum=5,
+                maximum=400,
+                value=100,
+                label="Maximal sequence length",
+                step=1,
+            ),
+            gr.Slider(
+                minimum=1, maximum=50, value=10, label="Number of samples", step=1
+            ),
+        ],
+        outputs=gr.HTML(label="Output"),
+        article=article,
+        description=description,
+        examples=examples.values.tolist(),
+    )
+    demo.launch(debug=True, show_error=True)

model_cards/article.md ADDED Viewed

	@@ -0,0 +1,96 @@

+# Model documentation & parameters
+**Algorithm Version**: Which model version (either protein-target-driven or gene-expression-profile-driven) to use and which checkpoint to rely on.
+**Inference type**: Whether the model should be conditioned on the target (default) or whether the model is used in an  `Unbiased` manner.
+**Protein target**: An AAS of a protein target used for conditioning. Only use if `Inference type` is `Conditional` and if the `Algorithm version` is a Protein model.
+**Gene expression target**: A list of 2128 floats, representing the embedding of gene expression profile to be used for conditioning. Only use if `Inference type` is `Conditional` and if the `Algorithm version` is a Omic model.
+**Decoding temperature**: The temperature parameter in the SMILES/SELFIES decoder. Higher values lead to more explorative choices, smaller values culminate in mode collapse.
+**Maximal sequence length**: The maximal number of SMILES tokens in the generated molecule.
+**Number of samples**: How many samples should be generated (between 1 and 50).
+# Model card -- PaccMannRL
+**Model Details**: PaccMannRL is a language model for conditional molecular design. It consists of a domain-specific encoder (for protein targets or gene expression profiles) and a generic molecular decoder. Both components are finetuned together using RL to convert the context representation into a molecule with high affinity toward the context (i.e., binding affinity to the protein or high inhibitory effect for the cell profile).
+**Developers**: Jannis Born, Matteo Manica and colleagues from IBM Research.
+**Distributors**: Original authors' code wrapped and distributed by GT4SD Team (2023) from IBM Research.
+**Model date**: Published in 2021.
+**Model version**: Models trained and distribuetd by the original authors.
+- **Protein_v0**: Molecular decoder pretrained on 1.5M molecules from ChEMBL. Protein encoder pretrained on 404k proteins from UniProt. Encoder and decoder finetuned on 41 SARS-CoV-2-related protein targets with a binding affinity predictor trained on BindingDB.
+- **Omic_v0**: Molecular decoder pretrained on 1.5M molecules from ChEMBL. Gene expression encoder pretrained on 12k gene expression profiles from TCGA. Encoder and decoder finetuned on a few hundred cancer cell profiles from GDSC with a IC50 predictor trained on GDSC.
+**Model type**: A language-based molecular generative model that can be optimized with RL to generate molecules with high affinity toward a context.
+**Information about training algorithms, parameters, fairness constraints or other applied approaches, and features**:
+- **Protein**: Parameters as provided on [(GitHub repo)](https://github.com/PaccMann/paccmann_sarscov2).
+- **Omics**: Parameters as provided on [(GitHub repo)](https://github.com/PaccMann/paccmann_rl).
+**Paper or other resource for more information**:
+- **Protein**: [PaccMannRL: De novo generation of hit-like anticancer molecules from transcriptomic data via reinforcement learning (2021; *iScience*)](https://www.cell.com/iscience/fulltext/S2589-0042(21)00237-6).
+- **Omics**: [Data-driven molecular design for discovery and synthesis of novel ligands: a case study on SARS-CoV-2 (2021; *Machine Learning: Science and Technology*)](https://iopscience.iop.org/article/10.1088/2632-2153/abe808/meta).
+**License**: MIT
+**Where to send questions or comments about the model**: Open an issue on [GT4SD repository](https://github.com/GT4SD/gt4sd-core).
+**Intended Use. Use cases that were envisioned during development**: Chemical research, in particular drug discovery.
+**Primary intended uses/users**: Researchers and computational chemists using the model for model comparison or research exploration purposes.
+**Out-of-scope use cases**: Production-level inference, producing molecules with harmful properties.
+**Factors**: Not applicable.
+**Metrics**: High reward on generating molecules with high affinity toward context.
+**Datasets**: ChEMBL, UniProt, GDSC and BindingDB (see above).
+**Ethical Considerations**: Unclear, please consult with original authors in case of questions.
+**Caveats and Recommendations**: Unclear, please consult with original authors in case of questions.
+Model card prototype inspired by [Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)
+## Citation
+**Omics**:
+```bib
+@article{born2021paccmannrl,
+  title = {PaccMann\textsuperscript{RL}: De novo generation of hit-like anticancer molecules from transcriptomic data via reinforcement learning},
+  journal = {iScience},
+  volume = {24},
+  number = {4},
+  pages = {102269},
+  year = {2021},
+  issn = {2589-0042},
+  doi = {https://doi.org/10.1016/j.isci.2021.102269},
+  url = {https://www.cell.com/iscience/fulltext/S2589-0042(21)00237-6},
+  author = {Born, Jannis and Manica, Matteo and Oskooei, Ali and Cadow, Joris and Markert, Greta and {Rodr{\'{i}}guez Mart{\'{i}}nez}, Mar{\'{i}}a}
+}
+```
+**Proteins**:
+```bib
+@article{born2021datadriven,
+  author = {Born, Jannis and Manica, Matteo and Cadow, Joris and Markert, Greta and Mill, Nil Adell and Filipavicius, Modestas and Janakarajan, Nikita and Cardinale, Antonio and Laino, Teodoro and {Rodr{\'{i}}guez Mart{\'{i}}nez}, Mar{\'{i}}a},
+  doi = {10.1088/2632-2153/abe808},
+  issn = {2632-2153},
+  journal = {Machine Learning: Science and Technology},
+  number = {2},
+  pages = {025024},
+  title = {{Data-driven molecular design for discovery and synthesis of novel ligands: a case study on SARS-CoV-2}},
+  url = {https://iopscience.iop.org/article/10.1088/2632-2153/abe808},
+  volume = {2},
+  year = {2021}
+}
+```

model_cards/description.md ADDED Viewed

	@@ -0,0 +1,9 @@

+<img align="right" src="https://raw.githubusercontent.com/GT4SD/gt4sd-core/main/docs/_static/gt4sd_logo.png" alt="logo" width="120" >
+[PaccMann<sup>RL</sup>](https://github.com/PaccMann/paccmann_rl) is a language-based molecular generative model that can be conditioned (primed) on protein targets or gene expression profiles and produces molecules with high affinity toward the context vector. This model has been developed at IBM Research and is distributed by the **GT4SD** (Generative Toolkit for Scientific Discovery) team. For details please see the two publications:
+- [Born et al., (2021), *iScience*](https://www.cell.com/iscience/fulltext/S2589-0042(21)00237-6) for the model conditionable on gene expression profiles.
+- [Born et al., (2021), *Machine Learning: Science & Technology*](https://iopscience.iop.org/article/10.1088/2632-2153/abe808/meta) for the model conditionable on protein targets.
+For **examples** and **documentation** of the model parameters, please see below.
+Moreover, we provide a **model card** ([Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)) at the bottom of this page.

model_cards/examples.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+Protein_v0,Conditional,MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTT,,1.2,100,10
+Protein_v0,Unbiased,,,1.4,250,10
+Omic_v0,Conditional,,0.08 0.9 0.47 0.91 0.7 0.88 0.95 0.37 0.72 0.42 0.63 0.77 0.65 0.83 0.48 0.31 0.36 0.33 0.64 0.33 1.0 0.82 0.49 0.98 0.96 0.86 0.1 0.92 0.13 0.41 0.88 0.79 0.88 0.01 0.3 0.98 0.91 0.83 0.06 0.77 0.56 0.87 0.78 0.27 0.97 0.14 0.71 0.1 0.08 0.63 0.53 0.6 0.66 0.04 0.46 0.6 0.59 0.36 0.65 0.57 0.96 0.42 0.37 0.18 0.71 0.5 0.54 0.22 0.21 0.53 0.66 0.9 0.4 0.95 0.48 0.81 0.47 0.27 0.56 0.77 0.32 0.66 0.01 0.82 0.29 0.81 0.7 0.77 0.65 0.36 0.78 0.31 0.85 0.69 0.12 0.04 0.39 0.11 0.13 0.15 0.35 0.97 0.66 0.35 0.78 0.33 0.48 0.8 0.26 0.05 0.69 0.07 0.92 0.22 0.35 0.13 0.22 0.94 0.73 0.81 0.29 0.3 0.13 0.06 0.9 0.62 0.19 0.69 0.72 0.55 0.34 0.26 0.72 0.95 0.81 0.78 0.5 0.47 0.67 0.49 0.48 0.75 0.52 0.91 0.42 0.62 0.8 0.17 1.0 0.35 0.63 0.02 0.79 0.67 0.99 0.86 0.71 0.15 0.13 0.54 0.19 0.81 0.56 0.98 0.16 0.15 0.69 0.17 0.66 0.74 0.65 0.9 0.73 0.61 0.69 0.19 0.04 0.72 0.41 0.35 0.93 0.91 0.34 0.35 0.92 0.45 0.34 0.52 0.73 0.39 0.54 0.83 0.99 0.68 0.16 0.6 0.48 0.18 0.96 0.7 0.18 0.77 0.6 0.07 0.99 0.97 0.41 0.25 0.98 0.85 0.95 0.59 0.77 0.18 0.22 0.39 0.33 0.46 0.07 0.16 0.81 0.0 0.53 0.49 0.9 0.57 0.03 0.26 0.24 0.57 0.63 0.88 0.57 0.73 0.6 0.71 0.29 0.25 0.94 0.23 0.93 0.07 0.35 0.59 0.66 0.51 0.25 0.51 0.47 0.04 0.85 0.15 0.4 0.51 0.0 0.29 0.29 0.07 0.14 0.77 0.1 0.31 0.95 0.52 0.48 0.24 0.71 0.27 0.93 0.77 0.04 0.92 0.08 0.92 0.68 0.32 0.15 0.77 0.63 0.73 0.14 0.83 0.76 0.96 0.72 0.57 0.92 0.35 0.62 0.21 0.46 0.66 0.89 0.52 0.35 0.71 0.0 0.78 0.51 0.34 0.05 0.57 0.34 0.54 0.57 0.81 0.88 0.61 0.53 0.98 0.26 0.34 0.57 0.94 0.09 0.94 0.15 0.81 0.15 0.83 0.83 0.73 0.33 0.69 0.89 0.46 0.96 0.12 0.82 0.89 0.45 0.26 0.84 0.48 0.51 0.43 0.12 0.74 0.32 0.19 0.8 0.04 0.61 0.63 0.23 0.22 0.7 0.14 0.63 0.35 0.89 0.4 0.1 0.1 0.56 0.98 0.7 0.41 0.78 0.14 0.04 0.97 0.32 0.66 0.54 0.66 0.8 0.86 0.36 0.99 0.01 0.41 0.62 0.81 0.14 0.84 0.49 0.3 0.4 0.13 0.2 0.05 0.29 0.11 0.75 0.87 0.71 0.25 0.43 0.67 0.49 0.2 0.77 0.85 0.32 0.94 0.51 0.95 0.54 0.22 0.7 0.97 0.71 0.24 0.88 0.9 0.61 0.99 0.57 0.25 0.01 0.09 0.83 0.83 0.89 0.58 0.95 0.86 0.06 0.88 0.27 0.12 0.7 0.17 0.23 0.43 0.61 0.51 0.65 0.02 0.19 0.61 0.69 0.14 0.89 0.3 0.86 0.55 0.06 0.46 0.78 0.82 0.34 0.63 0.38 0.12 0.15 0.45 0.93 0.08 0.54 0.94 0.64 0.74 0.4 0.23 0.18 0.27 0.44 0.6 0.82 0.19 0.13 0.48 0.19 0.99 0.66 0.69 0.86 0.47 0.15 0.94 0.53 0.07 0.61 0.44 0.62 0.85 0.16 0.66 0.58 0.63 0.55 0.38 0.02 0.68 0.91 0.89 0.63 0.25 0.58 0.93 0.52 0.7 0.64 0.81 0.47 0.21 0.18 0.17 0.78 0.46 0.31 0.2 0.31 0.37 0.66 0.46 0.11 1.0 0.21 0.39 0.12 0.36 0.83 0.52 0.76 0.23 0.62 0.17 0.21 0.07 0.78 0.12 0.59 0.76 0.33 0.49 0.13 0.67 0.44 0.92 0.84 0.18 0.73 0.81 0.68 0.27 0.28 0.14 0.23 0.98 0.07 0.34 0.2 0.78 0.44 0.27 0.7 0.88 0.28 0.96 0.07 0.33 0.65 0.9 0.99 0.75 0.32 0.68 0.54 0.57 0.28 0.57 0.96 0.91 0.0 0.0 0.32 0.66 0.08 0.7 0.14 0.88 0.91 0.85 0.17 0.91 0.31 0.47 0.69 0.41 0.8 0.08 0.59 0.66 0.79 0.82 0.28 0.11 0.05 0.11 0.61 0.66 0.25 0.32 0.53 0.8 0.11 0.5 0.6 0.73 0.31 0.11 0.2 1.0 0.79 0.88 0.77 0.37 0.51 0.25 0.89 0.79 0.8 0.79 0.96 0.45 0.36 0.14 0.64 0.85 0.75 0.23 0.64 0.23 0.64 0.41 0.76 0.78 0.13 0.37 0.48 0.61 0.32 0.58 0.98 0.58 0.27 0.06 0.78 0.05 0.56 0.14 0.57 0.2 0.68 0.61 0.58 0.36 0.39 0.99 0.63 0.12 0.82 0.05 0.54 0.96 0.27 0.2 0.94 0.03 0.55 0.9 0.47 0.61 0.83 0.72 0.9 0.94 0.53 0.11 0.57 0.96 0.64 0.35 0.81 0.72 0.59 0.45 0.85 0.98 0.44 0.08 0.12 0.5 0.17 0.31 0.8 0.49 0.13 0.63 0.83 0.32 0.22 0.13 0.76 0.18 0.4 0.81 0.65 0.02 0.94 0.39 0.0 0.58 0.96 0.93 0.33 0.22 0.12 0.78 0.22 0.65 0.82 0.83 0.79 0.09 0.86 0.55 0.16 0.95 0.76 0.22 0.06 0.21 0.58 0.63 0.31 0.21 0.99 0.19 0.13 0.68 0.33 0.82 0.91 0.42 0.37 0.55 0.66 0.29 0.36 0.75 0.62 1.0 0.71 0.21 0.17 0.73 0.23 0.6 0.99 0.85 0.22 0.58 0.4 0.97 0.46 0.69 0.19 0.78 0.26 0.0 0.74 0.43 0.17 0.05 0.74 0.46 0.23 0.64 0.13 0.47 0.14 0.54 0.48 0.88 0.64 0.23 0.48 0.82 0.81 0.56 0.99 0.07 0.07 0.53 0.74 0.67 0.52 0.66 0.14 0.52 0.46 0.85 0.44 0.05 0.13 0.56 0.38 0.57 0.15 0.84 0.99 0.97 0.0 0.12 0.07 0.79 0.29 0.02 0.54 0.39 0.26 0.28 0.44 0.88 0.62 0.63 0.16 0.67 0.66 0.03 0.97 0.83 0.95 0.84 0.95 0.56 0.67 0.38 0.71 0.16 0.43 0.29 0.34 0.71 0.44 0.63 0.7 0.11 0.72 0.23 0.94 0.02 0.33 0.33 0.92 0.35 0.31 0.17 0.36 0.91 0.75 0.1 0.65 0.83 0.79 0.58 0.43 0.8 0.19 0.64 0.3 0.57 0.01 0.41 0.9 0.46 0.31 0.88 0.19 0.02 0.75 0.07 0.45 0.18 0.25 0.01 0.97 0.75 0.64 0.23 0.34 0.07 0.21 0.22 0.02 0.92 0.02 0.69 0.1 0.86 0.05 0.02 0.81 0.96 0.85 0.13 0.55 0.99 0.49 0.89 0.13 0.52 0.91 0.69 0.97 0.95 0.81 0.12 0.92 0.44 0.89 0.57 0.47 0.47 0.78 0.12 0.26 0.24 0.44 0.74 0.43 0.06 0.32 0.89 0.03 0.64 0.18 0.22 0.25 0.14 0.24 0.72 0.96 0.72 0.96 0.52 0.7 0.66 0.88 0.25 0.91 0.14 0.52 0.7 0.56 0.59 0.43 0.21 0.8 0.67 0.33 0.63 0.55 0.55 0.92 0.16 0.31 0.61 0.29 0.9 0.06 0.69 0.89 0.12 0.58 0.74 0.83 0.8 0.14 0.04 0.69 0.28 0.62 0.77 0.11 0.62 0.18 0.59 0.17 0.58 0.1 0.08 0.61 0.46 0.2 0.6 0.94 0.65 0.1 0.47 0.35 0.51 0.8 0.2 0.06 0.86 1.0 0.73 0.43 0.41 0.88 0.46 0.83 0.5 0.15 0.22 0.85 0.79 0.5 0.67 0.99 0.89 0.75 0.82 0.07 0.45 0.54 0.82 0.34 0.01 0.97 0.41 0.53 0.18 0.56 0.02 0.63 0.64 0.21 0.84 0.25 0.41 0.46 0.73 0.91 0.71 0.16 0.01 0.09 0.95 0.7 0.45 0.86 0.9 0.04 0.98 0.66 0.93 0.58 0.37 0.62 0.73 0.37 0.3 0.71 0.95 0.41 0.79 0.45 0.71 0.57 0.24 0.43 0.07 0.85 0.53 0.57 0.58 0.45 0.82 0.92 0.17 0.23 0.29 0.62 0.03 0.36 0.68 0.5 0.69 0.07 0.07 0.36 0.94 0.06 0.4 0.93 0.48 0.17 0.78 0.66 0.45 0.82 0.93 0.99 0.51 0.19 0.32 0.47 0.69 0.19 0.35 0.19 0.62 0.34 0.52 0.42 0.76 0.05 0.9 0.53 0.59 0.52 0.43 0.73 0.43 0.37 0.09 0.47 0.59 0.78 0.83 0.85 0.21 0.95 0.47 0.87 0.43 0.95 0.18 0.13 0.95 0.79 0.62 0.02 0.79 0.28 0.87 0.71 0.13 0.53 0.02 0.73 0.6 0.13 0.75 0.07 0.02 0.34 0.58 0.55 0.4 0.42 0.46 0.43 0.98 0.86 0.31 0.77 0.64 0.97 0.6 0.91 0.94 0.9 0.34 0.78 0.0 0.49 0.17 0.86 0.47 0.3 0.62 0.33 0.86 0.62 0.65 0.36 0.4 0.08 0.67 0.92 0.76 0.87 0.61 0.41 0.3 0.65 0.25 0.37 0.3 0.57 0.77 0.64 0.1 0.3 0.6 0.52 0.45 0.1 0.02 0.83 0.57 0.41 0.46 0.55 0.41 0.77 0.39 0.03 0.0 0.9 0.42 0.22 0.73 0.48 0.94 0.15 0.14 0.32 0.65 0.6 0.03 0.64 0.15 0.42 0.96 0.41 0.53 0.43 0.3 0.76 0.93 0.32 0.53 0.62 0.31 0.54 0.2 0.66 0.68 0.39 0.01 0.99 0.25 0.71 0.19 0.52 0.93 0.96 0.68 1.0 0.4 0.66 0.64 0.09 0.28 0.47 0.01 0.99 0.36 0.09 0.57 0.79 0.41 0.35 0.3 0.5 0.28 0.71 0.27 0.13 0.06 0.46 0.39 0.37 0.88 0.99 0.3 0.09 0.01 0.98 0.74 0.12 0.01 0.15 0.64 0.68 0.27 0.09 0.89 0.3 0.64 0.34 0.44 0.71 0.01 0.0 0.33 0.12 0.05 0.74 0.81 0.49 0.45 0.94 0.86 0.58 0.56 0.07 0.91 0.54 0.64 0.82 0.17 0.69 0.7 0.99 0.35 0.62 0.6 0.93 0.38 0.32 0.01 0.79 0.62 0.97 0.74 0.71 0.54 0.08 0.01 0.09 0.95 0.53 0.52 0.15 0.18 0.38 0.71 0.57 0.2 0.87 1.0 0.43 0.93 0.49 0.65 0.42 0.29 0.63 0.53 0.34 0.84 0.23 0.38 0.51 0.88 0.07 0.17 0.9 0.13 0.83 0.54 0.54 0.07 0.49 0.83 0.94 0.04 0.79 0.18 0.46 0.51 0.73 0.68 0.04 0.89 0.4 0.16 0.9 0.36 0.73 0.36 0.39 0.42 0.03 0.6 0.85 0.2 0.88 0.64 0.07 0.04 0.58 0.11 0.36 0.19 0.12 0.74 0.54 0.65 0.37 0.31 0.78 0.94 0.02 0.56 0.72 0.18 0.03 0.12 0.3 0.55 0.74 0.22 0.14 0.42 0.23 0.71 0.78 0.66 0.82 0.12 0.83 0.73 0.7 0.22 0.89 0.81 0.34 0.61 0.2 0.68 0.22 0.84 0.03 0.99 0.06 0.23 0.68 0.71 0.41 0.97 0.04 0.78 0.88 0.8 0.72 0.63 0.68 0.94 0.58 0.07 0.53 0.51 0.04 0.45 0.19 0.05 0.23 0.67 0.13 0.41 0.62 0.18 0.01 0.34 0.91 0.88 0.21 0.71 0.47 0.61 0.51 0.65 0.95 0.33 0.0 0.16 0.56 0.21 0.06 0.06 0.06 0.8 0.39 0.83 0.29 0.04 0.74 0.27 0.25 0.35 0.78 0.44 0.23 0.95 0.97 0.89 0.83 0.85 0.41 0.95 0.69 0.09 0.91 0.63 0.96 0.76 0.16 0.75 0.41 0.83 0.63 0.83 0.86 0.82 0.04 0.32 0.3 0.21 0.39 0.48 0.8 0.21 0.4 0.96 0.71 0.63 0.54 0.95 0.81 0.11 0.83 0.63 0.41 0.33 0.32 0.58 0.72 0.82 0.73 0.01 0.5 0.93 0.69 0.91 0.44 0.18 0.28 0.61 0.5 0.98 0.93 0.91 0.72 0.59 0.63 0.03 0.82 0.62 0.07 0.51 0.53 0.89 0.47 0.04 0.08 0.17 0.2 0.88 0.78 0.93 0.71 0.24 0.22 0.32 0.87 0.03 0.01 0.85 0.77 0.82 0.64 0.2 0.83 0.88 0.23 0.44 0.72 0.2 0.98 0.11 0.46 0.59 0.3 0.82 0.01 0.66 0.8 0.91 0.0 0.86 0.84 0.56 0.49 0.22 0.27 0.02 0.62 0.55 0.62 0.79 0.94 0.89 0.56 0.87 0.96 0.43 0.58 0.63 0.22 0.37 0.44 0.85 0.28 0.25 0.4 0.34 0.14 0.8 0.84 0.89 0.06 0.45 0.02 0.07 0.85 0.43 0.13 0.21 0.21 0.05 0.23 0.85 0.44 0.8 0.52 0.39 0.65 0.67 0.64 0.79 0.3 0.01 0.3 0.11 0.02 0.96 0.05 0.44 0.06 0.01 0.77 0.19 0.06 0.31 0.48 0.97 0.64 0.92 0.76 0.07 0.77 0.95 0.98 0.63 0.25 0.27 0.76 0.96 0.24 0.18 0.8 0.0 0.96 0.24 0.52 0.59 0.65 0.17 0.32 0.55 0.59 0.62 0.82 0.59 0.29 0.42 0.12 0.24 0.02 0.66 0.59 0.78 0.37 0.19 0.96 0.18 0.2 0.99 0.76 0.58 0.35 0.54 0.89 0.14 0.58 0.1 0.97 0.38 0.82 0.48 0.06 0.83 1.0 0.99 0.77 0.41 0.08 0.87 0.75 0.13 0.52 0.58 0.68 0.03 0.92 0.55 0.04 0.56 0.63 0.28 0.8 0.39 0.68 0.58 0.01 0.23 0.28 0.98 0.96 0.05 0.28 0.44 0.31 0.91 0.81 0.18 0.65 0.53 0.02 0.41 0.98 0.09 0.12 0.84 0.6 0.17 0.2 0.58 0.35 0.25 0.74 0.83 0.55 0.18 0.8 0.33 0.04 0.56 0.85 0.22 0.83 0.48 0.53 0.54 0.51 0.06 0.76 0.1 0.43 0.21 0.46 0.97 0.48 0.77 0.11 0.36 0.9 0.52 0.06 0.23 0.8 0.09 0.11 0.57 0.59 0.76 0.44 0.15 0.46 0.07 0.86 0.01 0.49 0.05 0.54 0.14 0.29 0.01 0.81 0.45 0.45 0.12 0.82 0.47 0.93 0.51 0.04 0.26 0.14 0.5 0.06 0.25 0.62 0.95 0.07 0.28 0.32 0.03 0.28 0.45 0.86 0.24 0.22 0.78 0.63 0.4 0.33 0.56 0.26 0.41 0.63 0.73 0.73 0.35 0.44 0.67 0.03 0.07 0.68 0.86 0.35 0.58 0.75 0.16 0.37 0.87 0.66 0.59 0.67 0.46 0.64 0.78 0.97 0.45 0.98 0.64 0.41 0.58 0.51 0.97 0.95 0.9 0.34 0.1 0.76 0.37 0.05 0.57 0.72 0.91 0.4 0.43 0.78 0.78 0.39 0.3 0.21 0.88 0.36 0.54 0.87 0.84 0.19 0.22 0.89 0.89 0.85 0.77 0.86 0.46 0.5 0.88 0.18 0.4 0.61 0.07 0.06 0.65 0.05 0.31 0.55 0.87 0.05 0.54 0.28 0.28 0.35 0.1 0.55 0.82 0.86 0.12 0.17 0.69 0.74 0.13 0.08 0.6 0.4 0.97 0.32 0.81 0.14 0.97 0.65 0.72 0.32 0.57 0.69 0.74 0.65 0.75 0.37 0.88 0.97 0.88 0.7 0.98 0.36 0.1 0.35 0.15 0.23 0.09 0.3 1.0 0.21 0.99 0.44 0.23 0.21 0.15 0.43 0.77 0.17 0.32 0.55 0.8 0.08 0.72 0.49 0.31 0.39 0.48 0.29 0.78 0.64 0.04 0.11 0.69 0.76 0.9 0.79 0.32 0.03 0.68 0.67 0.35 0.55 0.01 0.03 0.22 0.31 0.3 0.28 0.14 0.01 0.73 0.86 0.67 0.06 0.45 0.32 0.78 0.22 0.84 0.19 0.29 0.8 0.61 0.23 0.71 0.94 0.04 0.86 0.87 0.88 0.65 0.04 0.93 0.1 0.73 0.38 0.88 0.8 0.54 0.62 0.2 0.76 0.66 0.46 0.0 0.32 0.38 0.92 0.85 0.84 0.9 0.85 0.08 0.32 0.98 0.57 0.72 0.48 0.86 0.23 1.0 0.56 0.48 0.13 0.61 0.46 0.38 0.58 0.06 0.95 0.37 0.94 0.11 0.44 0.53 0.26 0.98 0.67 0.28 0.65 0.28 0.48 0.52 0.58 0.01 0.1 0.03 0.29 0.14 0.33 0.5 0.98 0.99 0.68 0.28 0.12 0.6 0.65 0.77 0.69 0.66 0.5 0.76 0.79 0.79 0.64 0.67 0.35 0.78 0.71 0.47 0.5 0.79 0.69 0.13 0.18 0.89 0.29 0.79 0.92 0.54,1.2,100,10

requirements.txt ADDED Viewed

	@@ -0,0 +1,29 @@

+-f https://download.pytorch.org/whl/cpu/torch_stable.html
+-f https://data.pyg.org/whl/torch-1.12.1+cpu.html
+# pip==20.2.4
+torch==1.12.1
+torch-scatter
+torch-spline-conv
+torch-sparse
+torch-geometric
+torchvision==0.13.1
+torchaudio==0.12.1
+gt4sd>=1.0.5
+molgx>=0.22.0a1
+molecule_generation
+nglview
+PyTDC==0.3.7
+gradio==3.12.0
+markdown-it-py>=2.1.0
+mols2grid>=0.2.0
+numpy==1.23.5
+pandas>=1.0.0
+terminator @ git+https://github.com/IBM/regression-transformer@gt4sd
+guacamol_baselines @ git+https://github.com/GT4SD/guacamol_baselines.git@v0.0.2
+moses @ git+https://github.com/GT4SD/moses.git@v0.1.0
+paccmann_chemistry @ git+https://github.com/PaccMann/paccmann_chemistry@0.0.4
+paccmann_generator @ git+https://github.com/PaccMann/paccmann_generator@0.0.2
+paccmann_gp @ git+https://github.com/PaccMann/paccmann_gp@0.1.1
+paccmann_omics @ git+https://github.com/PaccMann/paccmann_omics@0.0.1.1
+paccmann_predictor @ git+https://github.com/PaccMann/paccmann_predictor@sarscov2
+reinvent_models @ git+https://github.com/GT4SD/reinvent_models@v0.0.1

utils.py ADDED Viewed

	@@ -0,0 +1,48 @@

+import logging
+from collections import defaultdict
+from typing import List
+import mols2grid
+import pandas as pd
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
+def draw_grid_generate(
+    samples: List[str],
+    seeds: List[str] = [],
+    n_cols: int = 3,
+    size=(140, 200),
+) -> str:
+    """
+    Uses mols2grid to draw a HTML grid for the generated molecules
+    Args:
+        samples: The generated samples.
+        n_cols: Number of columns in grid. Defaults to 5.
+        size: Size of molecule in grid. Defaults to (140, 200).
+    Returns:
+        HTML to display
+    """
+    result = defaultdict(list)
+    result.update(
+        {
+            "SMILES": seeds + samples,
+            "Name": [f"Seed_{i}" for i in range(len(seeds))]
+            + [f"Generated_{i}" for i in range(len(samples))],
+        },
+    )
+    result_df = pd.DataFrame(result)
+    obj = mols2grid.display(
+        result_df,
+        tooltip=list(result.keys()),
+        height=1100,
+        n_cols=n_cols,
+        name="Results",
+        size=size,
+    )
+    return obj.data