Spaces:
Sleeping
Sleeping
jannisborn
commited on
Commit
•
e83e5dc
1
Parent(s):
5984d9a
update
Browse files- README.md +1 -1
- app.py +85 -57
- model_cards/article.md +40 -47
- model_cards/description.md +1 -4
- model_cards/examples.csv +1 -3
- utils.py +32 -7
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
title: GT4SD -
|
3 |
emoji: 💡
|
4 |
colorFrom: green
|
5 |
colorTo: blue
|
|
|
1 |
---
|
2 |
+
title: GT4SD - PaccMannGP
|
3 |
emoji: 💡
|
4 |
colorFrom: green
|
5 |
colorTo: blue
|
app.py
CHANGED
@@ -3,14 +3,15 @@ import pathlib
|
|
3 |
from typing import List
|
4 |
|
5 |
import gradio as gr
|
6 |
-
import numpy as np
|
7 |
import pandas as pd
|
8 |
-
from gt4sd.algorithms.
|
9 |
-
|
10 |
-
|
11 |
-
PaccMannRLProteinBasedGenerator,
|
12 |
)
|
13 |
-
from gt4sd.algorithms.
|
|
|
|
|
|
|
14 |
from gt4sd.algorithms.registry import ApplicationsRegistry
|
15 |
|
16 |
from utils import draw_grid_generate
|
@@ -19,53 +20,57 @@ logger = logging.getLogger(__name__)
|
|
19 |
logger.addHandler(logging.NullHandler())
|
20 |
|
21 |
|
|
|
|
|
|
|
22 |
def run_inference(
|
23 |
algorithm_version: str,
|
24 |
-
|
25 |
protein_target: str,
|
26 |
-
omics_target: str,
|
27 |
temperature: float,
|
28 |
length: float,
|
29 |
number_of_samples: int,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
):
|
31 |
-
if inference_type == "Unbiased":
|
32 |
-
algorithm_class = PaccMannVAEGenerator
|
33 |
-
model_class = PaccMannVAE
|
34 |
-
target = None
|
35 |
-
elif inference_type == "Conditional":
|
36 |
-
if "Protein" in algorithm_version:
|
37 |
-
algorithm_class = PaccMannRLProteinBasedGenerator
|
38 |
-
target = protein_target
|
39 |
-
elif "Omic" in algorithm_version:
|
40 |
-
algorithm_class = PaccMannRLOmicBasedGenerator
|
41 |
-
try:
|
42 |
-
test_target = [float(x) for x in omics_target.split(" ")]
|
43 |
-
except Exception:
|
44 |
-
raise ValueError(
|
45 |
-
f"Expected 2128 space-separated omics values, got {omics_target}"
|
46 |
-
)
|
47 |
-
if len(test_target) != 2128:
|
48 |
-
raise ValueError(
|
49 |
-
f"Expected 2128 omics values, got {len(target)}: {target}"
|
50 |
-
)
|
51 |
-
target = f"[{omics_target.replace(' ', ',')}]"
|
52 |
-
else:
|
53 |
-
raise ValueError(f"Unknown algorithm version {algorithm_version}")
|
54 |
-
model_class = PaccMannRL
|
55 |
-
else:
|
56 |
-
raise ValueError(f"Unknown inference type {inference_type}")
|
57 |
|
58 |
-
config =
|
59 |
-
algorithm_version.split("_")[-1],
|
|
|
60 |
temperature=temperature,
|
61 |
generated_length=length,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
)
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
|
|
66 |
samples = list(model.sample(number_of_samples))
|
67 |
|
68 |
-
return draw_grid_generate(
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
|
71 |
if __name__ == "__main__":
|
@@ -73,18 +78,17 @@ if __name__ == "__main__":
|
|
73 |
# Preparation (retrieve all available algorithms)
|
74 |
all_algos = ApplicationsRegistry.list_available()
|
75 |
algos = [
|
76 |
-
x["
|
77 |
-
+ "_"
|
78 |
-
+ x["algorithm_version"]
|
79 |
for x in list(filter(lambda x: "PaccMannRL" in x["algorithm_name"], all_algos))
|
80 |
]
|
81 |
|
82 |
# Load metadata
|
83 |
metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")
|
84 |
|
85 |
-
examples = pd.read_csv(
|
86 |
-
""
|
87 |
-
)
|
|
|
88 |
|
89 |
with open(metadata_root.joinpath("article.md"), "r") as f:
|
90 |
article = f.read()
|
@@ -93,24 +97,20 @@ if __name__ == "__main__":
|
|
93 |
|
94 |
demo = gr.Interface(
|
95 |
fn=run_inference,
|
96 |
-
title="
|
97 |
inputs=[
|
98 |
-
gr.Dropdown(algos, label="Algorithm version", value="
|
99 |
-
gr.
|
100 |
-
choices=
|
101 |
-
|
102 |
-
|
|
|
103 |
),
|
104 |
gr.Textbox(
|
105 |
label="Protein target",
|
106 |
placeholder="MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTT",
|
107 |
lines=1,
|
108 |
),
|
109 |
-
gr.Textbox(
|
110 |
-
label="Gene expression target",
|
111 |
-
placeholder=f"{' '.join(map(str, np.round(np.random.rand(2128), 2)))}",
|
112 |
-
lines=1,
|
113 |
-
),
|
114 |
gr.Slider(minimum=0.5, maximum=2, value=1, label="Decoding temperature"),
|
115 |
gr.Slider(
|
116 |
minimum=5,
|
@@ -122,6 +122,34 @@ if __name__ == "__main__":
|
|
122 |
gr.Slider(
|
123 |
minimum=1, maximum=50, value=10, label="Number of samples", step=1
|
124 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
],
|
126 |
outputs=gr.HTML(label="Output"),
|
127 |
article=article,
|
|
|
3 |
from typing import List
|
4 |
|
5 |
import gradio as gr
|
|
|
6 |
import pandas as pd
|
7 |
+
from gt4sd.algorithms.controlled_sampling.paccmann_gp import (
|
8 |
+
PaccMannGPGenerator,
|
9 |
+
PaccMannGP,
|
|
|
10 |
)
|
11 |
+
from gt4sd.algorithms.controlled_sampling.paccmann_gp.implementation import (
|
12 |
+
MINIMIZATION_FUNCTIONS,
|
13 |
+
)
|
14 |
+
|
15 |
from gt4sd.algorithms.registry import ApplicationsRegistry
|
16 |
|
17 |
from utils import draw_grid_generate
|
|
|
20 |
logger.addHandler(logging.NullHandler())
|
21 |
|
22 |
|
23 |
+
MINIMIZATION_FUNCTIONS.pop("callable", None)
|
24 |
+
|
25 |
+
|
26 |
def run_inference(
    algorithm_version: str,
    targets: List[str],
    protein_target: str,
    temperature: float,
    length: float,
    number_of_samples: int,
    limit: int,
    number_of_steps: int,
    number_of_initial_points: int,
    number_of_optimization_rounds: int,
    sampling_variance: float,
    samples_for_evaluation: int,
    maximum_number_of_sampling_steps: int,
    seed: int,
):
    """Sample molecules with PaccMannGP and render them as a grid.

    Args:
        algorithm_version: Model version string; only the part after the
            last underscore is forwarded to the generator configuration.
        targets: Names of the properties to optimize. Each name becomes a
            key of the target dictionary handed to ``PaccMannGP``.
        protein_target: Protein sequence used when ``"affinity"`` is among
            ``targets``. Ignored (reset to ``""``) otherwise.
        temperature: Decoding temperature of the decoder.
        length: Forwarded as ``generated_length`` of the configuration.
        number_of_samples: How many samples are drawn from the model.
        limit: Hypercube limit in the latent space.
        number_of_steps: Number of steps of a GP optimization round.
        number_of_initial_points: Number of initial points evaluated.
        number_of_optimization_rounds: Maximum number of optimization rounds.
        sampling_variance: Variance of the noise applied during sampling
            from the optimal point.
        samples_for_evaluation: Number of samples averaged for each
            minimization function evaluation.
        maximum_number_of_sampling_steps: Maximum number of sampling steps
            in an optimization round.
        seed: Random seed forwarded to the configuration.

    Returns:
        Whatever ``draw_grid_generate`` returns for the generated samples
        (annotated there as an HTML string).

    Raises:
        ValueError: If ``"affinity"`` is requested without a protein target.
    """
    wants_affinity = "affinity" in targets

    # Fail fast: without a protein the affinity predictor is never registered
    # by ``draw_grid_generate`` (it only does so for a non-empty protein), so
    # the later lookup would hit a missing or stale entry.
    if wants_affinity and not (protein_target or "").strip():
        raise ValueError(
            "A protein target is required when 'affinity' is a property goal."
        )

    # Batch size, acquisition function and initial point generator are fixed;
    # everything else comes straight from the caller.
    config = PaccMannGPGenerator(
        algorithm_version=algorithm_version.split("_")[-1],
        batch_size=32,
        temperature=temperature,
        generated_length=length,
        limit=limit,
        acquisition_function="EI",
        number_of_steps=number_of_steps,
        number_of_initial_points=number_of_initial_points,
        initial_point_generator="random",
        number_of_optimization_rounds=number_of_optimization_rounds,
        sampling_variance=sampling_variance,
        samples_for_evaluation=samples_for_evaluation,
        maximum_number_of_sampling_steps=maximum_number_of_sampling_steps,
        seed=seed,
    )

    # One (initially empty) parameter dict per requested property.
    target = {name: {} for name in targets}
    if wants_affinity:
        target["affinity"]["protein"] = protein_target
    else:
        # A protein is meaningless without the affinity goal; blank it so the
        # drawing step does not set up an affinity predictor.
        protein_target = ""

    model = PaccMannGP(config, target=target)
    samples = list(model.sample(number_of_samples))

    return draw_grid_generate(
        samples=samples,
        n_cols=5,
        # A fresh list keeps the user's selection order and matches the
        # ``List[str]`` annotation of ``draw_grid_generate``.
        properties=list(target),
        protein_target=protein_target,
    )
|
74 |
|
75 |
|
76 |
if __name__ == "__main__":
|
|
|
78 |
# Preparation (retrieve all available algorithms)
|
79 |
all_algos = ApplicationsRegistry.list_available()
|
80 |
algos = [
|
81 |
+
x["algorithm_version"]
|
|
|
|
|
82 |
for x in list(filter(lambda x: "PaccMannRL" in x["algorithm_name"], all_algos))
|
83 |
]
|
84 |
|
85 |
# Load metadata
|
86 |
metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")
|
87 |
|
88 |
+
examples = pd.read_csv(
|
89 |
+
metadata_root.joinpath("examples.csv"), header=None, sep="|"
|
90 |
+
).fillna("")
|
91 |
+
examples[1] = examples[1].apply(eval)
|
92 |
|
93 |
with open(metadata_root.joinpath("article.md"), "r") as f:
|
94 |
article = f.read()
|
|
|
97 |
|
98 |
demo = gr.Interface(
|
99 |
fn=run_inference,
|
100 |
+
title="PaccMannGP",
|
101 |
inputs=[
|
102 |
+
gr.Dropdown(algos, label="Algorithm version", value="v0"),
|
103 |
+
gr.CheckboxGroup(
|
104 |
+
choices=list(MINIMIZATION_FUNCTIONS.keys()),
|
105 |
+
value=["qed"],
|
106 |
+
multiselect=True,
|
107 |
+
label="Property goals",
|
108 |
),
|
109 |
gr.Textbox(
|
110 |
label="Protein target",
|
111 |
placeholder="MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTT",
|
112 |
lines=1,
|
113 |
),
|
|
|
|
|
|
|
|
|
|
|
114 |
gr.Slider(minimum=0.5, maximum=2, value=1, label="Decoding temperature"),
|
115 |
gr.Slider(
|
116 |
minimum=5,
|
|
|
122 |
gr.Slider(
|
123 |
minimum=1, maximum=50, value=10, label="Number of samples", step=1
|
124 |
),
|
125 |
+
gr.Slider(minimum=1, maximum=8, value=4.0, label="Limit"),
|
126 |
+
gr.Slider(minimum=1, maximum=32, value=8, label="Number of steps", step=1),
|
127 |
+
gr.Slider(
|
128 |
+
minimum=1, maximum=32, value=4, label="Number of initial points", step=1
|
129 |
+
),
|
130 |
+
gr.Slider(
|
131 |
+
minimum=1,
|
132 |
+
maximum=4,
|
133 |
+
value=1,
|
134 |
+
label="Number of optimization rounds",
|
135 |
+
step=1,
|
136 |
+
),
|
137 |
+
gr.Slider(minimum=0.01, maximum=1, value=0.1, label="Sampling variance"),
|
138 |
+
gr.Slider(
|
139 |
+
minimum=1,
|
140 |
+
maximum=10,
|
141 |
+
value=1,
|
142 |
+
label="Samples used for evaluation",
|
143 |
+
step=1,
|
144 |
+
),
|
145 |
+
gr.Slider(
|
146 |
+
minimum=1,
|
147 |
+
maximum=64,
|
148 |
+
value=4,
|
149 |
+
label="Maximum number of sampling steps",
|
150 |
+
step=1,
|
151 |
+
),
|
152 |
+
gr.Number(value=42, label="Seed", precision=0),
|
153 |
],
|
154 |
outputs=gr.HTML(label="Output"),
|
155 |
article=article,
|
model_cards/article.md
CHANGED
@@ -1,12 +1,10 @@
|
|
1 |
# Model documentation & parameters
|
2 |
|
3 |
-
**Algorithm Version**: Which model version
|
4 |
|
5 |
-
**
|
6 |
|
7 |
-
**Protein target**: An AAS of a protein target used for conditioning.
|
8 |
-
|
9 |
-
**Gene expression target**: A list of 2128 floats, representing the embedding of gene expression profile to be used for conditioning. Only use if `Inference type` is `Conditional` and if the `Algorithm version` is a Omic model.
|
10 |
|
11 |
**Decoding temperature**: The temperature parameter in the SMILES/SELFIES decoder. Higher values lead to more explorative choices, smaller values culminate in mode collapse.
|
12 |
|
@@ -14,30 +12,43 @@
|
|
14 |
|
15 |
**Number of samples**: How many samples should be generated (between 1 and 50).
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
-
|
19 |
|
20 |
-
|
|
|
|
|
|
|
|
|
21 |
|
22 |
**Developers**: Jannis Born, Matteo Manica and colleagues from IBM Research.
|
23 |
|
24 |
**Distributors**: Original authors' code wrapped and distributed by GT4SD Team (2023) from IBM Research.
|
25 |
|
26 |
-
**Model date**: Published in
|
27 |
|
28 |
-
**Model version**:
|
29 |
-
- **Protein_v0**: Molecular decoder pretrained on 1.5M molecules from ChEMBL. Protein encoder pretrained on 404k proteins from UniProt. Encoder and decoder finetuned on 41 SARS-CoV-2-related protein targets with a binding affinity predictor trained on BindingDB.
|
30 |
-
- **Omic_v0**: Molecular decoder pretrained on 1.5M molecules from ChEMBL. Gene expression encoder pretrained on 12k gene expression profiles from TCGA. Encoder and decoder finetuned on a few hundred cancer cell profiles from GDSC with a IC50 predictor trained on GDSC.
|
31 |
|
32 |
-
**Model type**: A language-based molecular generative model that can be
|
33 |
|
34 |
**Information about training algorithms, parameters, fairness constraints or other applied approaches, and features**:
|
35 |
-
|
36 |
-
- **Omics**: Parameters as provided on [(GitHub repo)](https://github.com/PaccMann/paccmann_rl).
|
37 |
|
38 |
**Paper or other resource for more information**:
|
39 |
-
|
40 |
-
- **Omics**: [Data-driven molecular design for discovery and synthesis of novel ligands: a case study on SARS-CoV-2 (2021; *Machine Learning: Science and Technology*)](https://iopscience.iop.org/article/10.1088/2632-2153/abe808/meta).
|
41 |
|
42 |
**License**: MIT
|
43 |
|
@@ -51,9 +62,9 @@
|
|
51 |
|
52 |
**Factors**: Not applicable.
|
53 |
|
54 |
-
**Metrics**: High reward on generating molecules with
|
55 |
|
56 |
-
**Datasets**: ChEMBL
|
57 |
|
58 |
**Ethical Considerations**: Unclear, please consult with original authors in case of questions.
|
59 |
|
@@ -62,35 +73,17 @@
|
|
62 |
Model card prototype inspired by [Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)
|
63 |
|
64 |
## Citation
|
65 |
-
|
66 |
-
**Omics**:
|
67 |
-
```bib
|
68 |
-
@article{born2021paccmannrl,
|
69 |
-
title = {PaccMann\textsuperscript{RL}: De novo generation of hit-like anticancer molecules from transcriptomic data via reinforcement learning},
|
70 |
-
journal = {iScience},
|
71 |
-
volume = {24},
|
72 |
-
number = {4},
|
73 |
-
pages = {102269},
|
74 |
-
year = {2021},
|
75 |
-
issn = {2589-0042},
|
76 |
-
doi = {https://doi.org/10.1016/j.isci.2021.102269},
|
77 |
-
url = {https://www.cell.com/iscience/fulltext/S2589-0042(21)00237-6},
|
78 |
-
author = {Born, Jannis and Manica, Matteo and Oskooei, Ali and Cadow, Joris and Markert, Greta and {Rodr{\'{i}}guez Mart{\'{i}}nez}, Mar{\'{i}}a}
|
79 |
-
}
|
80 |
-
```
|
81 |
-
|
82 |
-
**Proteins**:
|
83 |
```bib
|
84 |
-
@article{
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
}
|
96 |
```
|
|
|
1 |
# Model documentation & parameters
|
2 |
|
3 |
+
**Algorithm Version**: Which model version to use.
|
4 |
|
5 |
+
**Property goals**: One or multiple properties that will be optimized.
|
6 |
|
7 |
+
**Protein target**: The amino acid sequence (AAS) of a protein target used for conditioning. Leave blank unless you use `affinity` as a `property goal`.
|
|
|
|
|
8 |
|
9 |
**Decoding temperature**: The temperature parameter in the SMILES/SELFIES decoder. Higher values lead to more explorative choices, smaller values culminate in mode collapse.
|
10 |
|
|
|
12 |
|
13 |
**Number of samples**: How many samples should be generated (between 1 and 50).
|
14 |
|
15 |
+
**Limit**: Hypercube limits in the latent space.
|
16 |
+
|
17 |
+
**Number of steps**: Number of steps for a GP optimization round. The more steps, the slower the run. Has to be at least `Number of initial points`.
|
18 |
+
|
19 |
+
**Number of initial points**: Number of initial points evaluated. The more points, the slower the run.
|
20 |
+
|
21 |
+
**Number of optimization rounds**: Maximum number of optimization rounds.
|
22 |
+
|
23 |
+
**Sampling variance**: Variance of the Gaussian noise applied during sampling from the optimal point.
|
24 |
+
|
25 |
+
**Samples for evaluation**: Number of samples averaged for each minimization function evaluation.
|
26 |
+
|
27 |
+
**Max. sampling steps**: Maximum number of sampling steps in an optimization round.
|
28 |
|
29 |
+
**Seed**: The random seed used for initialization.
|
30 |
|
31 |
+
|
32 |
+
|
33 |
+
# Model card -- PaccMannGP
|
34 |
+
|
35 |
+
**Model Details**: [PaccMann<sup>GP</sup>](https://github.com/PaccMann/paccmann_gp) is a language-based Variational Autoencoder that is coupled with a Gaussian Process for controlled sampling. This model systematically explores the latent space of a trained molecular VAE.
|
36 |
|
37 |
**Developers**: Jannis Born, Matteo Manica and colleagues from IBM Research.
|
38 |
|
39 |
**Distributors**: Original authors' code wrapped and distributed by GT4SD Team (2023) from IBM Research.
|
40 |
|
41 |
+
**Model date**: Published in 2022.
|
42 |
|
43 |
+
**Model version**: A molecular VAE trained on 1.5M molecules from ChEMBL.
|
|
|
|
|
44 |
|
45 |
+
**Model type**: A language-based molecular generative model that can be explored with Gaussian Processes to generate molecules with desired properties.
|
46 |
|
47 |
**Information about training algorithms, parameters, fairness constraints or other applied approaches, and features**:
|
48 |
+
Described in the [original paper](https://pubs.acs.org/doi/10.1021/acs.jcim.1c00889).
|
|
|
49 |
|
50 |
**Paper or other resource for more information**:
|
51 |
+
[Active Site Sequence Representations of Human Kinases Outperform Full Sequence Representations for Affinity Prediction and Inhibitor Generation: 3D Effects in a 1D Model (2022; *Journal of Chemical Information & Modeling*)](https://pubs.acs.org/doi/10.1021/acs.jcim.1c00889).
|
|
|
52 |
|
53 |
**License**: MIT
|
54 |
|
|
|
62 |
|
63 |
**Factors**: Not applicable.
|
64 |
|
65 |
+
**Metrics**: High reward on generating molecules with desired properties.
|
66 |
|
67 |
+
**Datasets**: ChEMBL.
|
68 |
|
69 |
**Ethical Considerations**: Unclear, please consult with original authors in case of questions.
|
70 |
|
|
|
73 |
Model card prototype inspired by [Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)
|
74 |
|
75 |
## Citation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
```bib
|
77 |
+
@article{born2022active,
|
78 |
+
author = {Born, Jannis and Huynh, Tien and Stroobants, Astrid and Cornell, Wendy D. and Manica, Matteo},
|
79 |
+
title = {Active Site Sequence Representations of Human Kinases Outperform Full Sequence Representations for Affinity Prediction and Inhibitor Generation: 3D Effects in a 1D Model},
|
80 |
+
journal = {Journal of Chemical Information and Modeling},
|
81 |
+
volume = {62},
|
82 |
+
number = {2},
|
83 |
+
pages = {240-257},
|
84 |
+
year = {2022},
|
85 |
+
doi = {10.1021/acs.jcim.1c00889},
|
86 |
+
note ={PMID: 34905358},
|
87 |
+
URL = {https://doi.org/10.1021/acs.jcim.1c00889}
|
88 |
}
|
89 |
```
|
model_cards/description.md
CHANGED
@@ -1,9 +1,6 @@
|
|
1 |
<img align="right" src="https://raw.githubusercontent.com/GT4SD/gt4sd-core/main/docs/_static/gt4sd_logo.png" alt="logo" width="120" >
|
2 |
|
3 |
-
[PaccMann<sup>
|
4 |
-
- [Born et al., (2021), *iScience*](https://www.cell.com/iscience/fulltext/S2589-0042(21)00237-6) for the model conditionable on gene expression profiles.
|
5 |
-
- [Born et al., (2021), *Machine Learning: Science & Technology*](https://iopscience.iop.org/article/10.1088/2632-2153/abe808/meta) for the model conditionable on protein targets.
|
6 |
-
|
7 |
|
8 |
For **examples** and **documentation** of the model parameters, please see below.
|
9 |
Moreover, we provide a **model card** ([Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)) at the bottom of this page.
|
|
|
1 |
<img align="right" src="https://raw.githubusercontent.com/GT4SD/gt4sd-core/main/docs/_static/gt4sd_logo.png" alt="logo" width="120" >
|
2 |
|
3 |
+
[PaccMann<sup>GP</sup>](https://github.com/PaccMann/paccmann_gp) is a language-based Variational Autoencoder that is coupled with a Gaussian Process for controlled sampling. For details of the methodology, please see [Born et al., (2022), *Journal of Chemical Information & Modeling*](https://pubs.acs.org/doi/10.1021/acs.jcim.1c00889).
|
|
|
|
|
|
|
4 |
|
5 |
For **examples** and **documentation** of the model parameters, please see below.
|
6 |
Moreover, we provide a **model card** ([Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)) at the bottom of this page.
|
model_cards/examples.csv
CHANGED
@@ -1,3 +1 @@
|
|
1 |
-
|
2 |
-
Protein_v0,Unbiased,,,1.4,250,10
|
3 |
-
Omic_v0,Conditional,,0.08 0.9 0.47 0.91 0.7 0.88 0.95 0.37 0.72 0.42 0.63 0.77 0.65 0.83 0.48 0.31 0.36 0.33 0.64 0.33 1.0 0.82 0.49 0.98 0.96 0.86 0.1 0.92 0.13 0.41 0.88 0.79 0.88 0.01 0.3 0.98 0.91 0.83 0.06 0.77 0.56 0.87 0.78 0.27 0.97 0.14 0.71 0.1 0.08 0.63 0.53 0.6 0.66 0.04 0.46 0.6 0.59 0.36 0.65 0.57 0.96 0.42 0.37 0.18 0.71 0.5 0.54 0.22 0.21 0.53 0.66 0.9 0.4 0.95 0.48 0.81 0.47 0.27 0.56 0.77 0.32 0.66 0.01 0.82 0.29 0.81 0.7 0.77 0.65 0.36 0.78 0.31 0.85 0.69 0.12 0.04 0.39 0.11 0.13 0.15 0.35 0.97 0.66 0.35 0.78 0.33 0.48 0.8 0.26 0.05 0.69 0.07 0.92 0.22 0.35 0.13 0.22 0.94 0.73 0.81 0.29 0.3 0.13 0.06 0.9 0.62 0.19 0.69 0.72 0.55 0.34 0.26 0.72 0.95 0.81 0.78 0.5 0.47 0.67 0.49 0.48 0.75 0.52 0.91 0.42 0.62 0.8 0.17 1.0 0.35 0.63 0.02 0.79 0.67 0.99 0.86 0.71 0.15 0.13 0.54 0.19 0.81 0.56 0.98 0.16 0.15 0.69 0.17 0.66 0.74 0.65 0.9 0.73 0.61 0.69 0.19 0.04 0.72 0.41 0.35 0.93 0.91 0.34 0.35 0.92 0.45 0.34 0.52 0.73 0.39 0.54 0.83 0.99 0.68 0.16 0.6 0.48 0.18 0.96 0.7 0.18 0.77 0.6 0.07 0.99 0.97 0.41 0.25 0.98 0.85 0.95 0.59 0.77 0.18 0.22 0.39 0.33 0.46 0.07 0.16 0.81 0.0 0.53 0.49 0.9 0.57 0.03 0.26 0.24 0.57 0.63 0.88 0.57 0.73 0.6 0.71 0.29 0.25 0.94 0.23 0.93 0.07 0.35 0.59 0.66 0.51 0.25 0.51 0.47 0.04 0.85 0.15 0.4 0.51 0.0 0.29 0.29 0.07 0.14 0.77 0.1 0.31 0.95 0.52 0.48 0.24 0.71 0.27 0.93 0.77 0.04 0.92 0.08 0.92 0.68 0.32 0.15 0.77 0.63 0.73 0.14 0.83 0.76 0.96 0.72 0.57 0.92 0.35 0.62 0.21 0.46 0.66 0.89 0.52 0.35 0.71 0.0 0.78 0.51 0.34 0.05 0.57 0.34 0.54 0.57 0.81 0.88 0.61 0.53 0.98 0.26 0.34 0.57 0.94 0.09 0.94 0.15 0.81 0.15 0.83 0.83 0.73 0.33 0.69 0.89 0.46 0.96 0.12 0.82 0.89 0.45 0.26 0.84 0.48 0.51 0.43 0.12 0.74 0.32 0.19 0.8 0.04 0.61 0.63 0.23 0.22 0.7 0.14 0.63 0.35 0.89 0.4 0.1 0.1 0.56 0.98 0.7 0.41 0.78 0.14 0.04 0.97 0.32 0.66 0.54 0.66 0.8 0.86 0.36 0.99 0.01 0.41 0.62 0.81 0.14 0.84 0.49 0.3 0.4 0.13 0.2 0.05 0.29 0.11 0.75 0.87 0.71 0.25 0.43 0.67 0.49 0.2 0.77 0.85 0.32 0.94 0.51 0.95 0.54 0.22 0.7 0.97 0.71 0.24 
0.88 0.9 0.61 0.99 0.57 0.25 0.01 0.09 0.83 0.83 0.89 0.58 0.95 0.86 0.06 0.88 0.27 0.12 0.7 0.17 0.23 0.43 0.61 0.51 0.65 0.02 0.19 0.61 0.69 0.14 0.89 0.3 0.86 0.55 0.06 0.46 0.78 0.82 0.34 0.63 0.38 0.12 0.15 0.45 0.93 0.08 0.54 0.94 0.64 0.74 0.4 0.23 0.18 0.27 0.44 0.6 0.82 0.19 0.13 0.48 0.19 0.99 0.66 0.69 0.86 0.47 0.15 0.94 0.53 0.07 0.61 0.44 0.62 0.85 0.16 0.66 0.58 0.63 0.55 0.38 0.02 0.68 0.91 0.89 0.63 0.25 0.58 0.93 0.52 0.7 0.64 0.81 0.47 0.21 0.18 0.17 0.78 0.46 0.31 0.2 0.31 0.37 0.66 0.46 0.11 1.0 0.21 0.39 0.12 0.36 0.83 0.52 0.76 0.23 0.62 0.17 0.21 0.07 0.78 0.12 0.59 0.76 0.33 0.49 0.13 0.67 0.44 0.92 0.84 0.18 0.73 0.81 0.68 0.27 0.28 0.14 0.23 0.98 0.07 0.34 0.2 0.78 0.44 0.27 0.7 0.88 0.28 0.96 0.07 0.33 0.65 0.9 0.99 0.75 0.32 0.68 0.54 0.57 0.28 0.57 0.96 0.91 0.0 0.0 0.32 0.66 0.08 0.7 0.14 0.88 0.91 0.85 0.17 0.91 0.31 0.47 0.69 0.41 0.8 0.08 0.59 0.66 0.79 0.82 0.28 0.11 0.05 0.11 0.61 0.66 0.25 0.32 0.53 0.8 0.11 0.5 0.6 0.73 0.31 0.11 0.2 1.0 0.79 0.88 0.77 0.37 0.51 0.25 0.89 0.79 0.8 0.79 0.96 0.45 0.36 0.14 0.64 0.85 0.75 0.23 0.64 0.23 0.64 0.41 0.76 0.78 0.13 0.37 0.48 0.61 0.32 0.58 0.98 0.58 0.27 0.06 0.78 0.05 0.56 0.14 0.57 0.2 0.68 0.61 0.58 0.36 0.39 0.99 0.63 0.12 0.82 0.05 0.54 0.96 0.27 0.2 0.94 0.03 0.55 0.9 0.47 0.61 0.83 0.72 0.9 0.94 0.53 0.11 0.57 0.96 0.64 0.35 0.81 0.72 0.59 0.45 0.85 0.98 0.44 0.08 0.12 0.5 0.17 0.31 0.8 0.49 0.13 0.63 0.83 0.32 0.22 0.13 0.76 0.18 0.4 0.81 0.65 0.02 0.94 0.39 0.0 0.58 0.96 0.93 0.33 0.22 0.12 0.78 0.22 0.65 0.82 0.83 0.79 0.09 0.86 0.55 0.16 0.95 0.76 0.22 0.06 0.21 0.58 0.63 0.31 0.21 0.99 0.19 0.13 0.68 0.33 0.82 0.91 0.42 0.37 0.55 0.66 0.29 0.36 0.75 0.62 1.0 0.71 0.21 0.17 0.73 0.23 0.6 0.99 0.85 0.22 0.58 0.4 0.97 0.46 0.69 0.19 0.78 0.26 0.0 0.74 0.43 0.17 0.05 0.74 0.46 0.23 0.64 0.13 0.47 0.14 0.54 0.48 0.88 0.64 0.23 0.48 0.82 0.81 0.56 0.99 0.07 0.07 0.53 0.74 0.67 0.52 0.66 0.14 0.52 0.46 0.85 0.44 0.05 0.13 0.56 0.38 0.57 0.15 0.84 0.99 0.97 0.0 0.12 0.07 0.79 
0.29 0.02 0.54 0.39 0.26 0.28 0.44 0.88 0.62 0.63 0.16 0.67 0.66 0.03 0.97 0.83 0.95 0.84 0.95 0.56 0.67 0.38 0.71 0.16 0.43 0.29 0.34 0.71 0.44 0.63 0.7 0.11 0.72 0.23 0.94 0.02 0.33 0.33 0.92 0.35 0.31 0.17 0.36 0.91 0.75 0.1 0.65 0.83 0.79 0.58 0.43 0.8 0.19 0.64 0.3 0.57 0.01 0.41 0.9 0.46 0.31 0.88 0.19 0.02 0.75 0.07 0.45 0.18 0.25 0.01 0.97 0.75 0.64 0.23 0.34 0.07 0.21 0.22 0.02 0.92 0.02 0.69 0.1 0.86 0.05 0.02 0.81 0.96 0.85 0.13 0.55 0.99 0.49 0.89 0.13 0.52 0.91 0.69 0.97 0.95 0.81 0.12 0.92 0.44 0.89 0.57 0.47 0.47 0.78 0.12 0.26 0.24 0.44 0.74 0.43 0.06 0.32 0.89 0.03 0.64 0.18 0.22 0.25 0.14 0.24 0.72 0.96 0.72 0.96 0.52 0.7 0.66 0.88 0.25 0.91 0.14 0.52 0.7 0.56 0.59 0.43 0.21 0.8 0.67 0.33 0.63 0.55 0.55 0.92 0.16 0.31 0.61 0.29 0.9 0.06 0.69 0.89 0.12 0.58 0.74 0.83 0.8 0.14 0.04 0.69 0.28 0.62 0.77 0.11 0.62 0.18 0.59 0.17 0.58 0.1 0.08 0.61 0.46 0.2 0.6 0.94 0.65 0.1 0.47 0.35 0.51 0.8 0.2 0.06 0.86 1.0 0.73 0.43 0.41 0.88 0.46 0.83 0.5 0.15 0.22 0.85 0.79 0.5 0.67 0.99 0.89 0.75 0.82 0.07 0.45 0.54 0.82 0.34 0.01 0.97 0.41 0.53 0.18 0.56 0.02 0.63 0.64 0.21 0.84 0.25 0.41 0.46 0.73 0.91 0.71 0.16 0.01 0.09 0.95 0.7 0.45 0.86 0.9 0.04 0.98 0.66 0.93 0.58 0.37 0.62 0.73 0.37 0.3 0.71 0.95 0.41 0.79 0.45 0.71 0.57 0.24 0.43 0.07 0.85 0.53 0.57 0.58 0.45 0.82 0.92 0.17 0.23 0.29 0.62 0.03 0.36 0.68 0.5 0.69 0.07 0.07 0.36 0.94 0.06 0.4 0.93 0.48 0.17 0.78 0.66 0.45 0.82 0.93 0.99 0.51 0.19 0.32 0.47 0.69 0.19 0.35 0.19 0.62 0.34 0.52 0.42 0.76 0.05 0.9 0.53 0.59 0.52 0.43 0.73 0.43 0.37 0.09 0.47 0.59 0.78 0.83 0.85 0.21 0.95 0.47 0.87 0.43 0.95 0.18 0.13 0.95 0.79 0.62 0.02 0.79 0.28 0.87 0.71 0.13 0.53 0.02 0.73 0.6 0.13 0.75 0.07 0.02 0.34 0.58 0.55 0.4 0.42 0.46 0.43 0.98 0.86 0.31 0.77 0.64 0.97 0.6 0.91 0.94 0.9 0.34 0.78 0.0 0.49 0.17 0.86 0.47 0.3 0.62 0.33 0.86 0.62 0.65 0.36 0.4 0.08 0.67 0.92 0.76 0.87 0.61 0.41 0.3 0.65 0.25 0.37 0.3 0.57 0.77 0.64 0.1 0.3 0.6 0.52 0.45 0.1 0.02 0.83 0.57 0.41 0.46 0.55 0.41 0.77 0.39 0.03 0.0 0.9 0.42 
0.22 0.73 0.48 0.94 0.15 0.14 0.32 0.65 0.6 0.03 0.64 0.15 0.42 0.96 0.41 0.53 0.43 0.3 0.76 0.93 0.32 0.53 0.62 0.31 0.54 0.2 0.66 0.68 0.39 0.01 0.99 0.25 0.71 0.19 0.52 0.93 0.96 0.68 1.0 0.4 0.66 0.64 0.09 0.28 0.47 0.01 0.99 0.36 0.09 0.57 0.79 0.41 0.35 0.3 0.5 0.28 0.71 0.27 0.13 0.06 0.46 0.39 0.37 0.88 0.99 0.3 0.09 0.01 0.98 0.74 0.12 0.01 0.15 0.64 0.68 0.27 0.09 0.89 0.3 0.64 0.34 0.44 0.71 0.01 0.0 0.33 0.12 0.05 0.74 0.81 0.49 0.45 0.94 0.86 0.58 0.56 0.07 0.91 0.54 0.64 0.82 0.17 0.69 0.7 0.99 0.35 0.62 0.6 0.93 0.38 0.32 0.01 0.79 0.62 0.97 0.74 0.71 0.54 0.08 0.01 0.09 0.95 0.53 0.52 0.15 0.18 0.38 0.71 0.57 0.2 0.87 1.0 0.43 0.93 0.49 0.65 0.42 0.29 0.63 0.53 0.34 0.84 0.23 0.38 0.51 0.88 0.07 0.17 0.9 0.13 0.83 0.54 0.54 0.07 0.49 0.83 0.94 0.04 0.79 0.18 0.46 0.51 0.73 0.68 0.04 0.89 0.4 0.16 0.9 0.36 0.73 0.36 0.39 0.42 0.03 0.6 0.85 0.2 0.88 0.64 0.07 0.04 0.58 0.11 0.36 0.19 0.12 0.74 0.54 0.65 0.37 0.31 0.78 0.94 0.02 0.56 0.72 0.18 0.03 0.12 0.3 0.55 0.74 0.22 0.14 0.42 0.23 0.71 0.78 0.66 0.82 0.12 0.83 0.73 0.7 0.22 0.89 0.81 0.34 0.61 0.2 0.68 0.22 0.84 0.03 0.99 0.06 0.23 0.68 0.71 0.41 0.97 0.04 0.78 0.88 0.8 0.72 0.63 0.68 0.94 0.58 0.07 0.53 0.51 0.04 0.45 0.19 0.05 0.23 0.67 0.13 0.41 0.62 0.18 0.01 0.34 0.91 0.88 0.21 0.71 0.47 0.61 0.51 0.65 0.95 0.33 0.0 0.16 0.56 0.21 0.06 0.06 0.06 0.8 0.39 0.83 0.29 0.04 0.74 0.27 0.25 0.35 0.78 0.44 0.23 0.95 0.97 0.89 0.83 0.85 0.41 0.95 0.69 0.09 0.91 0.63 0.96 0.76 0.16 0.75 0.41 0.83 0.63 0.83 0.86 0.82 0.04 0.32 0.3 0.21 0.39 0.48 0.8 0.21 0.4 0.96 0.71 0.63 0.54 0.95 0.81 0.11 0.83 0.63 0.41 0.33 0.32 0.58 0.72 0.82 0.73 0.01 0.5 0.93 0.69 0.91 0.44 0.18 0.28 0.61 0.5 0.98 0.93 0.91 0.72 0.59 0.63 0.03 0.82 0.62 0.07 0.51 0.53 0.89 0.47 0.04 0.08 0.17 0.2 0.88 0.78 0.93 0.71 0.24 0.22 0.32 0.87 0.03 0.01 0.85 0.77 0.82 0.64 0.2 0.83 0.88 0.23 0.44 0.72 0.2 0.98 0.11 0.46 0.59 0.3 0.82 0.01 0.66 0.8 0.91 0.0 0.86 0.84 0.56 0.49 0.22 0.27 0.02 0.62 0.55 0.62 0.79 0.94 0.89 0.56 0.87 0.96 
0.43 0.58 0.63 0.22 0.37 0.44 0.85 0.28 0.25 0.4 0.34 0.14 0.8 0.84 0.89 0.06 0.45 0.02 0.07 0.85 0.43 0.13 0.21 0.21 0.05 0.23 0.85 0.44 0.8 0.52 0.39 0.65 0.67 0.64 0.79 0.3 0.01 0.3 0.11 0.02 0.96 0.05 0.44 0.06 0.01 0.77 0.19 0.06 0.31 0.48 0.97 0.64 0.92 0.76 0.07 0.77 0.95 0.98 0.63 0.25 0.27 0.76 0.96 0.24 0.18 0.8 0.0 0.96 0.24 0.52 0.59 0.65 0.17 0.32 0.55 0.59 0.62 0.82 0.59 0.29 0.42 0.12 0.24 0.02 0.66 0.59 0.78 0.37 0.19 0.96 0.18 0.2 0.99 0.76 0.58 0.35 0.54 0.89 0.14 0.58 0.1 0.97 0.38 0.82 0.48 0.06 0.83 1.0 0.99 0.77 0.41 0.08 0.87 0.75 0.13 0.52 0.58 0.68 0.03 0.92 0.55 0.04 0.56 0.63 0.28 0.8 0.39 0.68 0.58 0.01 0.23 0.28 0.98 0.96 0.05 0.28 0.44 0.31 0.91 0.81 0.18 0.65 0.53 0.02 0.41 0.98 0.09 0.12 0.84 0.6 0.17 0.2 0.58 0.35 0.25 0.74 0.83 0.55 0.18 0.8 0.33 0.04 0.56 0.85 0.22 0.83 0.48 0.53 0.54 0.51 0.06 0.76 0.1 0.43 0.21 0.46 0.97 0.48 0.77 0.11 0.36 0.9 0.52 0.06 0.23 0.8 0.09 0.11 0.57 0.59 0.76 0.44 0.15 0.46 0.07 0.86 0.01 0.49 0.05 0.54 0.14 0.29 0.01 0.81 0.45 0.45 0.12 0.82 0.47 0.93 0.51 0.04 0.26 0.14 0.5 0.06 0.25 0.62 0.95 0.07 0.28 0.32 0.03 0.28 0.45 0.86 0.24 0.22 0.78 0.63 0.4 0.33 0.56 0.26 0.41 0.63 0.73 0.73 0.35 0.44 0.67 0.03 0.07 0.68 0.86 0.35 0.58 0.75 0.16 0.37 0.87 0.66 0.59 0.67 0.46 0.64 0.78 0.97 0.45 0.98 0.64 0.41 0.58 0.51 0.97 0.95 0.9 0.34 0.1 0.76 0.37 0.05 0.57 0.72 0.91 0.4 0.43 0.78 0.78 0.39 0.3 0.21 0.88 0.36 0.54 0.87 0.84 0.19 0.22 0.89 0.89 0.85 0.77 0.86 0.46 0.5 0.88 0.18 0.4 0.61 0.07 0.06 0.65 0.05 0.31 0.55 0.87 0.05 0.54 0.28 0.28 0.35 0.1 0.55 0.82 0.86 0.12 0.17 0.69 0.74 0.13 0.08 0.6 0.4 0.97 0.32 0.81 0.14 0.97 0.65 0.72 0.32 0.57 0.69 0.74 0.65 0.75 0.37 0.88 0.97 0.88 0.7 0.98 0.36 0.1 0.35 0.15 0.23 0.09 0.3 1.0 0.21 0.99 0.44 0.23 0.21 0.15 0.43 0.77 0.17 0.32 0.55 0.8 0.08 0.72 0.49 0.31 0.39 0.48 0.29 0.78 0.64 0.04 0.11 0.69 0.76 0.9 0.79 0.32 0.03 0.68 0.67 0.35 0.55 0.01 0.03 0.22 0.31 0.3 0.28 0.14 0.01 0.73 0.86 0.67 0.06 0.45 0.32 0.78 0.22 0.84 0.19 0.29 0.8 0.61 0.23 0.71 
0.94 0.04 0.86 0.87 0.88 0.65 0.04 0.93 0.1 0.73 0.38 0.88 0.8 0.54 0.62 0.2 0.76 0.66 0.46 0.0 0.32 0.38 0.92 0.85 0.84 0.9 0.85 0.08 0.32 0.98 0.57 0.72 0.48 0.86 0.23 1.0 0.56 0.48 0.13 0.61 0.46 0.38 0.58 0.06 0.95 0.37 0.94 0.11 0.44 0.53 0.26 0.98 0.67 0.28 0.65 0.28 0.48 0.52 0.58 0.01 0.1 0.03 0.29 0.14 0.33 0.5 0.98 0.99 0.68 0.28 0.12 0.6 0.65 0.77 0.69 0.66 0.5 0.76 0.79 0.79 0.64 0.67 0.35 0.78 0.71 0.47 0.5 0.79 0.69 0.13 0.18 0.89 0.29 0.79 0.92 0.54,1.2,100,10
|
|
|
1 |
+
v0|["qed"]||1.2|100|10|4|8|4|1|0.1|3|4|42
|
|
|
|
utils.py
CHANGED
@@ -1,6 +1,9 @@
|
|
1 |
import logging
|
2 |
from collections import defaultdict
|
3 |
-
from typing import List
|
|
|
|
|
|
|
4 |
|
5 |
import mols2grid
|
6 |
import pandas as pd
|
@@ -9,9 +12,23 @@ logger = logging.getLogger(__name__)
|
|
9 |
logger.addHandler(logging.NullHandler())
|
10 |
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
def draw_grid_generate(
|
13 |
samples: List[str],
|
14 |
-
|
|
|
15 |
n_cols: int = 3,
|
16 |
size=(140, 200),
|
17 |
) -> str:
|
@@ -27,14 +44,22 @@ def draw_grid_generate(
|
|
27 |
HTML to display
|
28 |
"""
|
29 |
|
|
|
|
|
|
|
30 |
result = defaultdict(list)
|
31 |
result.update(
|
32 |
-
{
|
33 |
-
"SMILES": seeds + samples,
|
34 |
-
"Name": [f"Seed_{i}" for i in range(len(seeds))]
|
35 |
-
+ [f"Generated_{i}" for i in range(len(samples))],
|
36 |
-
},
|
37 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
result_df = pd.DataFrame(result)
|
40 |
obj = mols2grid.display(
|
|
|
1 |
import logging
|
2 |
from collections import defaultdict
|
3 |
+
from typing import List, Callable
|
4 |
+
from gt4sd.properties import PropertyPredictorRegistry
|
5 |
+
from gt4sd.algorithms.prediction.paccmann.core import PaccMann, AffinityPredictor
|
6 |
+
|
7 |
|
8 |
import mols2grid
|
9 |
import pandas as pd
|
|
|
12 |
logger.addHandler(logging.NullHandler())
|
13 |
|
14 |
|
15 |
+
def get_affinity_function(target: str) -> Callable:
    """Create an affinity scorer bound to a single protein target.

    Args:
        target: Protein that is paired with every molecule passed to the
            returned callable.

    Returns:
        A callable taking a sized collection of molecules ``mols``. It builds
        an ``AffinityPredictor`` configuration that repeats ``target`` once
        per molecule, wraps it in ``PaccMann`` and returns the result of
        sampling ``len(mols)`` items from it.
    """

    def predict_affinity(mols):
        n_mols = len(mols)
        # The predictor takes one protein entry per ligand, so the single
        # target is repeated to line up with the molecules.
        predictor_config = AffinityPredictor(
            protein_targets=[target] * n_mols, ligands=mols
        )
        return PaccMann(predictor_config).sample(n_mols)

    return predict_affinity
|
19 |
+
|
20 |
+
|
21 |
+
EVAL_DICT = {
|
22 |
+
"qed": PropertyPredictorRegistry.get_property_predictor("qed"),
|
23 |
+
"sas": PropertyPredictorRegistry.get_property_predictor("sas"),
|
24 |
+
"molwt": PropertyPredictorRegistry.get_property_predictor("molecular_weight"),
|
25 |
+
}
|
26 |
+
|
27 |
+
|
28 |
def draw_grid_generate(
|
29 |
samples: List[str],
|
30 |
+
properties: List[str],
|
31 |
+
protein_target: str,
|
32 |
n_cols: int = 3,
|
33 |
size=(140, 200),
|
34 |
) -> str:
|
|
|
44 |
HTML to display
|
45 |
"""
|
46 |
|
47 |
+
if protein_target != "":
|
48 |
+
EVAL_DICT.update({"affinity": get_affinity_function(protein_target)})
|
49 |
+
|
50 |
result = defaultdict(list)
|
51 |
result.update(
|
52 |
+
{"SMILES": samples, "Name": [f"Generated_{i}" for i in range(len(samples))]},
|
|
|
|
|
|
|
|
|
53 |
)
|
54 |
+
if "affinity" in properties:
|
55 |
+
properties.remove("affinity")
|
56 |
+
vals = EVAL_DICT["affinity"](samples)
|
57 |
+
result["affinity"] = vals
|
58 |
+
# Fill properties
|
59 |
+
for sample in samples:
|
60 |
+
for prop in properties:
|
61 |
+
value = EVAL_DICT[prop](sample)
|
62 |
+
result[prop].append(f"{prop} = {value}")
|
63 |
|
64 |
result_df = pd.DataFrame(result)
|
65 |
obj = mols2grid.display(
|