jannisborn committed on
Commit
a7abc43
·
0 Parent(s):

Duplicate from jannisborn/gt4sd-torchdrug

Browse files
Files changed (10) hide show
  1. .gitattributes +34 -0
  2. .gitignore +1 -0
  3. LICENSE +21 -0
  4. README.md +15 -0
  5. app.py +74 -0
  6. model_cards/article.md +121 -0
  7. model_cards/description.md +10 -0
  8. model_cards/examples.csv +4 -0
  9. requirements.txt +29 -0
  10. utils.py +48 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__/
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Generative Toolkit 4 Scientific Discovery
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: MoLeR
3
+ emoji: 💡
4
+ colorFrom: green
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 3.9.1
8
+ app_file: app.py
9
+ pinned: false
10
+ python_version: 3.8.13
11
+ pypi_version: 20.2.4
12
+ duplicated_from: jannisborn/gt4sd-torchdrug
13
+ ---
14
+
15
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import pathlib
3
+
4
+ import gradio as gr
5
+ import pandas as pd
6
+ from gt4sd.algorithms.generation.torchdrug import (
7
+ TorchDrugGenerator,
8
+ TorchDrugGCPN,
9
+ TorchDrugGraphAF,
10
+ )
11
+
12
+ from gt4sd.algorithms.registry import ApplicationsRegistry
13
+ from utils import draw_grid_generate
14
+
15
+ logger = logging.getLogger(__name__)
16
+ logger.addHandler(logging.NullHandler())
17
+
18
+ TITLE = "MoLeR"
19
+
20
+
21
def run_inference(algorithm: str, algorithm_version: str, number_of_samples: int):
    """Sample from a TorchDrug generator and render the samples as a grid.

    Args:
        algorithm: Name of the generator to use, either "GCPN" or "GraphAF".
        algorithm_version: Forwarded as ``algorithm_version`` to the selected
            configuration class.
        number_of_samples: Forwarded to the generator's ``sample`` call.

    Returns:
        The result of ``draw_grid_generate`` for the drawn samples, laid out
        in five columns.

    Raises:
        ValueError: If ``algorithm`` is neither "GCPN" nor "GraphAF".
    """
    # Guard clause: reject unknown algorithm names before building anything.
    if algorithm not in ("GCPN", "GraphAF"):
        raise ValueError(f"Unsupported model {algorithm}.")

    configuration_cls = TorchDrugGCPN if algorithm == "GCPN" else TorchDrugGraphAF
    generator = TorchDrugGenerator(
        configuration=configuration_cls(algorithm_version=algorithm_version)
    )

    # Materialize the sampler output so the grid helper receives a list.
    molecules = list(generator.sample(number_of_samples))
    return draw_grid_generate(samples=molecules, n_cols=5)
34
+
35
+
36
if __name__ == "__main__":

    # Preparation: collect the versions of all registered TorchDrug algorithms.
    all_algos = ApplicationsRegistry.list_available()
    # Deduplicate and sort so the dropdown order is the same on every launch;
    # iterating a plain set of strings is not ordered consistently across runs.
    algos = sorted(
        {
            entry["algorithm_version"]
            for entry in all_algos
            if "TorchDrug" in entry["algorithm_name"]
        }
    )

    # Load metadata shipped next to this script in the model_cards folder.
    metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")

    # The examples file has no header row; empty cells become empty strings.
    examples = pd.read_csv(metadata_root.joinpath("examples.csv"), header=None).fillna(
        ""
    )

    # Read the markdown with an explicit encoding instead of the platform default.
    article = metadata_root.joinpath("article.md").read_text(encoding="utf-8")
    description = metadata_root.joinpath("description.md").read_text(encoding="utf-8")

    demo = gr.Interface(
        fn=run_inference,
        title="TorchDrug (GCPN and GraphAF)",
        inputs=[
            gr.Dropdown(["GCPN", "GraphAF"], label="Algorithm", value="GCPN"),
            gr.Dropdown(algos, label="Algorithm version", value="zinc250k_v0"),
            gr.Slider(
                minimum=1, maximum=50, value=10, label="Number of samples", step=1
            ),
        ],
        outputs=gr.HTML(label="Output"),
        article=article,
        description=description,
        examples=examples.values.tolist(),
    )
    demo.launch(debug=True, show_error=True)
model_cards/article.md ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Model documentation & parameters
2
+
3
+ **Algorithm**: Which model to use (GCPN or GraphAF).
4
+
5
+ **Algorithm Version**: Which model checkpoint to use (trained on different datasets).
6
+
7
+ **Number of samples**: How many samples should be generated (between 1 and 50).
8
+
9
+
10
+ # Model card -- GCPN
11
+
12
+ **Model Details**: GCPN is a graph-based molecular generative model that can be optimized with RL for goal-directed graph generation.
13
+
14
+ **Developers**: Jiaxuan You, Bowen Liu and co-authors from Stanford.
15
+
16
+ **Distributors**: Code provided by TorchDrug developers, wrapped and distributed by GT4SD Team (2023) from IBM Research.
17
+
18
+ **Model date**: Published in 2018.
19
+
20
+ **Model version**: Models trained by the GT4SD team on the tasks provided by the TorchDrug repo [(see their tutorial)](https://torchdrug.ai/docs/tutorials/generation.html).
21
+ - **ZINC_250k**: 250,000 drug-like molecules with a maximum atom number of 38, taken from [ZINC](https://zinc.docking.org).
22
+ - **QED**: ZINC dataset, but the model was optimized with Proximal Policy Optimization (PPO) to generate molecules with high QED scores.
23
+ - **pLogP**: ZINC dataset, but the model was optimized with Proximal Policy Optimization (PPO) to generate molecules with high pLogP scores.
24
+
25
+ **Model type**: A graph-based molecular generative model that can be optimized with RL for goal-directed graph generation.
26
+
27
+ **Information about training algorithms, parameters, fairness constraints or other applied approaches, and features**: Default parameters as provided in [(TorchDrug tutorial)](https://torchdrug.ai/docs/tutorials/generation.html).
28
+
29
+ **Paper or other resource for more information**: [Graph Convolutional Policy Network for
30
+ Goal-Directed Molecular Graph Generation (NeurIPS 2018)](https://proceedings.neurips.cc/paper/2018/file/d60678e8f2ba9c540798ebbde31177e8-Paper.pdf).
31
+
32
+ **License**: TorchDrug: Apache-2.0 license.
33
+
34
+ **Where to send questions or comments about the model**: Open an issue on [TorchDrug repository](https://github.com/DeepGraphLearning/torchdrug) or ask original authors.
35
+
36
+ **Intended Use. Use cases that were envisioned during development**: Chemical research, in particular drug discovery.
37
+
38
+ **Primary intended uses/users**: Researchers and computational chemists using the model for model comparison or research exploration purposes.
39
+
40
+ **Out-of-scope use cases**: Production-level inference, producing molecules with harmful properties.
41
+
42
+ **Factors**: Not applicable.
43
+
44
+ **Metrics**: Validation loss on decoding correct molecules.
45
+
46
+ **Datasets**: 250,000 drug-like molecules from [ZINC](https://zinc.docking.org) (with a maximum atom number of 38).
47
+
48
+ **Ethical Considerations**: Unclear, please consult with original authors in case of questions.
49
+
50
+ **Caveats and Recommendations**: Unclear, please consult with original authors in case of questions.
51
+
52
+ Model card prototype inspired by [Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)
53
+
54
+ ## Citation
55
+
56
+ ```bib
57
+ @article{you2018graph,
58
+ title={Graph convolutional policy network for goal-directed molecular graph generation},
59
+ author={You, Jiaxuan and Liu, Bowen and Ying, Zhitao and Pande, Vijay and Leskovec, Jure},
60
+ journal={Advances in neural information processing systems},
61
+ volume={31},
62
+ year={2018}
63
+ }
64
+ ```
65
+
66
+
67
+ # Model card -- GraphAF
68
+
69
+ **Model Details**: GraphAF is a flow-based autoregressive graph molecular generative model that can be optimized with RL for goal-directed graph generation.
70
+
71
+ **Developers**: Chence Shi, Minkai Xu and co-authors from Peking University, Shanghai Jiao Tong University and MILA.
72
+
73
+ **Distributors**: Code provided by TorchDrug developers, wrapped and distributed by GT4SD Team (2023) from IBM Research.
74
+
75
+ **Model date**: Published in 2020.
76
+
77
+ **Model version**: Models trained by the GT4SD team on the tasks provided by the TorchDrug repo [(see their tutorial)](https://torchdrug.ai/docs/tutorials/generation.html).
78
+ - **ZINC_250k**: 250,000 drug-like molecules with a maximum atom number of 38, taken from [ZINC](https://zinc.docking.org).
79
+ - **QED**: ZINC dataset, but the model was optimized with Proximal Policy Optimization (PPO) to generate molecules with high QED scores.
80
+ - **pLogP**: ZINC dataset, but the model was optimized with Proximal Policy Optimization (PPO) to generate molecules with high pLogP scores.
81
+
82
+ **Model type**: A flow-based autoregressive graph molecular generative model that can be optimized with RL for goal-directed graph generation.
83
+
84
+ **Information about training algorithms, parameters, fairness constraints or other applied approaches, and features**: Default parameters as provided in [(TorchDrug tutorial)](https://torchdrug.ai/docs/tutorials/generation.html).
85
+
86
+ **Paper or other resource for more information**: [GraphAF: a flow-based autoregressive model for molecular graph generation (*ICLR 2020*)](https://openreview.net/pdf?id=S1esMkHYPr).
87
+
88
+ **License**: TorchDrug: Apache-2.0 license.
89
+
90
+ **Where to send questions or comments about the model**: Open an issue on [TorchDrug repository](https://github.com/DeepGraphLearning/torchdrug) or ask original authors.
91
+
92
+ **Intended Use. Use cases that were envisioned during development**: Chemical research, in particular drug discovery.
93
+
94
+ **Primary intended uses/users**: Researchers and computational chemists using the model for model comparison or research exploration purposes.
95
+
96
+ **Out-of-scope use cases**: Production-level inference, producing molecules with harmful properties.
97
+
98
+ **Factors**: Not applicable.
99
+
100
+ **Metrics**: Validation loss on decoding correct molecules.
101
+
102
+ **Datasets**: 250,000 drug-like molecules from [ZINC](https://zinc.docking.org) (with a maximum atom number of 38).
103
+
104
+ **Ethical Considerations**: Unclear, please consult with original authors in case of questions.
105
+
106
+ **Caveats and Recommendations**: Unclear, please consult with original authors in case of questions.
107
+
108
+ Model card prototype inspired by [Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)
109
+
110
+ ## Citation
111
+
112
+ ```bib
113
+ @inproceedings{shi2020graphaf,
114
+ author = {Chence Shi and Minkai Xu and Zhaocheng Zhu and Weinan Zhang and Ming Zhang and Jian Tang},
115
+ title = {GraphAF: a Flow-based Autoregressive Model for Molecular Graph Generation},
116
+ booktitle = {International Conference on Learning Representations, {ICLR} 2020},
117
+ year = {2020},
118
+ url = {https://openreview.net/forum?id=S1esMkHYPr}
119
+ }
120
+ ```
121
+
model_cards/description.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ <img align="right" src="https://raw.githubusercontent.com/GT4SD/gt4sd-core/main/docs/_static/gt4sd_logo.png" alt="logo" width="120" >
2
+
3
+
4
+ [TorchDrug](https://github.com/DeepGraphLearning/torchdrug) is a PyTorch toolbox on graph models for drug discovery.
5
+ We, the developers of **GT4SD** (Generative Toolkit for Scientific Discovery), provide access to two graph-based molecular generative models distributed by TorchDrug:
6
+ - **GCPN**: Graph Convolutional Policy Network ([You et al. (2018), *NeurIPS*](https://proceedings.neurips.cc/paper/2018/hash/d60678e8f2ba9c540798ebbde31177e8-Abstract.html))
7
+ - **GraphAF**: GraphAF: a Flow-based Autoregressive Model for Molecular Graph Generation ([Shi et al. (2020), *ICLR*](https://openreview.net/forum?id=S1esMkHYPr))
8
+
9
+ For **examples** and **documentation** of the model parameters, please see below.
10
+ Moreover, we provide a **model card** ([Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)) at the bottom of this page.
model_cards/examples.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ GCPN,zinc250k_v0,5
2
+ GCPN,qed_v0,10
3
+ GraphAF,plogp_v0,5
4
+
requirements.txt ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -f https://download.pytorch.org/whl/cpu/torch_stable.html
2
+ -f https://data.pyg.org/whl/torch-1.12.1+cpu.html
3
+ # pip==20.2.4
4
+ torch==1.12.1
5
+ torch-scatter
6
+ torch-spline-conv
7
+ torch-sparse
8
+ torch-geometric
9
+ torchvision==0.13.1
10
+ torchaudio==0.12.1
11
+ gt4sd>=1.0.0
12
+ molgx>=0.22.0a1
13
+ molecule_generation
14
+ nglview
15
+ PyTDC==0.3.7
16
+ gradio==3.12.0
17
+ markdown-it-py>=2.1.0
18
+ mols2grid>=0.2.0
19
+ numpy==1.23.5
20
+ pandas>=1.0.0
21
+ terminator @ git+https://github.com/IBM/regression-transformer@gt4sd
22
+ guacamol_baselines @ git+https://github.com/GT4SD/guacamol_baselines.git@v0.0.2
23
+ moses @ git+https://github.com/GT4SD/moses.git@v0.1.0
24
+ paccmann_chemistry @ git+https://github.com/PaccMann/paccmann_chemistry@0.0.4
25
+ paccmann_generator @ git+https://github.com/PaccMann/paccmann_generator@0.0.2
26
+ paccmann_gp @ git+https://github.com/PaccMann/paccmann_gp@0.1.1
27
+ paccmann_omics @ git+https://github.com/PaccMann/paccmann_omics@0.0.1.1
28
+ paccmann_predictor @ git+https://github.com/PaccMann/paccmann_predictor@sarscov2
29
+ reinvent_models @ git+https://github.com/GT4SD/reinvent_models@v0.0.1
utils.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
from collections import defaultdict
from typing import List, Optional

import mols2grid
import pandas as pd
7
+
8
+ logger = logging.getLogger(__name__)
9
+ logger.addHandler(logging.NullHandler())
10
+
11
+
12
def draw_grid_generate(
    samples: List[str],
    seeds: Optional[List[str]] = None,
    n_cols: int = 3,
    size=(140, 200),
) -> str:
    """
    Uses mols2grid to draw an HTML grid for the generated molecules.

    Args:
        samples: The generated samples; placed in the "SMILES" column and
            named "Generated_<i>".
        seeds: Optional seed molecules, listed before the samples and named
            "Seed_<i>". Defaults to None, which is treated as no seeds.
        n_cols: Number of columns in grid. Defaults to 3.
        size: Size of molecule in grid. Defaults to (140, 200).

    Returns:
        The ``data`` attribute of the object returned by ``mols2grid.display``
        (presumably the HTML markup to display; confirm against mols2grid).
    """
    # A None default avoids sharing one mutable list object between calls.
    seeds = [] if seeds is None else seeds

    result = {
        "SMILES": seeds + samples,
        "Name": [f"Seed_{i}" for i in range(len(seeds))]
        + [f"Generated_{i}" for i in range(len(samples))],
    }

    result_df = pd.DataFrame(result)
    obj = mols2grid.display(
        result_df,
        # Show every column (SMILES and Name) in the tooltip.
        tooltip=list(result),
        height=1100,
        n_cols=n_cols,
        name="Results",
        size=size,
    )
    return obj.data