# Spaces: Running  (web-page header captured together with the source; not part of the program)
import ast
import logging
import pathlib
from typing import List

import gradio as gr
import pandas as pd
from gt4sd.algorithms.controlled_sampling.paccmann_gp import (
    PaccMannGPGenerator,
    PaccMannGP,
)
from gt4sd.algorithms.controlled_sampling.paccmann_gp.implementation import (
    MINIMIZATION_FUNCTIONS,
)
from gt4sd.algorithms.registry import ApplicationsRegistry

from utils import draw_grid_generate
# Module-level logger; the NullHandler keeps this module silent unless the
# embedding application configures logging itself.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

# Drop the "callable" and "molwt" entries so they are not offered as
# "Property goals" choices in the UI (the choices are built from this dict's
# keys). NOTE: this mutates the dict imported from gt4sd in place, so the
# removal is visible to every other user of that dict in this process.
MINIMIZATION_FUNCTIONS.pop("callable", None)
MINIMIZATION_FUNCTIONS.pop("molwt", None)
def run_inference(
    algorithm_version: str,
    targets: List[str],
    protein_target: str,
    temperature: float,
    length: float,
    number_of_samples: int,
    limit: float,
    number_of_steps: int,
    number_of_initial_points: int,
    number_of_optimization_rounds: int,
    sampling_variance: float,
    samples_for_evaluation: int,
    maximum_number_of_sampling_steps: int,
    seed: int,
):
    """Sample molecules with PaccMannGP and render them as a grid.

    Args:
        algorithm_version: Version identifier; only the part after the last
            underscore is forwarded to the generator configuration.
        targets: Names of the properties to optimise. Each becomes a key of
            the target dictionary handed to ``PaccMannGP``.
        protein_target: Protein sequence. Required (non-blank) when
            ``"affinity"`` is among ``targets``; ignored otherwise.
        temperature: Decoding temperature.
        length: Maximal sequence length; converted to ``int`` before use.
        number_of_samples: Number of samples to draw from the model.
        limit: Forwarded unchanged as the generator's ``limit``.
        number_of_steps: Forwarded unchanged to the generator.
        number_of_initial_points: Forwarded unchanged to the generator.
        number_of_optimization_rounds: Forwarded unchanged to the generator.
        sampling_variance: Forwarded unchanged to the generator.
        samples_for_evaluation: Forwarded unchanged to the generator.
        maximum_number_of_sampling_steps: Forwarded unchanged to the generator.
        seed: Forwarded unchanged to the generator.

    Returns:
        Whatever ``draw_grid_generate`` returns for the sampled molecules
        (presumably an HTML string, since the interface output is ``gr.HTML``
        -- confirm against ``utils.draw_grid_generate``).

    Raises:
        ValueError: If ``"affinity"`` is requested but ``protein_target`` is
            not a string or is empty/whitespace-only.
    """
    # Validate the inputs first so a bad request fails before any model
    # configuration is built.
    target = {name: {} for name in targets}
    if "affinity" in targets:
        if not isinstance(protein_target, str) or not protein_target.strip():
            raise ValueError(
                f"Protein target must be specified for affinity prediction, not ={protein_target}"
            )
        # Surrounding whitespace (e.g. from a pasted sequence) is not part of
        # the protein sequence.
        protein_target = protein_target.strip()
        target["affinity"]["protein"] = protein_target
    else:
        # The protein is irrelevant without the affinity objective; blank it
        # so it is not shown in the rendered grid.
        protein_target = ""

    config = PaccMannGPGenerator(
        algorithm_version=algorithm_version.split("_")[-1],
        batch_size=32,
        temperature=temperature,
        # A sequence length is a count; UI sliders may deliver it as a float.
        generated_length=int(length),
        limit=limit,
        acquisition_function="EI",
        number_of_steps=number_of_steps,
        number_of_initial_points=number_of_initial_points,
        initial_point_generator="random",
        number_of_optimization_rounds=number_of_optimization_rounds,
        sampling_variance=sampling_variance,
        samples_for_evaluation=samples_for_evaluation,
        maximum_number_of_sampling_steps=maximum_number_of_sampling_steps,
        seed=seed,
    )

    model = PaccMannGP(config, target=target)
    samples = list(model.sample(number_of_samples))

    return draw_grid_generate(
        samples=samples,
        n_cols=5,
        properties=set(target),
        protein_target=protein_target,
    )
if __name__ == "__main__":
    # Preparation: collect the version of every registered algorithm whose
    # name contains "PaccMannGP".
    all_algos = ApplicationsRegistry.list_available()
    algos = [
        x["algorithm_version"]
        for x in all_algos
        if "PaccMannGP" in x["algorithm_name"]
    ]

    # Load metadata (examples, article and description) from ./model_cards.
    metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")

    examples = pd.read_csv(
        metadata_root.joinpath("examples.csv"), header=None, sep="|"
    ).fillna("")
    # Column 1 is stored as text and must be turned back into a Python object
    # (presumably the list of property goals -- confirm against examples.csv).
    # ast.literal_eval only accepts plain literals, so a tampered CSV cannot
    # execute arbitrary code the way eval() would.
    examples[1] = examples[1].apply(ast.literal_eval)

    article = metadata_root.joinpath("article.md").read_text(encoding="utf-8")
    description = metadata_root.joinpath("description.md").read_text(
        encoding="utf-8"
    )

    # The input components are listed in the same order as the parameters of
    # run_inference, because gr.Interface passes their values positionally.
    demo = gr.Interface(
        fn=run_inference,
        title="PaccMannGP",
        inputs=[
            gr.Dropdown(algos, label="Algorithm version", value="v0"),
            # A CheckboxGroup already allows several selections; `multiselect`
            # is a Dropdown option and is not a CheckboxGroup parameter.
            gr.CheckboxGroup(
                choices=list(MINIMIZATION_FUNCTIONS.keys()),
                value=["qed"],
                label="Property goals",
            ),
            gr.Textbox(
                label="Protein target",
                placeholder="MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTT",
                lines=1,
            ),
            gr.Slider(minimum=0.5, maximum=2, value=1, label="Decoding temperature"),
            gr.Slider(
                minimum=5,
                maximum=400,
                value=100,
                label="Maximal sequence length",
                step=1,
            ),
            gr.Slider(
                minimum=1, maximum=50, value=10, label="Number of samples", step=1
            ),
            gr.Slider(minimum=1, maximum=8, value=4.0, label="Limit"),
            gr.Slider(minimum=1, maximum=32, value=8, label="Number of steps", step=1),
            gr.Slider(
                minimum=1, maximum=32, value=4, label="Number of initial points", step=1
            ),
            gr.Slider(
                minimum=1,
                maximum=4,
                value=1,
                label="Number of optimization rounds",
                step=1,
            ),
            gr.Slider(minimum=0.01, maximum=1, value=0.1, label="Sampling variance"),
            gr.Slider(
                minimum=1,
                maximum=10,
                value=1,
                label="Samples used for evaluation",
                step=1,
            ),
            gr.Slider(
                minimum=1,
                maximum=64,
                value=4,
                label="Maximum number of sampling steps",
                step=1,
            ),
            gr.Number(value=42, label="Seed", precision=0),
        ],
        outputs=gr.HTML(label="Output"),
        article=article,
        description=description,
        examples=examples.values.tolist(),
    )
    demo.launch(debug=True, show_error=True)