Spaces:
Running
on
L4
Running
on
L4
File size: 5,869 Bytes
1825dee 60fae37 85ad568 60fae37 375ee1a 93c23b2 573f2cc ea823ae 573f2cc 93c23b2 573f2cc 5ff04d4 bff3320 8704b30 76c9d92 f07faaf 573f2cc 8704b30 573f2cc 8704b30 76c9d92 f07faaf 8704b30 573f2cc 8704b30 573f2cc 375ee1a 42f5e17 7d63449 93c23b2 2e2af5c 93c23b2 9358437 69b80eb 93c23b2 69b80eb 93c23b2 f5e0235 93c23b2 f5e0235 69b80eb 93c23b2 4d3557b 93c23b2 69b80eb 60fae37 93c23b2 69b80eb 93c23b2 7d63449 1f2402d 7d63449 dee3f5c 23d3986 dee3f5c 8198e38 7d63449 60fae37 6bb464c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
import spaces
import gradio as gr
from gradio_molecule3d import Molecule3D
from gradio_cofoldinginput import CofoldingInput
import os
import re
import urllib.request
import yaml
from msa import run_mmseqs2
# Remote locations of the two assets fetched at start-up.
CCD_URL = "https://huggingface.co/boltz-community/boltz-1/resolve/main/ccd.pkl"
MODEL_URL = "https://huggingface.co/boltz-community/boltz-1/resolve/main/boltz1.ckpt"

# Directory the CCD dictionary and the model checkpoint are downloaded into.
cache = "/home/user/.boltz"
# exist_ok=True: the directory survives across restarts of the same
# container, and a second start must not crash with FileExistsError.
os.makedirs(cache, exist_ok=True)

# Download the CCD dictionary (skipped when the file is already present).
ccd = f"{cache}/ccd.pkl"
if not os.path.exists(ccd):
    print(f"Downloading the CCD dictionary to {ccd}.")
    urllib.request.urlretrieve(CCD_URL, str(ccd))

# Download the model weights (skipped when the file is already present).
model = f"{cache}/boltz1.ckpt"
if not os.path.exists(model):
    print(f"Downloading the model weights to {model}")
    urllib.request.urlretrieve(MODEL_URL, str(model))
@spaces.GPU(duration=120)
def predict(jobname, inputs, recycling_steps, sampling_steps, diffusion_samples):
    """Run a Boltz-1 prediction for the chains entered in the UI.

    The chains are converted into a Boltz YAML input file, an MSA is requested
    through ``run_mmseqs2`` for the protein chains, the ``boltz predict``
    command line tool is executed, and the first predicted model is returned
    for display.

    Args:
        jobname: Free-text job name; used as the working directory name and
            as the stem of the YAML file after sanitising.
        inputs: Value of the ``CofoldingInput`` component, e.g.
            ``{"chains": [{"class": "DNA", "sequence": "ATGCGT",
            "chain": "A"}], "covMods": []}``.
        recycling_steps: Passed to ``boltz predict --recycling_steps``.
        sampling_steps: Passed to ``boltz predict --sampling_steps``.
        diffusion_samples: Passed to ``boltz predict --diffusion_samples``.

    Returns:
        A ``Molecule3D`` component pointing at ``<jobname>_model_0.pdb``.

    Raises:
        gr.Error: For an unusable job name, missing chains, unsupported
            inputs (SDF ligands, covalent modifications) or a failed
            Boltz run.
    """
    # Imported here so the block does not depend on a file-level import.
    import subprocess

    jobname = re.sub(r'[<>:"/\\|?*]', "_", (jobname or "").strip())
    # Invalid characters are replaced by "_", so a name made only of them is
    # never empty; "." and ".." would additionally point outside the job dir.
    if not jobname.strip("._"):
        raise gr.Error("Job name empty or only invalid characters. Choose a plaintext name.")

    if not inputs or not inputs.get("chains"):
        raise gr.Error("Please add at least one chain before running a prediction.")
    # Checked once up front: it does not depend on the individual chain.
    if inputs.get("covMods"):
        raise gr.Error("Sorry, covMods not supported yet. Coming soon. ")

    msa_path = f"{jobname}/msa.a3m"
    sequences_for_msa = []
    representations = []
    output = {"sequences": []}

    for chain in inputs["chains"]:
        entity_type = chain["class"].lower()
        entity = {"id": chain["chain"]}

        if entity_type in ("protein", "dna", "rna"):
            entity["sequence"] = chain["sequence"]
            if entity_type == "protein":
                sequences_for_msa.append(chain["sequence"])
                entity["msa"] = msa_path
            # NOTE(review): the original indentation is lost; the cartoon
            # style is applied to every polymer so DNA/RNA are drawn too.
            representations.append({"model": 0, "chain": chain["chain"], "style": "cartoon"})

        if entity_type == "ligand":
            if "sdf" in chain:
                raise gr.Error("Sorry no SDF support yet")
            if "name" in chain:
                entity["ccd"] = chain["name"]
            if "smiles" in chain:
                entity["smiles"] = chain["smiles"]
            representations.append(
                {"model": 0, "chain": chain["chain"], "style": "stick", "color": "greenCarbon"}
            )

        output["sequences"].append({entity_type: entity})

    # Only touch the filesystem once the input has been validated.
    os.makedirs(jobname, exist_ok=True)

    # Write the YAML input file and echo it to the log.
    yaml_file_path = f"{jobname}/{jobname}.yaml"
    yaml_text = yaml.dump(output, sort_keys=False, default_flow_style=False)
    with open(yaml_file_path, "w") as file:
        file.write(yaml_text)
    print(yaml_text)

    # The MSA is only referenced by protein chains, so skip the server
    # round-trip when there are none.
    if sequences_for_msa:
        a3m_lines_mmseqs2 = run_mmseqs2(
            sequences_for_msa,
            f"./{jobname}",
            use_templates=False,
        )
        # NOTE(review): every protein chain points at this single file and all
        # returned entries are written into it - confirm this is what
        # run_mmseqs2 / Boltz expect for inputs with several protein chains.
        with open(msa_path, "w+") as fp:
            fp.writelines(a3m_lines_mmseqs2)

    # Argument list instead of a shell string: the job name comes from the web
    # UI and must never be interpreted by a shell. The "./" prefix keeps a
    # name starting with "-" from being read as a command line option.
    command = [
        "boltz", "predict", f"./{yaml_file_path}",
        "--out_dir", f"./{jobname}",
        "--recycling_steps", str(int(recycling_steps)),
        "--sampling_steps", str(int(sampling_steps)),
        "--diffusion_samples", str(int(diffusion_samples)),
        "--override",
        "--output_format", "pdb",
    ]
    result = subprocess.run(command, check=False)
    if result.returncode != 0:
        raise gr.Error(f"Boltz prediction failed (exit code {result.returncode}). Check the logs.")

    prediction_dir = f"{jobname}/boltz_results_{jobname}/predictions/{jobname}"
    model_path = f"{prediction_dir}/{jobname}_model_0.pdb"
    if not os.path.exists(model_path):
        raise gr.Error("Boltz finished without writing a structure. Check the logs.")

    print(os.listdir(jobname))
    print(os.listdir(prediction_dir))
    return Molecule3D(model_path, label="Output", reps=representations)
# Gradio UI: a "Main" tab with job name, chain input and 3D output, a
# "Settings" tab with the Boltz sampling options, two examples and the
# button that triggers predict().
with gr.Blocks() as blocks:
    gr.Markdown("# Boltz-1")
    gr.Markdown("""Open GUI for running [Boltz-1 model](https://github.com/jwohlwend/boltz/) <br>
Key components:
- MMSeqs2 Webserver [Mirdita et al.](https://www.nature.com/articles/s41592-022-01488-1)
- Boltz-1 Model [Wohlwend et al.](https://github.com/jwohlwend/boltz/)
- Gradio Custom Components [Molecule3D](https://huggingface.co/spaces/simonduerr/gradio_molecule3d)/[Cofolding Input](https://huggingface.co/spaces/simonduerr/gradio_cofoldinginput) by myself
- [3dmol.js Rego & Koes](https://academic.oup.com/bioinformatics/article/31/8/1322/213186)
Note: This is an alpha: Some things like covalent modifications or using sdf files don't work yet. You can run a Docker image of this on your local infrastructure easily using:
`docker run -it -p 7860:7860 --platform=linux/amd64 --gpus all registry.hf.space/simonduerr-boltz-1:latest python app.py`
""")
    with gr.Tab("Main"):
        jobname = gr.Textbox(label="Jobname")
        inp = CofoldingInput(label="Input")
        out = Molecule3D(label="Output")
    with gr.Tab("Settings"):
        # step=1 keeps the values integral, as they are passed to CLI options.
        recycling_steps = gr.Slider(value=3, minimum=0, step=1, label="Recycling steps")
        # An explicit maximum is required: the slider's default maximum is
        # 100, which is below the default value of 200.
        sampling_steps = gr.Slider(value=200, minimum=0, maximum=1000, step=1, label="Sampling steps")
        # At least one sample is needed to obtain model_0.
        diffusion_samples = gr.Slider(value=1, minimum=1, step=1, label="Diffusion samples")
    # NOTE(review): the original nesting of the examples and the button is not
    # recoverable from the source; they are placed outside the tabs so they
    # stay visible whichever tab is active.
    gr.Examples(
        [
            ["TOP7", {"chains": [{"class": "protein", "sequence": "MGDIQVQVNIDDNGKNFDYTYTVTTESELQKVLNELMDYIKKQGAKRVRISITARTKKEAEKFAAILIKVFAELGYNDINVTFDGDTVTVEGQLEGGSLEHHHHHH", "chain": "A"}], "covMods": []}],
            # The ligand gets its own chain id; it previously reused "A",
            # which is already taken by the protein.
            ["ApixacabanBinder", {"chains": [{"class": "protein", "sequence": "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL", "chain": "A"}, {"class": "ligand", "smiles": "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O", "chain": "B"}], "covMods": []}],
        ],
        inputs=[jobname, inp],
    )
    btn = gr.Button("predict")
    btn.click(
        fn=predict,
        inputs=[jobname, inp, recycling_steps, sampling_steps, diffusion_samples],
        outputs=[out],
        api_name="predict",
    )

blocks.launch(ssr_mode=False)