Spaces:
Running
on
L4
Running
on
L4
File size: 6,890 Bytes
1825dee 60fae37 85ad568 60fae37 375ee1a 93c23b2 573f2cc ea823ae 573f2cc 971a03f 93c23b2 573f2cc 5ff04d4 bff3320 8704b30 76c9d92 f07faaf 573f2cc 8704b30 573f2cc 8704b30 76c9d92 f07faaf 8704b30 573f2cc 8704b30 573f2cc 375ee1a 42f5e17 7d63449 b8836f0 93c23b2 b8836f0 60fae37 93c23b2 69b80eb 93c23b2 7d63449 1f2402d 7d63449 dee3f5c d1603c7 cd3e25c 9e26736 dee3f5c 8198e38 7d63449 60fae37 6bb464c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
import spaces
import gradio as gr
from gradio_molecule3d import Molecule3D
from gradio_cofoldinginput import CofoldingInput
import os
import re
import urllib.request
import yaml
#from msa import run_mmseqs2
# Remote locations of the Boltz-1 assets that are fetched at startup.
CCD_URL = "https://huggingface.co/boltz-community/boltz-1/resolve/main/ccd.pkl"
MODEL_URL = "https://huggingface.co/boltz-community/boltz-1/resolve/main/boltz1.ckpt"

# Local directory that holds the downloaded assets.
cache = "/home/user/.boltz"
# exist_ok=True so that a start with an already-populated cache directory
# does not raise FileExistsError; without it the "skip if already
# downloaded" checks below could never take effect.
os.makedirs(cache, exist_ok=True)

# Download the CCD dictionary only when it is not cached yet.
ccd = f"{cache}/ccd.pkl"
if not os.path.exists(ccd):
    print(
        f"Downloading the CCD dictionary to {ccd}."
    )
    urllib.request.urlretrieve(CCD_URL, str(ccd))

# Download the model weights only when they are not cached yet.
model = f"{cache}/boltz1.ckpt"
if not os.path.exists(model):
    print(
        f"Downloading the model weights to {model}"
    )
    urllib.request.urlretrieve(MODEL_URL, str(model))
def _build_boltz_input(inputs):
    """Translate the CofoldingInput payload into Boltz YAML data.

    The payload is expected to look like::

        {"chains": [
            {"class": "DNA", "sequence": "ATGCGT", "chain": "A"}
         ],
         "covMods": []}

    Args:
        inputs: dict with a "chains" list and an optional "covMods" list.

    Returns:
        A ``(output, representations)`` tuple: ``output`` is the dict that is
        dumped to YAML (``{"sequences": [...]}``) and ``representations`` is
        the list of per-chain style dicts handed to ``Molecule3D``.

    Raises:
        gr.Error: for covalent modifications, SDF-only ligands, or ligands
            with neither a 3-letter CCD code nor a SMILES string.
    """
    # The check does not depend on the chain, so reject once, up front.
    if len(inputs.get("covMods") or []) > 0:
        raise gr.Error("Sorry, covMods not supported yet. Coming soon. ")

    output = {"sequences": []}
    representations = []
    for chain in inputs["chains"]:
        entity_type = chain["class"].lower()
        entity = {"id": chain["chain"]}

        if entity_type in ("protein", "dna", "rna"):
            entity["sequence"] = chain["sequence"]
            # Only an explicit msa=False disables the MSA; a missing key
            # keeps the default (MSA enabled) instead of raising KeyError.
            if entity_type == "protein" and chain.get("msa", True) is False:
                entity["msa"] = "empty"
            # NOTE(review): polymer chains are all drawn as cartoon here;
            # confirm this matches the intended nesting of the original.
            representations.append(
                {"model": 0, "chain": chain["chain"], "style": "cartoon"}
            )
        elif entity_type == "ligand":
            name = chain.get("name") or ""
            sdf = chain.get("sdf") or ""
            if sdf != "" and name == "":
                raise gr.Error("Sorry, no SDF support yet.")
            if len(name) == 3:
                entity["ccd"] = name
            elif chain.get("smiles"):
                # An empty SMILES string is treated as "no ligand given".
                entity["smiles"] = chain["smiles"]
            else:
                raise gr.Error("No ligand found, or not in the right format. CCD codes have 3 letters")
            representations.append(
                {"model": 0, "chain": chain["chain"], "style": "stick", "color": "greenCarbon"}
            )

        output["sequences"].append({entity_type: entity})
    return output, representations


@spaces.GPU(duration=120)
def predict(jobname, inputs, recycling_steps, sampling_steps, diffusion_samples):
    """Run a Boltz prediction for the given chains and return the 3D viewer.

    Args:
        jobname: user-supplied job name; characters ``<>:"/\\|?*`` are
            replaced by ``_`` and the result names the working directory.
        inputs: CofoldingInput payload (see ``_build_boltz_input``).
        recycling_steps: value for ``--recycling_steps`` (cast to int).
        sampling_steps: value for ``--sampling_steps`` (cast to int).
        diffusion_samples: value for ``--diffusion_samples`` (cast to int).

    Returns:
        A ``Molecule3D`` component pointing at
        ``{jobname}/boltz_results_{jobname}/predictions/{jobname}/{jobname}_model_0.pdb``.

    Raises:
        gr.Error: on any failure; the original exception text is embedded in
            the message.
    """
    # Function-scope import: the file's import block does not provide it.
    import subprocess

    try:
        jobname = re.sub(r'[<>:"/\\|?*]', '_', jobname)
        # "." and ".." would make the job write into the current/parent
        # directory instead of a directory of its own.
        if jobname in ("", ".", ".."):
            raise gr.Error("Job name empty or only invalid characters. Choose a plaintext name.")
        # A leading dash would be parsed as a command-line option by the CLI.
        if jobname.startswith("-"):
            raise gr.Error("Job name must not start with '-'. Choose a plaintext name.")
        os.makedirs(jobname, exist_ok=True)

        output, representations = _build_boltz_input(inputs)

        # Write the YAML input file for Boltz.
        yaml_file_path = f"{jobname}/{jobname}.yaml"
        with open(yaml_file_path, "w") as file:
            yaml.dump(output, file, sort_keys=False, default_flow_style=False)
        # Echo the generated input to the log (no shell involved).
        with open(yaml_file_path) as file:
            print(file.read())

        # Argument list instead of a shell string: the job name and the
        # numeric settings come from the web UI / API and must never be
        # interpreted by a shell. check=True surfaces a failed run directly.
        subprocess.run(
            [
                "boltz", "predict", yaml_file_path,
                "--use_msa_server",
                "--out_dir", jobname,
                "--recycling_steps", str(int(recycling_steps)),
                "--sampling_steps", str(int(sampling_steps)),
                "--diffusion_samples", str(int(diffusion_samples)),
                "--override",
                "--output_format", "pdb",
            ],
            check=True,
        )

        print(os.listdir(jobname))
        print(os.listdir(f"{jobname}/boltz_results_{jobname}/predictions/{jobname}/"))
        return Molecule3D(f"{jobname}/boltz_results_{jobname}/predictions/{jobname}/{jobname}_model_0.pdb", label="Output", reps=representations)
    except Exception as e:
        # Every failure is reported to the UI as a gr.Error; keep the cause
        # chained for the server-side traceback.
        raise gr.Error(f"failed with error:{e}") from e
# Gradio UI: a "Main" tab with the job inputs and the 3D viewer, a "Settings"
# tab with the numeric options, example inputs, and the button that runs
# predict().
with gr.Blocks() as blocks:
    gr.Markdown("# Boltz-1")
    gr.Markdown("""Open GUI for running [Boltz-1 model](https://github.com/jwohlwend/boltz/) <br>
Key components:
- MMSeqs2 Webserver [Mirdita et al.](https://www.nature.com/articles/s41592-022-01488-1)
- Boltz-1 Model [Wohlwend et al.](https://github.com/jwohlwend/boltz/)
- Gradio Custom Components [Molecule3D](https://huggingface.co/spaces/simonduerr/gradio_molecule3d)/[Cofolding Input](https://huggingface.co/spaces/simonduerr/gradio_cofoldinginput) by myself
- [3dmol.js Rego & Koes](https://academic.oup.com/bioinformatics/article/31/8/1322/213186)
Note: This is an alpha: Some things like covalent modifications or using sdf files don't work yet. You can run a Docker image of this on your local infrastructure easily using:
`docker run -it -p 7860:7860 --platform=linux/amd64 --gpus all registry.hf.space/simonduerr-boltz-1:latest python app.py`
""")
    with gr.Tab("Main"):
        jobname = gr.Textbox(label="Jobname")
        inp = CofoldingInput(label="Input")
        out = Molecule3D(label="Output")
    with gr.Tab("Settings"):
        # step=1: the values are forwarded to integer command-line options.
        recycling_steps = gr.Slider(value=3, minimum=0, step=1, label="Recycling steps")
        # Explicit maximum: the Slider default maximum is 100, which is below
        # the default value of 200.
        sampling_steps = gr.Slider(value=200, minimum=0, maximum=1000, step=1, label="Sampling steps")
        # At least one sample is needed for the *_model_0.pdb output to exist.
        diffusion_samples = gr.Slider(value=1, minimum=1, step=1, label="Diffusion samples")
    # NOTE(review): examples and button are placed below the tabs; confirm
    # against the intended layout.
    gr.Examples(
        [
            ["TOP7", {"chains": [{"class": "protein", "msa": True, "sequence": "MGDIQVQVNIDDNGKNFDYTYTVTTESELQKVLNELMDYIKKQGAKRVRISITARTKKEAEKFAAILIKVFAELGYNDINVTFDGDTVTVEGQLEGGSLEHHHHHH", "chain": "A"}], "covMods": []}],
            ["ApixacabanBinderSmiles", {"chains": [{"class": "protein", "msa": True, "sequence": "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL", "chain": "A"}, {"class": "ligand", "smiles": "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O", "sdf": "", "name": "", "chain": "B"}], "covMods": []}],
            ["ApixacabanBinderCCD", {"chains": [{"class": "protein", "msa": True, "sequence": "SVKSEYAEAAAVGQEAVAVFNTMKAAFQNGDKEAVAQYLARLASLYTRHEELLNRILEKARREGNKEAVTLMNEFTATFQTGKSIFNAMVAAFKNGDDDSFESYLQALEKVTAKGETLADQIAKAL", "chain": "A"}, {"class": "ligand", "name": "GG2", "sdf": "", "chain": "B"}], "covMods": []}],
        ],
        inputs=[jobname, inp],
    )
    btn = gr.Button("predict")
    btn.click(fn=predict, inputs=[jobname, inp, recycling_steps, sampling_steps, diffusion_samples], outputs=[out], api_name="predict")

blocks.launch(ssr_mode=False)