blackhole_models_pli

Running

App Files Files Community

blackhole_models_pli / inference_app.py

OleinikovasV

Update inference_app.py

063629c verified 5 months ago

raw

history blame

4.33 kB


	import time

	import gradio as gr

	from gradio_molecule3d import Molecule3D

	import numpy as np
	from biotite.structure.io.pdb import PDBFile
	from rdkit import Chem
	from rdkit.Chem import AllChem


	def generate_input_conformer(
	    ligand_smiles: str,
	    addHs: bool = False,
	    minimize_maxIters: int = -1,
	) -> Chem.Mol:
	    """Build a 3D conformer for a ligand given as a SMILES string.

	    Args:
	        ligand_smiles: SMILES representation of the ligand.
	        addHs: Currently unused; explicit hydrogens are always added
	            before embedding and are kept on the returned molecule.
	        minimize_maxIters: If > 0, run an MMFF optimization with this many
	            iterations after embedding (run once more with the same budget
	            if the first pass reports that it has not converged).

	    Returns:
	        The molecule (with hydrogens) carrying the embedded conformer.

	    Raises:
	        ValueError: If the SMILES cannot be parsed, or if neither embedding
	            attempt produces a conformer.
	    """
	    _mol = Chem.MolFromSmiles(ligand_smiles)
	    if _mol is None:
	        # MolFromSmiles signals a parse failure by returning None.
	        raise ValueError(f"Could not parse ligand SMILES: {ligand_smiles!r}")
	    # need to add Hs to generate sensible conformers
	    _mol = Chem.AddHs(_mol)

	    # settings shared by both embedding attempts; fixed seed for reproducibility
	    embed_kwargs = {
	        "useRandomCoords": True,
	        "maxAttempts": 100,
	        "randomSeed": 42,
	    }

	    # first attempt: embedding with basic chemical knowledge enabled
	    confid = AllChem.EmbedMolecule(_mol, useBasicKnowledge=True, **embed_kwargs)
	    if confid == -1:
	        # EmbedMolecule failed - retry with the less optimal approach
	        confid = AllChem.EmbedMolecule(
	            _mol, useBasicKnowledge=False, **embed_kwargs
	        )
	    if confid == -1:
	        # Fail loudly here instead of returning a molecule without
	        # coordinates that would break later on GetConformer().
	        raise ValueError(
	            f"Could not embed a 3D conformer for ligand SMILES: {ligand_smiles!r}"
	        )

	    if minimize_maxIters > 0:
	        # molecule successfully embedded - minimize
	        success = AllChem.MMFFOptimizeMolecule(_mol, maxIters=minimize_maxIters)
	        # 0 if the optimization converged,
	        # -1 if the forcefield could not be set up,
	        # 1 if more iterations are required.
	        if success == 1:
	            # extend optimization to double the steps (extends by the same amount)
	            AllChem.MMFFOptimizeMolecule(_mol, maxIters=minimize_maxIters)
	    return _mol


	def set_protein_to_new_coord(input_pdb_file, new_coord, output_file):
	    """Write a copy of a PDB structure with every atom moved to one point.

	    Args:
	        input_pdb_file: PDB input accepted by ``PDBFile.read``.
	        new_coord: Sequence or array of three numbers (x, y, z) that every
	            atom is placed at.
	        output_file: Destination handed to ``PDBFile.write``.

	    Raises:
	        ValueError: If ``new_coord`` is not a single 3-component point.
	    """
	    structure = PDBFile.read(input_pdb_file).get_structure()

	    current = structure.coord
	    point = np.asarray(new_coord, dtype=current.dtype)
	    if point.shape != (3,):
	        raise ValueError(
	            f"new_coord must have shape (3,), got shape {point.shape}"
	        )

	    # Take the target shape from the structure itself so the assignment
	    # works whether the coordinates are 2-D (atoms, 3) or 3-D
	    # (models, atoms, 3). broadcast_to returns a read-only view, so copy it
	    # to give the structure its own writable array.
	    structure.coord = np.broadcast_to(point, current.shape).copy()

	    out_pdb = PDBFile()
	    out_pdb.set_structure(structure)
	    out_pdb.write(output_file)


	def predict(input_sequence, input_ligand, input_msa, input_protein):
	    """Run the (placeholder) inference for one protein/ligand pair.

	    Generates a ligand conformer from the SMILES, writes it to an SDF file,
	    and writes a copy of the input protein with all atoms moved to the
	    ligand centroid.

	    Args:
	        input_sequence: Protein sequence text from the UI (not used here).
	        input_ligand: Ligand SMILES string.
	        input_msa: MSA file from the UI (not used here).
	        input_protein: Protein structure input passed on to
	            ``set_protein_to_new_coord``.

	    Returns:
	        A tuple ``([pdb_path, sdf_path], metrics, run_time)`` where
	        ``metrics`` is a dict and ``run_time`` is the elapsed wall-clock
	        time in seconds.
	    """
	    start_time = time.time()

	    ligand_file = "test_docking_pose.sdf"
	    output_file = "test_out.pdb"

	    # Do inference here
	    mol = generate_input_conformer(input_ligand, minimize_maxIters=100)

	    # Close the writer explicitly so the SDF is flushed to disk before its
	    # path is handed back to the caller.
	    molwriter = Chem.SDWriter(ligand_file)
	    try:
	        molwriter.write(mol)
	    finally:
	        molwriter.close()

	    mol_coords = mol.GetConformer().GetPositions()
	    # new_coord = [0, 0, 0]
	    # Average over atoms (axis 0) to get a single (x, y, z) centroid;
	    # axis 1 would instead give one number per atom.
	    new_coord = np.mean(mol_coords, axis=0)
	    set_protein_to_new_coord(input_protein, new_coord, output_file)

	    # return an output pdb file with the protein and ligand with resname LIG or UNK.
	    # also return any metrics you want to log, metrics will not be used for evaluation but might be useful for users
	    # metrics = {"mean_plddt": 80, "binding_affinity": -2}
	    metrics = {}

	    run_time = time.time() - start_time
	    return [output_file, ligand_file], metrics, run_time

	# Assemble the Gradio UI. Components are laid out in creation order, so the
	# statements below follow the on-page order: inputs, button, examples,
	# outputs.
	with gr.Blocks() as app:
	    gr.Markdown("# Template for inference")
	    gr.Markdown("Title, description, and other information about the model")

	    # Text inputs share the first row, file inputs the second.
	    with gr.Row():
	        input_sequence = gr.Textbox(lines=3, label="Input Protein sequence (FASTA)")
	        input_ligand = gr.Textbox(lines=3, label="Input ligand SMILES")
	    with gr.Row():
	        input_msa = gr.File(label="Input Protein MSA (A3M)")
	        input_protein = gr.File(label="Input protein monomer")

	    # The same four components feed both the examples and the click handler.
	    model_inputs = [input_sequence, input_ligand, input_msa, input_protein]

	    # define any options here

	    # for automated inference the default options are used
	    # slider_option = gr.Slider(0,10, label="Slider Option")
	    # checkbox_option = gr.Checkbox(label="Checkbox Option")
	    # dropdown_option = gr.Dropdown(["Option 1", "Option 2", "Option 3"], label="Radio Option")

	    btn = gr.Button("Run Inference")

	    # One example row; its values are in the same order as model_inputs.
	    example_rows = [
	        [
	            "",
	            "COc1ccc(cc1)n2c3c(c(n2)C(=O)N)CCN(C3=O)c4ccc(cc4)N5CCCCC5=O",
	            "empty_file.a3m",
	            "test_out.pdb",
	        ],
	    ]
	    gr.Examples(example_rows, model_inputs)

	    # Display settings handed to the 3D viewer, one entry per model index.
	    reps = [
	        {"model": 0, "style": "cartoon", "color": "whiteCarbon"},
	        {"model": 1, "style": "stick", "color": "greenCarbon"},
	    ]

	    out = Molecule3D(reps=reps)
	    metrics = gr.JSON(label="Metrics")
	    run_time = gr.Textbox(label="Runtime")

	    btn.click(
	        predict,
	        inputs=model_inputs,
	        outputs=[out, metrics, run_time],
	    )

	app.launch()