Spaces:
Running
Running
import sys | |
import os.path | |
import time | |
from random import shuffle | |
import numpy as np | |
import pdbfixer | |
import openmm as mm | |
import openmm.app as mm_app | |
import openmm.unit as mm_unit | |
from openmm import CustomExternalForce | |
from openmm.app import Modeller | |
from openmmforcefields.generators import SystemGenerator | |
from openff.toolkit import Molecule | |
from openff.toolkit.utils.exceptions import UndefinedStereochemistryError, RadicalsNotSupportedError | |
import mdtraj | |
from rdkit import Chem | |
from rdkit.Chem import AllChem | |
from rdkit.Geometry import Point3D | |
import Bio.PDB | |
from Bio.SVDSuperimposer import SVDSuperimposer | |
# -- Relax protein and ligand. Code adapted from: | |
# https://github.com/patrickbryant1/Umol/blob/f7cd2b4de09b4e7cc1b68606791dd1cc81deeebc/src/relax/openmm_relax.py | |
def fix_pdb(pdb_path, hydrogen_added_pdb_path):
    """Repair a PDB structure with PDBFixer and write the result to disk.

    The fixer is asked, in order, to find missing residues, find and replace
    nonstandard residues, find and add missing atoms, and finally add missing
    hydrogens (the ``7.0`` argument is the pH passed to PDBFixer).

    Args:
        pdb_path: Path of the input PDB file.
        hydrogen_added_pdb_path: Path the repaired structure is written to.

    Returns:
        A ``(topology, positions)`` tuple taken from the fixer after all
        repair steps have run.
    """
    fixer = pdbfixer.PDBFixer(pdb_path)
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(7.0)
    # Use a context manager so the output file is flushed and closed even if
    # writing fails part-way through.
    with open(hydrogen_added_pdb_path, 'w') as out_file:
        mm_app.PDBFile.writeFile(fixer.topology, fixer.positions, out_file)
    return fixer.topology, fixer.positions
def minimize_energy(topology, system, positions, output_pdb_path):
    """Minimize the energy of a system and write the minimized structure.

    Args:
        topology: OpenMM topology of the system.
        system: OpenMM ``System`` (forces already attached).
        positions: Starting positions for the minimization.
        output_pdb_path: Path the minimized structure is written to as PDB.

    Returns:
        A ``(topology, minpositions)`` tuple, where ``minpositions`` are the
        positions read back from the context after minimization.

    Side effects:
        Creates ``positions.dcd`` in the current working directory through
        the DCD reporter. NOTE(review): no ``simulation.step()`` is called
        here, so the reporter is not expected to record any frames — confirm
        whether the file is still needed.
    """
    # Use a Brownian integrator (temperature, friction coefficient, step size).
    integrator = mm.BrownianIntegrator(
        100 * mm_unit.kelvin,
        100. / mm_unit.picoseconds,
        2.0 * mm_unit.femtoseconds
    )
    simulation = mm_app.Simulation(topology, system, integrator)

    report_interval = 100  # Adjust this value as needed
    reporter = mdtraj.reporters.DCDReporter('positions.dcd', report_interval)
    try:
        simulation.reporters.append(reporter)
        simulation.context.setPositions(positions)
        # Positional arguments are (tolerance, maxIterations).
        simulation.minimizeEnergy(1, 1000)

        # Save positions
        minpositions = simulation.context.getState(getPositions=True).getPositions()
        with open(output_pdb_path, "w") as out_file:
            mm_app.PDBFile.writeFile(topology, minpositions, out_file)
    finally:
        # Always release the DCD file handle, including when minimization
        # or writing raises.
        reporter.close()
    return topology, minpositions
def add_restraints(system, topology, positions, restraint_type):
    """Attach harmonic position restraints to a subset of atoms.

    Code adapted from
    https://gist.github.com/peastman/ad8cda653242d731d75e18c836b2a3a5

    Args:
        system: OpenMM ``System`` the restraint force is added to (mutated).
        topology: Topology whose atoms are scanned by name.
        positions: Reference positions, indexed by ``atom.index``.
        restraint_type: ``'protein'`` restrains every atom whose name does
            not contain ``'x'``; ``'CA+ligand'`` restrains atoms whose name
            contains ``'x'`` or equals ``"CA"``. Any other value restrains
            nothing.

    Returns:
        The same ``system`` object that was passed in.
    """
    restraint = CustomExternalForce('k*periodicdistance(x, y, z, x0, y0, z0)^2')
    system.addForce(restraint)
    restraint.addGlobalParameter('k', 100.0*mm_unit.kilojoules_per_mole/mm_unit.nanometer**2)
    for reference_coord in ('x0', 'y0', 'z0'):
        restraint.addPerParticleParameter(reference_coord)

    def _is_restrained(atom_name):
        # presumably an 'x' in the atom name marks a ligand atom — verify
        # against how the ligand topology names its atoms.
        if restraint_type == 'protein':
            return 'x' not in atom_name
        if restraint_type == 'CA+ligand':
            return 'x' in atom_name or atom_name == "CA"
        return False

    for atom in topology.atoms():
        if _is_restrained(atom.name):
            restraint.addParticle(atom.index, positions[atom.index])
    return system
def create_joined_relaxed(protein_pdb_path: str, ligand_sdf_path: str, hydorgen_added_protein_pdb_path: str,
                          relaxed_joined_path: str):
    """Build a protein+ligand system, restrain it, and write the relaxed complex.

    Args:
        protein_pdb_path: Input protein PDB file.
        ligand_sdf_path: Input ligand SDF file.
        hydorgen_added_protein_pdb_path: Where the repaired, hydrogen-added
            protein PDB is written (parameter name kept as-is, including its
            spelling, for backward compatibility with keyword callers).
        relaxed_joined_path: Where the minimized protein+ligand PDB is written.

    Raises:
        RadicalsNotSupportedError: If the ligand contains radicals. The
            error is re-raised instead of terminating the interpreter, so a
            batch driver that catches ``Exception`` can report the failure
            and move on to the next complex.
    """
    restraint_type = 'CA+ligand'
    start_time = time.time()

    print('Reading ligand')
    try:
        ligand_mol = Molecule.from_file(ligand_sdf_path)
    except UndefinedStereochemistryError:
        # Undefined stereochemistry is tolerated: reload with the check off.
        print('Undefined Stereochemistry Error found! Trying with undefined stereo flag True')
        ligand_mol = Molecule.from_file(ligand_sdf_path, allow_undefined_stereo=True)
    except RadicalsNotSupportedError:
        # Previously this called sys.exit(), which exits with status 0 and
        # is not caught by `except Exception`, silently ending a whole batch.
        print('OpenFF does not currently support radicals -- use unrelaxed structure')
        raise

    # Assigning partial charges first because the default method (am1bcc) does not work
    ligand_mol.assign_partial_charges(partial_charge_method='gasteiger')

    # Read protein PDB and add hydrogens
    protein_topology, protein_positions = fix_pdb(protein_pdb_path, hydorgen_added_protein_pdb_path)
    print('Added all atoms...')

    modeller = Modeller(protein_topology, protein_positions)
    print('System has %d atoms' % modeller.topology.getNumAtoms())

    print('Adding ligand...')
    lig_top = ligand_mol.to_topology()
    modeller.add(lig_top.to_openmm(), lig_top.get_positions().to_openmm())
    print('System has %d atoms' % modeller.topology.getNumAtoms())

    print('Preparing system')
    # Initialize a SystemGenerator using the GAFF for the ligand and implicit water.
    system_generator = SystemGenerator(
        forcefields=['amber14-all.xml', 'implicit/gbn2.xml'],
        small_molecule_forcefield='gaff-2.11',
        molecules=[ligand_mol],
    )
    system = system_generator.create_system(modeller.topology, molecules=ligand_mol)

    print('Adding restraints on protein CAs and ligand atoms')
    system = add_restraints(system, modeller.topology, modeller.positions, restraint_type=restraint_type)

    minimize_energy(modeller.topology, system, modeller.positions, relaxed_joined_path)
    print(f'Time taken for relax calculation is {time.time() - start_time:.1f} seconds')
# -- Fix ligand changed structure. Code adapted from: | |
# https://github.com/patrickbryant1/Umol/blob/f7cd2b4de09b4e7cc1b68606791dd1cc81deeebc/src/relax/align_ligand_conformer.py | |
def generate_best_conformer(pred_coords, ligand_smiles, max_confs=100):
    """Generate conformers and pick the one closest to the predicted geometry.

    Conformers are embedded without distance constraints and then ranked by
    how well their heavy-atom distance matrix matches the distance matrix of
    ``pred_coords``. (Embedding with a bounds matrix built from the predicted
    distances was tried upstream and reported as not producing conformers,
    so that unused code path has been removed.)

    The heavy atoms of the molecule parsed from ``ligand_smiles`` are assumed
    to be in the same order as the rows of ``pred_coords``.

    Args:
        pred_coords: Array of predicted heavy-atom coordinates, one row of
            three values per heavy atom.
        ligand_smiles: SMILES string of the ligand.
        max_confs: Number of conformers to request from RDKit.

    Returns:
        A 7-tuple ``(best_conf, best_conf_pos, best_conf_err, symbols,
        nonH_inds, mol, best_conf_id)``: the chosen conformer, its positions
        (all atoms, including added hydrogens), its mean absolute distance
        error, the element symbol of every atom, the indices of the
        non-hydrogen atoms, the hydrogen-added molecule, and the id of the
        chosen conformer.

    Raises:
        ValueError: If the SMILES cannot be parsed, if ``pred_coords`` does
            not have one row per heavy atom, or if no conformer could be
            generated.
    """
    parsed = Chem.MolFromSmiles(ligand_smiles)
    if parsed is None:
        raise ValueError(f'Could not parse ligand SMILES: {ligand_smiles!r}')
    m = Chem.AddHs(parsed)

    nonH_inds = [atom.GetIdx() for atom in m.GetAtoms() if atom.GetSymbol() != 'H']

    pred_coords = np.asarray(pred_coords, dtype=float)
    if pred_coords.ndim != 2 or pred_coords.shape[0] != len(nonH_inds):
        raise ValueError(
            f'Expected coordinates for {len(nonH_inds)} heavy atoms, '
            f'got an array of shape {pred_coords.shape}'
        )

    # Pairwise distances of the predicted pose; the epsilon keeps sqrt away
    # from exactly zero on the diagonal.
    pred_dmat = np.sqrt(1e-10 + np.sum((pred_coords[:, None] - pred_coords[None, :]) ** 2, axis=-1))

    # The fixed seed was already present in the original (on an embedding
    # parameter object that was never passed on); applying it here makes the
    # conformer set reproducible.
    AllChem.EmbedMultipleConfs(m, numConfs=max_confs, randomSeed=0xf00d)
    conformers = list(m.GetConformers())
    if not conformers:
        raise ValueError(f'Could not generate any conformer for {ligand_smiles!r}')

    # Score every conformer by the mean absolute difference between its
    # heavy-atom distance matrix and the predicted one.
    conf_errs = []
    for conf in conformers:
        nonH_pos = conf.GetPositions()[nonH_inds]
        conf_dmat = np.sqrt(1e-10 + np.sum((nonH_pos[:, None] - nonH_pos[None, :]) ** 2, axis=-1))
        conf_errs.append(np.mean(np.sqrt(1e-10 + (conf_dmat - pred_dmat) ** 2)))

    best_index = int(np.argmin(conf_errs))
    best_conf = conformers[best_index]
    symbols = [atom.GetSymbol() for atom in m.GetAtoms()]
    # Report the conformer's own id rather than its list position, since the
    # id is what writers expect as `confId`.
    return (best_conf, best_conf.GetPositions(), conf_errs[best_index], symbols,
            nonH_inds, m, best_conf.GetId())
def align_coords_transform(pred_pos, conf_pos, nonH_inds):
    """Superimpose a conformer onto the predicted positions.

    The rotation and translation are fitted on the rows of ``conf_pos``
    selected by ``nonH_inds`` against ``pred_pos`` (the reference), and are
    then applied to every row of ``conf_pos``.

    Args:
        pred_pos: Reference coordinates.
        conf_pos: Conformer coordinates for all atoms.
        nonH_inds: Indices into ``conf_pos`` of the atoms used for fitting.

    Returns:
        The transformed copy of ``conf_pos``.
    """
    fitted_subset = conf_pos[nonH_inds]

    superimposer = SVDSuperimposer()
    superimposer.set(pred_pos, fitted_subset)  # (reference_coords, coords)
    superimposer.run()
    rotation, translation = superimposer.get_rotran()

    # Move the whole conformer (hydrogens included) with the fitted transform.
    return np.dot(conf_pos, rotation) + translation
def write_sdf(mol, conf, aligned_conf_pos, best_conf_id, outname):
    """Overwrite a conformer's coordinates and write it to an SDF file.

    Args:
        mol: RDKit molecule that owns ``conf``.
        conf: Conformer whose atom positions are replaced in place.
        aligned_conf_pos: New coordinates, one ``(x, y, z)`` row per atom of
            ``mol``.
        best_conf_id: Id of the conformer to write (converted with ``int``).
        outname: Path of the SDF file to create.
    """
    for i in range(mol.GetNumAtoms()):
        x, y, z = aligned_conf_pos[i]
        conf.SetAtomPosition(i, Point3D(x, y, z))

    writer = Chem.SDWriter(outname)
    try:
        writer.write(mol, confId=int(best_conf_id))
    finally:
        # Close explicitly so the record is flushed to disk now rather than
        # whenever the writer object happens to be garbage-collected.
        writer.close()
# Main function | |
def _smiles_atom_output_order(mol):
    """Return the atom order RDKit used when it last wrote `mol` as SMILES, or None."""
    props = mol.GetPropsAsDict(includePrivate=True, includeComputed=True)
    raw_order = props.get('_smilesAtomOutputOrder')
    if raw_order is None:
        return None
    if isinstance(raw_order, str):
        # Some RDKit versions expose the property as text such as "[2,0,1,]".
        return [int(token) for token in raw_order.strip('[]').split(',') if token.strip()]
    return [int(idx) for idx in raw_order]


def relax_complex(protein_pdb_path: str, ligand_sdf_path: str, relaxed_protein_path: str, relaxed_ligand_path: str):
    """Relax one protein-ligand complex and write the relaxed protein and ligand.

    Steps: build and minimize the joined complex, save chain "A" of the
    result as the relaxed protein, take the non-hydrogen coordinates of
    chain "B" as the relaxed ligand pose, regenerate a ligand conformer that
    best matches that pose, align it onto the pose and write it as SDF.

    Two intermediate files are written next to the input protein:
    ``<protein_pdb_path>_hydrogen_added.pdb`` and
    ``<protein_pdb_path>_joined_relaxed.pdb``.

    Args:
        protein_pdb_path: Input protein PDB file.
        ligand_sdf_path: Input ligand SDF file.
        relaxed_protein_path: Output path for the relaxed protein PDB.
        relaxed_ligand_path: Output path for the relaxed ligand SDF.

    Raises:
        ValueError: If the ligand SDF cannot be parsed by RDKit.
    """
    hydorgen_added_protein_pdb_path = protein_pdb_path + "_hydrogen_added.pdb"
    relaxed_joined_path = protein_pdb_path + "_joined_relaxed.pdb"
    create_joined_relaxed(protein_pdb_path, ligand_sdf_path, hydorgen_added_protein_pdb_path, relaxed_joined_path)

    parser = Bio.PDB.PDBParser(QUIET=True)
    # Only the first model of the joined structure is used.
    joined_structure = next(iter(parser.get_structure('', relaxed_joined_path)))

    # save the relaxed protein
    # NOTE(review): this assumes the protein is chain "A" and the ligand is
    # chain "B" in the joined file — confirm for multi-chain proteins.
    io = Bio.PDB.PDBIO()
    io.set_structure(joined_structure["A"])
    io.save(relaxed_protein_path)

    relaxed_ligand_coords = np.array([atom.get_coord() for atom in joined_structure["B"].get_atoms()
                                      if atom.get_id()[0] != "H"])

    original_ligand = Chem.SDMolSupplier(ligand_sdf_path)[0]
    if original_ligand is None:
        raise ValueError(f"RDKit could not parse ligand file: {ligand_sdf_path}")
    ligand_smiles = Chem.MolToSmiles(original_ligand)

    # The conformer molecule is rebuilt from `ligand_smiles`, so its heavy
    # atoms follow the SMILES output order, which can differ from the SDF
    # order the relaxed coordinates are in. Reorder the coordinates so row k
    # belongs to atom k of the rebuilt molecule. When the atom counts differ
    # (e.g. hydrogens kept by RDKit) the coordinates are left untouched.
    smiles_order = _smiles_atom_output_order(original_ligand)
    if smiles_order is not None and len(smiles_order) == len(relaxed_ligand_coords):
        relaxed_ligand_coords = relaxed_ligand_coords[smiles_order]

    best_conf, best_conf_pos, best_conf_err, atoms, nonH_inds, mol, best_conf_id = generate_best_conformer(
        relaxed_ligand_coords, ligand_smiles, max_confs=100
    )
    aligned_conf_pos = align_coords_transform(relaxed_ligand_coords, best_conf_pos, nonH_inds)
    write_sdf(mol, best_conf, aligned_conf_pos, best_conf_id, relaxed_ligand_path)
def relax_folder(folder_path: str):
    """Relax every predicted complex found in a folder.

    A job is any ``<jobname>_predicted_protein.pdb`` that has a matching
    ``<jobname>_predicted_ligand_0.sdf`` in the same folder. Jobs are visited
    in shuffled order. A job whose two relaxed output files already exist is
    counted as done without being recomputed; a job that raises is reported
    and skipped. A summary line is printed at the end.

    Args:
        folder_path: Directory containing the predicted protein/ligand files;
            relaxed outputs are written into the same directory.
    """
    protein_suffix = "_predicted_protein.pdb"
    ligand_suffix = "_predicted_ligand_0.sdf"

    filenames = os.listdir(folder_path)
    shuffle(filenames)

    # Keep only the proteins that come with a predicted ligand.
    candidate_jobs = [name.split(protein_suffix)[0] for name in filenames if name.endswith(protein_suffix)]
    all_jobnames = [
        jobname for jobname in candidate_jobs
        if os.path.exists(os.path.join(folder_path, jobname + ligand_suffix))
    ]

    success = 0
    for jobname in all_jobnames:
        protein_pdb_path = os.path.join(folder_path, jobname + protein_suffix)
        ligand_sdf_path = os.path.join(folder_path, jobname + ligand_suffix)
        relaxed_protein_path = os.path.join(folder_path, jobname + "_protein_relaxed.pdb")
        relaxed_ligand_path = os.path.join(folder_path, jobname + "_ligand_relaxed.sdf")

        already_done = os.path.exists(relaxed_protein_path) and os.path.exists(relaxed_ligand_path)
        if already_done:
            print("Already has relaxed", jobname)
            success += 1
            continue

        print("Relaxing", jobname)
        try:
            relax_complex(protein_pdb_path, ligand_sdf_path, relaxed_protein_path, relaxed_ligand_path)
        except Exception as e:
            # One failing complex must not stop the rest of the batch.
            print("Failed to relax", jobname, e)
        else:
            success += 1

    print(f"Relaxed {success}/{len(all_jobnames)}")
# Command-line entry point: relax every complex in the folder given as the
# first argument.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Fail with a readable message instead of an IndexError traceback.
        sys.exit(f"Usage: {os.path.basename(sys.argv[0])} <folder_path>")
    relax_folder(os.path.abspath(sys.argv[1]))