import numpy as np from rdkit import Chem import os import sys import copy import re from typing import List, Any from indigo import * indigo = Indigo() import rdkit from rdkit import RDLogger RDLogger.DisableLog('rdApp.*') print('rdkit version:' + rdkit.__version__) BOND_TYPES = [None, Chem.rdchem.BondType.SINGLE, Chem.rdchem.BondType.DOUBLE, \ Chem.rdchem.BondType.TRIPLE, Chem.rdchem.BondType.AROMATIC] BOND_FLOAT_TO_TYPE = { 0.0: BOND_TYPES[0], 1.0: BOND_TYPES[1], 2.0: BOND_TYPES[2], 3.0: BOND_TYPES[3], 1.5: BOND_TYPES[4],} def get_bond_info(mol: Chem.Mol): """Get information on bonds in the molecule. Parameters ---------- mol: Chem.Mol Molecule """ if mol is None: return {} bond_info = {} for bond in mol.GetBonds(): a_start = bond.GetBeginAtom().GetAtomMapNum() a_end = bond.GetEndAtom().GetAtomMapNum() key_pair = sorted([a_start, a_end]) bond_info[tuple(key_pair)] = [bond.GetBondTypeAsDouble(), bond.GetIdx()] return bond_info def map_reac_and_frag(reac_mols: List[Chem.Mol], frag_mols: List[Chem.Mol]): """Aligns reactant and fragment mols by computing atom map overlaps. 
Parameters ---------- reac_mols: List[Chem.Mol], List of reactant mols frag_mols: List[Chem.Mol], List of fragment mols """ if len(reac_mols) != len(frag_mols): return reac_mols, frag_mols reac_maps = [[atom.GetAtomMapNum() for atom in mol.GetAtoms()] for mol in reac_mols] frag_maps = [[atom.GetAtomMapNum() for atom in mol.GetAtoms()] for mol in frag_mols] overlaps = {i: [] for i in range(len(frag_mols))} for i, fmap in enumerate(frag_maps): overlaps[i].extend([len(set(fmap).intersection(set(rmap))) for rmap in reac_maps]) overlaps[i] = overlaps[i].index(max(overlaps[i])) new_frag = [Chem.Mol(mol) for mol in frag_mols] new_reac = [Chem.Mol(reac_mols[overlaps[i]]) for i in overlaps] return new_reac, new_frag def remove_s_H(frag_mol): while True: idx = '' for atom in frag_mol.GetAtoms(): if atom.GetAtomicNum() == 1 and atom.GetDegree() == 0: idx= atom.GetIdx() if idx != '' : edit_mol = Chem.RWMol(frag_mol) edit_mol.RemoveAtom(idx) frag_mol = edit_mol.GetMol() else: break return frag_mol def apply_edits_to_mol_change(mol, edits): """Apply edits to molecular graph. Parameters ---------- mol: Chem.Mol, RDKit mol object edits: Iterable[str], Iterable of edits to apply. An edit is structured as a1:a2:b1:b2, where a1, a2 are atom maps of participating atoms and b1, b2 are previous and new bond orders. When a2 = 0, we update the hydrogen count. 
""" new_mol = Chem.RWMol(mol) amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in new_mol.GetAtoms()} for edit in edits: x, y, prev_bo, new_bo = edit.split(":") x, y = int(x), int(y) new_bo = float(new_bo) bond = new_mol.GetBondBetweenAtoms(amap[x],amap[y]) if new_bo > 0: if bond is not None: new_mol.RemoveBond(amap[x],amap[y]) new_mol.AddBond(amap[x],amap[y],BOND_FLOAT_TO_TYPE[new_bo]) atom_x,atom_y = new_mol.GetAtomWithIdx(amap[x]),new_mol.GetAtomWithIdx(amap[y]) try: atom_x.SetNumExplicitHs(int(atom_x.GetNumExplicitHs()+ float(prev_bo)-float(new_bo))) except: atom_x.SetNumExplicitHs(0) try: atom_y.SetNumExplicitHs(int(atom_y.GetNumExplicitHs()+ float(prev_bo)-float(new_bo))) except: atom_y.SetNumExplicitHs(0) pred_mol = new_mol.GetMol() return pred_mol def apply_edits_to_mol_break(mol, edits): """Apply edits to molecular graph. Parameters ---------- mol: Chem.Mol, RDKit mol object edits: Iterable[str], Iterable of edits to apply. An edit is structured as a1:a2:b1:b2, where a1, a2 are atom maps of participating atoms and b1, b2 are previous and new bond orders. When a2 = 0, we update the hydrogen count. 
""" mol = Chem.AddHs(mol) Chem.Kekulize(mol) for atom in mol.GetAtoms(): atom.SetNoImplicit(True) new_mol = Chem.RWMol(mol) amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in new_mol.GetAtoms()} for edit in edits: x, y, prev_bo, new_bo = edit.split(":") x, y = int(x), int(y) new_bo = float(new_bo) if y == 0: cent_atom = mol.GetAtomWithIdx(amap[x]) for neibor in cent_atom.GetNeighbors(): if neibor.GetAtomicNum() == 1: new_mol.RemoveBond(amap[x],neibor.GetIdx()) break else: pass elif y != 0: bond = new_mol.GetBondBetweenAtoms(amap[x],amap[y]) if bond is not None: new_mol.RemoveBond(amap[x],amap[y]) pred_mol = new_mol.GetMol() pred_mol = Chem.RemoveHs(pred_mol,sanitize = False) return pred_mol def find_reac_edit(frag_mols_1,reac_mols_1,core_edits): reac_mol_map_num = [i.GetAtomMapNum() for i in reac_mols_1.GetAtoms()] frag_mol_map_num = [i.GetAtomMapNum() for i in frag_mols_1.GetAtoms()] lg_map_num = [i for i in reac_mol_map_num if i not in frag_mol_map_num] attach_map_num = 0 reac_edit = [] core_edits = core_edits + [':'.join([i.split(':')[1],i.split(':')[0],i.split(':')[2],i.split(':')[3]]) for i in core_edits] for core_edit in core_edits: core_edit_ = core_edit.split(':') if float(core_edit_[3]) == 0 and int(core_edit_[0]) in frag_mol_map_num: attach_map_num = int(core_edit_[0]) elif float(core_edit_[2]) - float(core_edit_[3]) > 0 and int(core_edit_[0]) in frag_mol_map_num: attach_map_num = int(core_edit_[0]) else: continue if str(attach_map_num) != '0' and str(attach_map_num) != core_edit_[0]: continue frag_mols_1_amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in frag_mols_1.GetAtoms()} reac_mols_1_amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in reac_mols_1.GetAtoms()} frag_attach_H = frag_mols_1.GetAtomWithIdx(frag_mols_1_amap[attach_map_num]).GetNumExplicitHs() reac_attach_H = reac_mols_1.GetAtomWithIdx(reac_mols_1_amap[attach_map_num]).GetNumExplicitHs() frag_attach_charge = 
frag_mols_1.GetAtomWithIdx(frag_mols_1_amap[attach_map_num]).GetFormalCharge() reac_attach_charge = reac_mols_1.GetAtomWithIdx(reac_mols_1_amap[attach_map_num]).GetFormalCharge() if lg_map_num != []: for bond in reac_mols_1.GetBonds(): EndMapNum = bond.GetEndAtom().GetAtomMapNum() BeginMapNum = bond.GetBeginAtom().GetAtomMapNum() if (BeginMapNum == attach_map_num) and (EndMapNum in lg_map_num): reac_edit.append("{}:{}:{}:{}".format(BeginMapNum,EndMapNum,bond.GetBondTypeAsDouble(),0.0)) elif (EndMapNum == attach_map_num) and (BeginMapNum in lg_map_num): reac_edit.append("{}:{}:{}:{}".format(EndMapNum,BeginMapNum,bond.GetBondTypeAsDouble(),0.0)) elif lg_map_num == []: if Chem.MolToSmiles(reac_mols_1) == Chem.MolToSmiles(frag_mols_1): reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,0.0)) if (reac_attach_H - frag_attach_H) == 1 and (reac_attach_charge - frag_attach_charge) == 0: reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,1.0,0.0)) if (reac_attach_H - frag_attach_H) == 2 and (reac_attach_charge - frag_attach_charge) == 0: reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,2.0,0.0)) if (reac_attach_charge - frag_attach_charge) == -1: if "{}:{}:{}:{}".format(attach_map_num,0,0.0,-1.0) not in reac_edit: reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,-1.0)) if (reac_attach_charge - frag_attach_charge) == 1: if "{}:{}:{}:{}".format(attach_map_num,0,0.0,1.0) not in reac_edit: reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,1.0)) return reac_edit def correct_mol_1(mol,is_nitrine_c): mol = copy.deepcopy(mol) for atom in mol.GetAtoms(): if is_nitrine_c == True and atom.GetAtomicNum() == 7 and sum([i.GetBondTypeAsDouble() for i in atom.GetBonds()]) == 4 and 1.5 not in [i.GetBondTypeAsDouble() for i in atom.GetBonds()] and atom.GetFormalCharge()==0: #调整N的电荷 atom.SetFormalCharge(1) else: pass atom.SetNumRadicalElectrons(0) atom.SetIsAromatic(False) atom.SetNoImplicit(False) return mol def correct_mol(mol_,keep_map): mol = 
copy.deepcopy(mol_) atom_map_lis = [] idx_H_dic = {} for atom in mol.GetAtoms(): atom_map_lis.append(atom.GetAtomMapNum()) for atom in mol.GetAtoms(): if atom.GetAtomicNum() == 7 and sum([i.GetBondTypeAsDouble() for i in atom.GetBonds()]) == 4 and 1.5 not in [i.GetBondTypeAsDouble() for i in atom.GetBonds()] and atom.GetFormalCharge()==0: #调整N的电荷 pass elif atom.GetAtomicNum() == 15 and atom.GetExplicitValence() == 5 and 1.5 not in [i.GetBondTypeAsDouble() for i in atom.GetBonds()] and atom.GetFormalCharge()==0: #调整N的电荷 idx_H_dic[atom.GetIdx()] = atom.GetNumExplicitHs() else: pass atom.SetNumRadicalElectrons(0) atom.SetNoImplicit(False) atom.SetAtomMapNum(0) for atom in mol.GetAtoms(): atom.SetIsAromatic(False) temp = Chem.MolToMolBlock(mol,kekulize = True) mol = Chem.MolFromMolBlock(temp,removeHs = False,sanitize= False) if keep_map: for i in range(0,mol.GetNumAtoms()): mol.GetAtomWithIdx(i).SetAtomMapNum(atom_map_lis[i]) if i in idx_H_dic.keys(): mol.GetAtomWithIdx(i).SetNoImplicit(True) mol.GetAtomWithIdx(i).SetNumExplicitHs(idx_H_dic[i]) for i in range(0,mol.GetNumAtoms()): mol.GetAtomWithIdx(i).SetChiralTag(mol_.GetAtomWithIdx(i).GetChiralTag()) n_Chirals = Chem.FindMolChiralCenters(mol) return mol def get_atom_map_chai_dic(mol): dic = {} for idx,chiral in Chem.FindMolChiralCenters(mol): atom_map = mol.GetAtomWithIdx(idx).GetAtomMapNum() dic[atom_map] = chiral return dic def get_atom_map_stereo_dic(mol): map_a = {atom.GetIdx(): atom.GetAtomMapNum() for atom in mol.GetAtoms()} stereo_dic = {} for bond in mol.GetBonds(): b_map,e_map = map_a[bond.GetBeginAtomIdx()],map_a[bond.GetEndAtomIdx()] stereo_dic[tuple(sorted([b_map,e_map]))] = bond.GetStereo() return stereo_dic def cano_smiles_map(smiles): atom_map_lis = [] mol = Chem.MolFromSmiles(smiles,sanitize = False) for atom in mol.GetAtoms(): atom_map_lis.append(atom.GetAtomMapNum()) atom.SetAtomMapNum(0) smiles = Chem.MolToSmiles(mol,canonical = False,kekuleSmiles=True) mol = Chem.MolFromSmiles(smiles,sanitize = 
False) for atom in mol.GetAtoms(): atom.SetAtomMapNum(atom_map_lis[atom.GetIdx()]) smiles = Chem.MolToSmiles(mol,canonical = False,kekuleSmiles=True) return smiles def get_stereo_edit_mine(reac_mol,prod_mol): reac_map_a = {atom.GetIdx(): atom.GetAtomMapNum() for atom in reac_mol.GetAtoms()} prod_map_a = {atom.GetIdx(): atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()} for atom in reac_mol.GetAtoms(): atom.SetAtomMapNum(0) r_rank = list(Chem.CanonicalRankAtoms(reac_mol, breakTies=False)) r_idx = [i for i in range(reac_mol.GetNumAtoms())] dic_idx_rank = dict(zip(r_idx,r_rank)) p_stereo_dic = {} for bond in prod_mol.GetBonds(): b_map,e_map = prod_map_a[bond.GetBeginAtomIdx()],prod_map_a[bond.GetEndAtomIdx()] p_stereo_dic[tuple(sorted([b_map,e_map]))] = bond.GetStereo() r_stereo_dic = {} for bond in reac_mol.GetBonds(): if bond.GetBondTypeAsDouble() == 2.0: b_atom,e_atom = bond.GetBeginAtom(),bond.GetEndAtom() b_neis = b_atom.GetNeighbors() b_neis = [i for i in b_neis if i.GetIdx() != e_atom.GetIdx()] b_neis_rank = [dic_idx_rank[i.GetIdx()] for i in b_neis] e_neis = e_atom.GetNeighbors() e_neis = [i for i in e_neis if i.GetIdx() != b_atom.GetIdx()] e_neis_rank = [dic_idx_rank[i.GetIdx()] for i in e_neis] b_neis_rank = b_neis_rank + ['H'] * (2 - len(b_neis_rank)) e_neis_rank = e_neis_rank + ['H'] * (2 - len(e_neis_rank)) if len(b_neis_rank) == len(set(b_neis_rank)) and len(e_neis_rank) == len(set(e_neis_rank)): b_map,e_map = reac_map_a[bond.GetBeginAtomIdx()],reac_map_a[bond.GetEndAtomIdx()] r_stereo_dic[tuple(sorted([b_map,e_map]))] = bond.GetStereo() else: pass stereo_edits = [] for atom_pair,stereo in r_stereo_dic.items(): if atom_pair in p_stereo_dic.keys() and stereo != p_stereo_dic[atom_pair]: if stereo == Chem.rdchem.BondStereo.STEREONONE: stereo = 'a' elif stereo == Chem.rdchem.BondStereo.STEREOE: stereo = 'e' elif stereo == Chem.rdchem.BondStereo.STEREOZ: stereo = 'z' stereo_edits.append('{}:{}:{}:{}'.format(atom_pair[0],atom_pair[1],0,stereo)) return 
stereo_edits def apply_stereo_change(prod_mol,stereo_edits): p_amap_idx = {atom.GetAtomMapNum(): atom.GetIdx() for atom in prod_mol.GetAtoms()} prod_mol = copy.deepcopy(prod_mol) prod_mol_t = copy.deepcopy(prod_mol) for stereo_edit in stereo_edits: b_map = int(stereo_edit.split(':')[0]) e_map = int(stereo_edit.split(':')[1]) b_n = prod_mol.GetAtomWithIdx(p_amap_idx[b_map]).GetNeighbors() b_n = [i.GetAtomMapNum() for i in b_n] b_n = [i for i in b_n if i not in [b_map,e_map]] e_n = prod_mol.GetAtomWithIdx(p_amap_idx[e_map]).GetNeighbors() e_n = [i.GetAtomMapNum() for i in e_n] e_n = [i for i in e_n if i not in [b_map,e_map]] f_b_n = b_n[0] m_cip_rank = 0 for i in b_n[:]: c_cip_rank = int(prod_mol_t.GetAtomWithIdx(p_amap_idx[i]).GetProp('_CIPRank')) if c_cip_rank >= m_cip_rank: f_b_n = i m_cip_rank = c_cip_rank f_e_n = e_n[0] m_cip_rank = 0 for i in e_n[:]: c_cip_rank = int(prod_mol_t.GetAtomWithIdx(p_amap_idx[i]).GetProp('_CIPRank')) if c_cip_rank >= m_cip_rank: f_e_n = i m_cip_rank = c_cip_rank if stereo_edit[-2:] == ':e': bond = prod_mol.GetBondBetweenAtoms(p_amap_idx[b_map],p_amap_idx[e_map]) bond.SetStereo(Chem.rdchem.BondStereo.STEREOE) try: bond.SetStereoAtoms(p_amap_idx[f_b_n],p_amap_idx[f_e_n]) except: bond.SetStereoAtoms(p_amap_idx[f_e_n],p_amap_idx[f_b_n]) if stereo_edit[-2:] == ':z': bond = prod_mol.GetBondBetweenAtoms(p_amap_idx[b_map],p_amap_idx[e_map]) bond.SetStereo(Chem.rdchem.BondStereo.STEREOZ) try: bond.SetStereoAtoms(p_amap_idx[f_b_n],p_amap_idx[f_e_n]) except: bond.SetStereoAtoms(p_amap_idx[f_e_n],p_amap_idx[f_b_n]) elif stereo_edit[-2:] == ':a': bond = prod_mol.GetBondBetweenAtoms(p_amap_idx[b_map],p_amap_idx[e_map]) bond.SetStereo(Chem.rdchem.BondStereo.STEREOANY) return prod_mol def add_Cl(mol): add_Cl_atom_idx = [] for atom in mol.GetAtoms(): Double_O_count = 0 if atom.GetAtomicNum() == 16 and sorted([i.GetBondTypeAsDouble() for i in atom.GetBonds()]) == [1,2,2]: neibors = atom.GetNeighbors() for neibor in neibors: if neibor.GetAtomicNum() == 
8: bond = mol.GetBondBetweenAtoms(atom.GetIdx(),neibor.GetIdx()) if bond.GetBondTypeAsDouble() == 2: Double_O_count += 1 else: pass else: pass if Double_O_count == 2: add_Cl_atom_idx.append(atom.GetIdx()) if len(add_Cl_atom_idx) == 1: map_lis = [i.GetAtomMapNum() for i in mol.GetAtoms()] mw = Chem.RWMol(mol) mw.AddAtom(Chem.Atom(17)) mw.GetAtomWithIdx(len(map_lis)).SetAtomMapNum(max(map_lis)+1) mw.AddBond(add_Cl_atom_idx[0],len(map_lis), BOND_FLOAT_TO_TYPE[1]) mol = mw.GetMol() return mol def neu_sulf_charge(mol): for atom in mol.GetAtoms(): if atom.GetAtomicNum() == 8 and atom.GetFormalCharge() == -1: neibors = atom.GetNeighbors() if len(neibors) == 1 and neibors[0].GetAtomicNum() == 16 and neibors[0].GetExplicitValence() == 4: atom.SetFormalCharge(0) else: pass return mol def align_kekule_pairs(r: str, p: str) : """Aligns kekule pairs to ensure unchanged bonds have same bond order in previously aromatic rings. Parameters ---------- r: str, SMILES string representing the reactants p: str, SMILES string representing the product """ reac_mol = Chem.MolFromSmiles(r) max_amap = max([atom.GetAtomMapNum() for atom in reac_mol.GetAtoms()]) for atom in reac_mol.GetAtoms(): if atom.GetAtomMapNum() == 0: atom.SetAtomMapNum(max_amap + 1) max_amap = max_amap + 1 prod_mol = Chem.MolFromSmiles(p) prod_prev = get_bond_info(prod_mol) Chem.Kekulize(prod_mol) prod_new = get_bond_info(prod_mol) reac_prev = get_bond_info(reac_mol) Chem.Kekulize(reac_mol) reac_new = get_bond_info(reac_mol) reac_edit = {} for bond in prod_new: if bond in reac_new and (prod_prev[bond][0] == reac_prev[bond][0]): if reac_new[bond][0] != prod_new[bond][0] or reac_prev[bond][0] == 1.5: reac_new[bond][0] = prod_new[bond][0] reac_edit[bond] = reac_new[bond] reac_mol = Chem.RWMol(reac_mol) amap_idx = {atom.GetAtomMapNum(): atom.GetIdx() for atom in reac_mol.GetAtoms()} for bond in reac_edit: idx1, idx2 = amap_idx[bond[0]], amap_idx[bond[1]] bo = reac_new[bond][0] reac_mol.RemoveBond(idx1, idx2) 
reac_mol.AddBond(idx1, idx2, BOND_FLOAT_TO_TYPE[bo]) return reac_mol.GetMol(), prod_mol def count_kekule_d(r,p): prod_mol = Chem.MolFromSmiles(p) prod_s = get_bond_info(prod_mol) prod_mol = Chem.MolFromSmiles(p,sanitize = False) prod_k = get_bond_info(prod_mol) reac_mol = Chem.MolFromSmiles(r) reac_s = get_bond_info(reac_mol) reac_mol = Chem.MolFromSmiles(r,sanitize = False) reac_k = get_bond_info(reac_mol) d_count = 0 for pair in reac_s.keys(): if pair in prod_s.keys(): if reac_s[pair][0] == prod_s[pair][0]: if reac_k[pair][0] != prod_k[pair][0]: d_count += 1 return d_count def get_kekule_aligned_r(r,p): if count_kekule_d(r,p) == 0: return r else: min_r_s_lis = [] for r_s in r.split('.'): min_count = 1000 min_r_s = '' mol = Chem.MolFromSmiles(r_s) suppl = Chem.ResonanceMolSupplier(mol, Chem.KEKULE_ALL) for i in range(len(suppl)): r_s = Chem.MolToSmiles(suppl[i],kekuleSmiles = True) count = count_kekule_d(r_s,p) if count <= min_count: min_r_s = r_s min_count = count min_r_s_lis.append(min_r_s) return '.'.join(min_r_s_lis) def apply_edits_to_mol_connect(mol, edits): """Apply edits to molecular graph. Parameters ---------- mol: Chem.Mol, RDKit mol object edits: Iterable[str], Iterable of edits to apply. An edit is structured as a1:a2:b1:b2, where a1, a2 are atom maps of participating atoms and b1, b2 are previous and new bond orders. When a2 = 0, we update the hydrogen count. 
""" new_mol = Chem.RWMol(mol) amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in new_mol.GetAtoms()} for edit in edits: x, y, prev_bo, new_bo = edit.split(":") x, y = int(x), int(y) new_bo = float(new_bo) new_mol.AddBond(amap[x],amap[y],BOND_FLOAT_TO_TYPE[new_bo]) pred_mol = new_mol.GetMol() return pred_mol def get_charge_edit_mine(reac_mol, prod_mol,core_edits): lg_site_lis = [] for core_edit in core_edits: x,y,bo,n_bo = core_edit.split(':') if float(bo) - float(n_bo) > 0: lg_site_lis.append(int(x)) lg_site_lis.append(int(y)) lg_site_lis = [i for i in lg_site_lis if i != 0] dict_reac_charges = {} for atom in reac_mol.GetAtoms(): dict_reac_charges[atom.GetAtomMapNum()] = atom.GetFormalCharge() dict_prod_charges = {} for atom in prod_mol.GetAtoms(): dict_prod_charges[atom.GetAtomMapNum()] = atom.GetFormalCharge() charge_edits = [] for atom_map, charge in dict_prod_charges.items(): if atom_map in dict_reac_charges.keys(): if dict_reac_charges[atom_map] != charge and atom_map not in lg_site_lis: edit = f"{atom_map}:{0}:{0}:{dict_reac_charges[atom_map]}" charge_edits.append(edit) return charge_edits def get_atom_map_charge_dic(mol): dic = {} for atom in mol.GetAtoms(): dic[atom.GetAtomMapNum()] = atom.GetFormalCharge() return dic def apply_charge_change(mol,charge_edits): amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in mol.GetAtoms()} for edit in charge_edits: x, y, prev_charge, new_charge = edit.split(":") mol.GetAtomWithIdx(amap[int(x)]).SetFormalCharge(int(new_charge)) return mol def get_core_edit_mine(reac_mol, prod_mol): prod_bonds = get_bond_info(prod_mol) reac_bonds = get_bond_info(reac_mol) rxn_core_break = set() rxn_core_lack = set() rxn_core = set() core_edits = [] p_amap_idx = {atom.GetAtomMapNum(): atom.GetIdx() for atom in prod_mol.GetAtoms()} reac_amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in reac_mol.GetAtoms()} for bond in prod_bonds: if bond in reac_bonds and prod_bonds[bond][0] != reac_bonds[bond][0]: a_start, a_end = bond 
prod_bo, reac_bo = prod_bonds[bond][0], reac_bonds[bond][0] a_start, a_end = sorted([a_start, a_end]) edit = f"{a_start}:{a_end}:{prod_bo}:{reac_bo}" core_edits.append(edit) rxn_core.update([a_start, a_end]) if bond not in reac_bonds: a_start, a_end = bond reac_bo = 0.0 prod_bo = prod_bonds[bond][0] start, end = sorted([a_start, a_end]) edit = f"{a_start}:{a_end}:{prod_bo}:{reac_bo}" core_edits.append(edit) rxn_core.update([a_start, a_end]) rxn_core_break.update([a_start, a_end]) for bond in reac_bonds: if bond not in prod_bonds: amap1, amap2 = bond rxn_core_lack.update([amap1, amap2]) if (amap1 in p_amap_idx) and (amap2 in p_amap_idx): a_start, a_end = sorted([amap1, amap2]) reac_bo = reac_bonds[bond][0] edit = f"{a_start}:{a_end}:{0.0}:{reac_bo}" core_edits.append(edit) rxn_core.update([a_start, a_end]) if True: reac_amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in reac_mol.GetAtoms()} for atom in prod_mol.GetAtoms(): amap_num = atom.GetAtomMapNum() if (amap_num in rxn_core_break) or (amap_num not in rxn_core_lack): pass else: amap_num = atom.GetAtomMapNum() numHs_prod = atom.GetTotalNumHs() numHs_reac = reac_mol.GetAtomWithIdx(reac_amap[amap_num]).GetTotalNumHs() if numHs_prod != numHs_reac: edit = f"{amap_num}:{0}:{1.0}:{0.0}" core_edits.append(edit) rxn_core.add(amap_num) return core_edits def get_chai_edit_mine(reac_mol, prod_mol): reac_map_a = {atom.GetIdx(): atom.GetAtomMapNum() for atom in reac_mol.GetAtoms()} prod_map_a = {atom.GetIdx(): atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()} reac_ChiralCenters = [] for ChiralCenters in Chem.FindMolChiralCenters(reac_mol,includeUnassigned=True): reac_ChiralCenters.append((reac_map_a[ChiralCenters[0]],ChiralCenters[1])) prod_ChiralCenters = [] for ChiralCenters in Chem.FindMolChiralCenters(prod_mol,includeUnassigned=True): prod_ChiralCenters.append((prod_map_a[ChiralCenters[0]],ChiralCenters[1])) dict_reac_ChiralCenters = dict(reac_ChiralCenters) dict_prod_ChiralCenters = dict(prod_ChiralCenters) 
chai_edits = [] for amap_num,chiral in dict_prod_ChiralCenters.items(): if amap_num in dict_reac_ChiralCenters.keys(): if chiral != dict_reac_ChiralCenters[amap_num]: edit = f"{amap_num}:{0}:{0}:{dict_reac_ChiralCenters[amap_num]}" chai_edits.append(edit) else: pass for amap_num,chiral in dict_reac_ChiralCenters.items(): if (amap_num not in dict_prod_ChiralCenters.keys()) and (amap_num in prod_map_a.values()) and chiral != '?': edit = f"{amap_num}:{0}:{0}:{chiral}" chai_edits.append(edit) return chai_edits def get_chai_edit_mine(reac_mol, prod_mol): reac_map_a = {atom.GetIdx(): atom.GetAtomMapNum() for atom in reac_mol.GetAtoms()} prod_map_a = {atom.GetIdx(): atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()} reac_ChiralCenters = [] for ChiralCenters in Chem.FindMolChiralCenters(reac_mol,includeUnassigned=True): reac_ChiralCenters.append((reac_map_a[ChiralCenters[0]],ChiralCenters[1])) prod_ChiralCenters = [] for ChiralCenters in Chem.FindMolChiralCenters(prod_mol,includeUnassigned=True): prod_ChiralCenters.append((prod_map_a[ChiralCenters[0]],ChiralCenters[1])) dict_reac_ChiralCenters = dict(reac_ChiralCenters) dict_prod_ChiralCenters = dict(prod_ChiralCenters) chai_edits = [] for amap_num,chiral in dict_prod_ChiralCenters.items(): if amap_num in dict_reac_ChiralCenters.keys(): if chiral != dict_reac_ChiralCenters[amap_num]: edit = f"{amap_num}:{0}:{0}:{dict_reac_ChiralCenters[amap_num]}" chai_edits.append(edit) else: pass for amap_num,chiral in dict_reac_ChiralCenters.items(): if (amap_num not in dict_prod_ChiralCenters.keys())and (amap_num in prod_map_a.values()): edit = f"{amap_num}:{0}:{0}:{chiral}" chai_edits.append(edit) return chai_edits def get_chai_edit_mine(reac_mol, prod_mol): reac_map_a = {atom.GetIdx(): atom.GetAtomMapNum() for atom in reac_mol.GetAtoms()} prod_map_a = {atom.GetIdx(): atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()} reac_ChiralCenters = [] for ChiralCenters in Chem.FindMolChiralCenters(reac_mol,includeUnassigned=True): 
reac_ChiralCenters.append((reac_map_a[ChiralCenters[0]],ChiralCenters[1])) prod_ChiralCenters = [] for ChiralCenters in Chem.FindMolChiralCenters(prod_mol,includeUnassigned=True): prod_ChiralCenters.append((prod_map_a[ChiralCenters[0]],ChiralCenters[1])) dict_reac_ChiralCenters = dict(reac_ChiralCenters) dict_prod_ChiralCenters = dict(prod_ChiralCenters) chai_edits = [] for amap_num,chiral in dict_prod_ChiralCenters.items(): if amap_num in dict_reac_ChiralCenters.keys(): if chiral != dict_reac_ChiralCenters[amap_num]: edit = f"{amap_num}:{0}:{0}:{dict_reac_ChiralCenters[amap_num]}" chai_edits.append(edit) else: pass for amap_num,chiral in dict_reac_ChiralCenters.items(): if (amap_num not in dict_prod_ChiralCenters.keys()) and (amap_num in prod_map_a.values()) and chiral != '?': edit = f"{amap_num}:{0}:{0}:{chiral}" chai_edits.append(edit) return chai_edits def get_lg_map_lis(frag_mols,reac_mols,core_edits,prod_mol): lg_map_lis = [] prod_map_num_lis = [i.GetAtomMapNum() for i in prod_mol.GetAtoms()] for frag_mols_1,reac_mols_1 in zip(frag_mols[:],reac_mols[:]): reac_edits = find_reac_edit(frag_mols_1,reac_mols_1,core_edits) reac_edits_a = [] reac_edits_b = [] for reac_edit in reac_edits: if reac_edit[:3] == '0:0': reac_edits_a.append(reac_edit) elif reac_edit[-7:] == '0.0:0.0': reac_edits_a.append(reac_edit) elif reac_edit[-10:] == '0:0.0:-1.0': reac_edits_a.append(reac_edit) elif reac_edit[-9:] == '0:0.0:1.0': reac_edits_a.append(reac_edit) else: reac_edits_b.append(reac_edit) for reac_edit in reac_edits_a: if reac_edit[:3] == '0:0': pass elif reac_edit[-7:] == '0.0:0.0': pass elif reac_edit[-10:] == '0:0.0:-1.0': edit_map_num_lis = reac_edit.split(':')[:2] attach_map_num_1 = [int(i) for i in edit_map_num_lis if int(i) in prod_map_num_lis] lg_smiles = '-1.0' lg_map_lis.append((lg_smiles,attach_map_num_1)) elif reac_edit[-9:] == '0:0.0:1.0': edit_map_num_lis = reac_edit.split(':')[:2] attach_map_num_1 = [int(i) for i in edit_map_num_lis if int(i) in 
prod_map_num_lis] lg_smiles = '1.0' lg_map_lis.append((lg_smiles,attach_map_num_1)) frag_1_map_num_lis = [i.GetAtomMapNum() for i in frag_mols_1.GetAtoms() if i.GetAtomMapNum() != 0] reac_frag_mol = apply_edits_to_mol_break(reac_mols_1 , reac_edits_b) reac_frag_mols = Chem.GetMolFrags(reac_frag_mol,asMols=True,sanitizeFrags = False) reac_edit_added = [] for reac_frag_mol in reac_frag_mols[:]: reac_frag_map_num_lis = [i.GetAtomMapNum() for i in reac_frag_mol.GetAtoms() if i.GetAtomMapNum() != 0] if set(reac_frag_map_num_lis) == set(frag_1_map_num_lis): pass else: attach_map_num_1 = [] for reac_edit in reac_edits: if reac_edit in reac_edit_added: continue else: pass b,e = int(reac_edit.split(':')[0]),int(reac_edit.split(':')[1]) if e in reac_frag_map_num_lis and b in frag_1_map_num_lis: for atom in reac_frag_mol.GetAtoms(): if atom.GetAtomMapNum() == int(e): atom.SetAtomMapNum(500+atom.GetAtomMapNum()) break else: pass reac_edit_added.append(reac_edit) if len(attach_map_num_1) == 1: if [str(attach_map_num_1[0]),str(atom.GetAtomMapNum()-500)] in [i.split(':')[:2] for i in reac_edits ]: #上一个合成子上的连接点和本离去基团的连接点配对 if atom.GetAtomMapNum() == max([i.GetAtomMapNum() for i in reac_frag_mol.GetAtoms()]): attach_map_num_1 = [b] + attach_map_num_1 else: attach_map_num_1.append(b) else: if atom.GetAtomMapNum() == max([i.GetAtomMapNum() for i in reac_frag_mol.GetAtoms()]): attach_map_num_1.append(b) else: attach_map_num_1 = [b] + attach_map_num_1 elif len(attach_map_num_1) == 0: attach_map_num_1.append(b) else: pass if reac_frag_mol.GetAtomWithIdx(0).GetAtomicNum() == 1 and len(attach_map_num_1) == 1: break lg_smiles = Chem.MolToSmiles(reac_frag_mol,kekuleSmiles = True) lg = Chem.MolFromSmiles(lg_smiles) Chem.Kekulize(lg) for atom in lg.GetAtoms(): if atom.GetAtomMapNum() >= 500: atom.SetAtomMapNum(1) pass else: atom.SetAtomMapNum(0) lg_smiles = Chem.MolToSmiles(lg,canonical = False,kekuleSmiles = True) if attach_map_num_1 != []: lg_map_lis.append((lg_smiles,attach_map_num_1)) 
return lg_map_lis def get_core_edit_mine(reac_mol, prod_mol): prod_bonds = get_bond_info(prod_mol) reac_bonds = get_bond_info(reac_mol) rxn_core_break = set() rxn_core_lack = set() rxn_core = set() core_edits = [] p_amap_idx = {atom.GetAtomMapNum(): atom.GetIdx() for atom in prod_mol.GetAtoms()} reac_amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in reac_mol.GetAtoms()} for bond in prod_bonds: if bond in reac_bonds and prod_bonds[bond][0] != reac_bonds[bond][0]: a_start, a_end = bond prod_bo, reac_bo = prod_bonds[bond][0], reac_bonds[bond][0] a_start, a_end = sorted([a_start, a_end]) edit = f"{a_start}:{a_end}:{prod_bo}:{reac_bo}" core_edits.append(edit) rxn_core.update([a_start, a_end]) if bond not in reac_bonds: a_start, a_end = bond reac_bo = 0.0 prod_bo = prod_bonds[bond][0] start, end = sorted([a_start, a_end]) edit = f"{a_start}:{a_end}:{prod_bo}:{reac_bo}" core_edits.append(edit) rxn_core.update([a_start, a_end]) rxn_core_break.update([a_start, a_end]) for bond in reac_bonds: if bond not in prod_bonds: amap1, amap2 = bond rxn_core_lack.update([amap1, amap2]) if (amap1 in p_amap_idx) and (amap2 in p_amap_idx): a_start, a_end = sorted([amap1, amap2]) reac_bo = reac_bonds[bond][0] edit = f"{a_start}:{a_end}:{0.0}:{reac_bo}" core_edits.append(edit) rxn_core.update([a_start, a_end]) reac_amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in reac_mol.GetAtoms()} for atom in prod_mol.GetAtoms(): amap_num = atom.GetAtomMapNum() if (amap_num in rxn_core_break) or (amap_num not in rxn_core_lack): pass else: amap_num = atom.GetAtomMapNum() numHs_prod = atom.GetTotalNumHs() numHs_reac = reac_mol.GetAtomWithIdx(reac_amap[amap_num]).GetTotalNumHs() if numHs_prod != numHs_reac: edit = f"{amap_num}:{0}:{1.0}:{0.0}" core_edits.append(edit) rxn_core.add(amap_num) for atom in prod_mol.GetAtoms(): amap_num = atom.GetAtomMapNum() if amap_num in rxn_core: pass else: amap_num = atom.GetAtomMapNum() Degree_prod = atom.GetDegree() Degree_reac = 
reac_mol.GetAtomWithIdx(reac_amap[amap_num]).GetDegree() if Degree_prod - Degree_reac == -1: edit = f"{amap_num}:{0}:{1.0}:{0.0}" core_edits.append(edit) rxn_core.add(amap_num) return core_edits def find_reac_edit(frag_mols_1,reac_mols_1,core_edits): reac_mol_map_num = [i.GetAtomMapNum() for i in reac_mols_1.GetAtoms()] frag_mol_map_num = [i.GetAtomMapNum() for i in frag_mols_1.GetAtoms()] lg_map_num = [i for i in reac_mol_map_num if i not in frag_mol_map_num] attach_map_num = 0 reac_edit = [] core_edits = core_edits + [':'.join([i.split(':')[1],i.split(':')[0],i.split(':')[2],i.split(':')[3]]) for i in core_edits] for core_edit in core_edits: core_edit_ = core_edit.split(':') if float(core_edit_[3]) == 0 and int(core_edit_[0]) in frag_mol_map_num: attach_map_num = int(core_edit_[0]) elif float(core_edit_[2]) - float(core_edit_[3]) > 0 and int(core_edit_[0]) in frag_mol_map_num: attach_map_num = int(core_edit_[0]) else: continue if str(attach_map_num) != '0' and str(attach_map_num) != core_edit_[0]: continue frag_mols_1_amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in frag_mols_1.GetAtoms()} reac_mols_1_amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in reac_mols_1.GetAtoms()} frag_attach_H = frag_mols_1.GetAtomWithIdx(frag_mols_1_amap[attach_map_num]).GetNumExplicitHs() reac_attach_H = reac_mols_1.GetAtomWithIdx(reac_mols_1_amap[attach_map_num]).GetNumExplicitHs() frag_attach_charge = frag_mols_1.GetAtomWithIdx(frag_mols_1_amap[attach_map_num]).GetFormalCharge() reac_attach_charge = reac_mols_1.GetAtomWithIdx(reac_mols_1_amap[attach_map_num]).GetFormalCharge() if lg_map_num != []: for bond in reac_mols_1.GetBonds(): EndMapNum = bond.GetEndAtom().GetAtomMapNum() BeginMapNum = bond.GetBeginAtom().GetAtomMapNum() if (BeginMapNum == attach_map_num) and (EndMapNum in lg_map_num): reac_edit.append("{}:{}:{}:{}".format(BeginMapNum,EndMapNum,bond.GetBondTypeAsDouble(),0.0)) elif (EndMapNum == attach_map_num) and (BeginMapNum in lg_map_num): 
reac_edit.append("{}:{}:{}:{}".format(EndMapNum,BeginMapNum,bond.GetBondTypeAsDouble(),0.0)) elif lg_map_num == []: if Chem.MolToSmiles(reac_mols_1) == Chem.MolToSmiles(frag_mols_1): reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,0.0)) if (reac_attach_H - frag_attach_H) == 1 and (reac_attach_charge - frag_attach_charge) == 0: reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,1.0,0.0)) if (reac_attach_H - frag_attach_H) == 2 and (reac_attach_charge - frag_attach_charge) == 0: reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,2.0,0.0)) if (reac_attach_charge - frag_attach_charge) == -1: if "{}:{}:{}:{}".format(attach_map_num,0,0.0,-1.0) not in reac_edit: reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,-1.0)) if (reac_attach_charge - frag_attach_charge) == 1: if "{}:{}:{}:{}".format(attach_map_num,0,0.0,1.0) not in reac_edit: reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,1.0)) if (reac_attach_charge - frag_attach_charge) == 2: if "{}:{}:{}:{}".format(attach_map_num,0,0.0,2.0) not in reac_edit: reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,2.0)) return reac_edit def get_lg_map_lis(frag_mols,reac_mols,core_edits,prod_mol): lg_map_lis = [] prod_map_num_lis = [i.GetAtomMapNum() for i in prod_mol.GetAtoms()] for frag_mols_1,reac_mols_1 in zip(frag_mols[:],reac_mols[:]): reac_edits = find_reac_edit(frag_mols_1,reac_mols_1,core_edits) reac_edits_a = [] reac_edits_b = [] for reac_edit in reac_edits: if reac_edit[:3] == '0:0': reac_edits_a.append(reac_edit) elif reac_edit[-7:] == '0.0:0.0': reac_edits_a.append(reac_edit) elif reac_edit[-10:] == '0:0.0:-1.0': reac_edits_a.append(reac_edit) elif reac_edit[-9:] == '0:0.0:1.0': reac_edits_a.append(reac_edit) elif reac_edit[-9:] == '0:0.0:2.0': reac_edits_a.append(reac_edit) else: reac_edits_b.append(reac_edit) for reac_edit in reac_edits_a: if reac_edit[:3] == '0:0': pass elif reac_edit[-7:] == '0.0:0.0': pass elif reac_edit[-10:] == '0:0.0:-1.0': edit_map_num_lis = 
def get_lg_map_lis(frag_mols,reac_mols,core_edits,prod_mol):
    """List the leaving groups of every synthon/reactant pair.

    Parameters
    ----------
    frag_mols: List[Chem.Mol],
        Synthons, paired by position with ``reac_mols``
    reac_mols: List[Chem.Mol],
        Reactants
    core_edits: Iterable[str],
        Core edits, forwarded to ``find_reac_edit``
    prod_mol: Chem.Mol,
        Product; only its atom-map numbers are read

    Returns
    -------
    List of ``(lg_smiles, attach_map_nums)`` tuples. ``lg_smiles`` is '-1',
    '1' or '2' for a pure charge change; otherwise it is a kekulized,
    non-canonical SMILES of the leaving group in which attachment atoms carry
    map number 1 and all other atoms map number 0. ``attach_map_nums`` holds
    the synthon-side atom-map numbers.

    NOTE(review): the source file lost its indentation; the nesting below is a
    reconstruction - confirm against the original file.
    """
    lg_map_lis = []
    prod_map_num_lis = [i.GetAtomMapNum() for i in prod_mol.GetAtoms()]
    for frag_mols_1,reac_mols_1 in zip(frag_mols[:],reac_mols[:]):
        reac_edits = find_reac_edit(frag_mols_1,reac_mols_1,core_edits)
        reac_edits_a = []
        reac_edits_b = []
        # Sort edits by suffix: "no-op" and charge edits go to list a, every
        # other edit (handed to apply_edits_to_mol_break below) to list b.
        for reac_edit in reac_edits:
            if reac_edit[:3] == '0:0':
                reac_edits_a.append(reac_edit)
            elif reac_edit[-7:] == '0.0:0.0':
                reac_edits_a.append(reac_edit)
            elif reac_edit[-10:] == '0:0.0:-1.0':
                reac_edits_a.append(reac_edit)
            elif reac_edit[-9:] == '0:0.0:1.0':
                reac_edits_a.append(reac_edit)
            elif reac_edit[-9:] == '0:0.0:2.0':
                reac_edits_a.append(reac_edit)
            else:
                reac_edits_b.append(reac_edit)
        # Charge edits become pseudo leaving groups '-1' / '1' / '2', attached
        # to whichever of the edit's two atoms exists in the product.
        for reac_edit in reac_edits_a:
            if reac_edit[:3] == '0:0':
                pass
            elif reac_edit[-7:] == '0.0:0.0':
                pass
            elif reac_edit[-10:] == '0:0.0:-1.0':
                edit_map_num_lis = reac_edit.split(':')[:2]
                attach_map_num_1 = [int(i) for i in edit_map_num_lis if int(i) in prod_map_num_lis]
                lg_smiles = '-1'
                lg_map_lis.append((lg_smiles,attach_map_num_1))
            elif reac_edit[-9:] == '0:0.0:1.0':
                edit_map_num_lis = reac_edit.split(':')[:2]
                attach_map_num_1 = [int(i) for i in edit_map_num_lis if int(i) in prod_map_num_lis]
                lg_smiles = '1'
                lg_map_lis.append((lg_smiles,attach_map_num_1))
            elif reac_edit[-9:] == '0:0.0:2.0':
                edit_map_num_lis = reac_edit.split(':')[:2]
                attach_map_num_1 = [int(i) for i in edit_map_num_lis if int(i) in prod_map_num_lis]
                lg_smiles = '2'
                lg_map_lis.append((lg_smiles,attach_map_num_1))
        frag_1_map_num_lis = [i.GetAtomMapNum() for i in frag_mols_1.GetAtoms() if i.GetAtomMapNum() != 0]
        # Apply the remaining edits to the reactant and split the result into
        # connected pieces.
        reac_frag_mol = apply_edits_to_mol_break(reac_mols_1 , reac_edits_b)
        reac_frag_mols = Chem.GetMolFrags(reac_frag_mol,asMols=True,sanitizeFrags = False)
        reac_edit_added = []
        for reac_frag_mol in reac_frag_mols[:]:
            reac_frag_map_num_lis = [i.GetAtomMapNum() for i in reac_frag_mol.GetAtoms() if i.GetAtomMapNum() != 0]
            if set(reac_frag_map_num_lis) == set(frag_1_map_num_lis):
                # This piece carries exactly the synthon's map numbers: not a leaving group.
                pass
            else:
                attach_map_num_1 = []
                for reac_edit in reac_edits:
                    if reac_edit in reac_edit_added:
                        continue
                    else:
                        pass
                    b,e = int(reac_edit.split(':')[0]),int(reac_edit.split(':')[1])
                    if e in reac_frag_map_num_lis and b in frag_1_map_num_lis:
                        # Tag the leaving-group end of the edit by adding 500
                        # to its map number; `atom` is read again below.
                        for atom in reac_frag_mol.GetAtoms():
                            if atom.GetAtomMapNum() == int(e):
                                atom.SetAtomMapNum(500+atom.GetAtomMapNum())
                                break
                            else:
                                pass
                        reac_edit_added.append(reac_edit)
                        if len(attach_map_num_1) == 1:
                            if [str(attach_map_num_1[0]),str(atom.GetAtomMapNum()-500)] in [i.split(':')[:2] for i in reac_edits ]: # pair the attachment point on the previous synthon with this leaving group's attachment point
                                if atom.GetAtomMapNum() == max([i.GetAtomMapNum() for i in reac_frag_mol.GetAtoms()]):
                                    attach_map_num_1 = [b] + attach_map_num_1
                                else:
                                    attach_map_num_1.append(b)
                            else:
                                if atom.GetAtomMapNum() == max([i.GetAtomMapNum() for i in reac_frag_mol.GetAtoms()]):
                                    attach_map_num_1.append(b)
                                else:
                                    attach_map_num_1 = [b] + attach_map_num_1
                        elif len(attach_map_num_1) == 0:
                            attach_map_num_1.append(b)
                        else:
                            pass
                        # Stop scanning once a piece whose first atom is
                        # hydrogen has exactly one attachment point.
                        # NOTE(review): this check may sit one level further
                        # out in the original; the outcome is the same either
                        # way because attach_map_num_1 only changes here.
                        if reac_frag_mol.GetAtomWithIdx(0).GetAtomicNum() == 1 and len(attach_map_num_1) == 1:
                            break
                # Round-trip through kekulized SMILES, then reduce map numbers
                # to a flag: 1 for tagged attachment atoms, 0 for the rest.
                lg_smiles = Chem.MolToSmiles(reac_frag_mol,kekuleSmiles = True)
                lg = Chem.MolFromSmiles(lg_smiles)
                Chem.Kekulize(lg)
                for atom in lg.GetAtoms():
                    if atom.GetAtomMapNum() >= 500:
                        atom.SetAtomMapNum(1)
                        pass
                    else:
                        atom.SetAtomMapNum(0)
                lg_smiles = Chem.MolToSmiles(lg,canonical = False,kekuleSmiles = True)
                if attach_map_num_1 != []:
                    lg_map_lis.append((lg_smiles,attach_map_num_1))
    return lg_map_lis
def get_chair_dict_without_atom_map(temp_p):
    """Return ``{atom idx: CIP label}`` for ``temp_p`` with atom maps ignored.

    Works on a deep copy: atom-map numbers are cleared, the molecule is
    round-tripped through a MolBlock, and chiral centers are listed with
    ``includeUnassigned=True`` (unassigned centers are labelled '?').
    """
    temp_p = copy.deepcopy(temp_p)
    for atom in temp_p.GetAtoms():
        atom.SetAtomMapNum(0)
    round_tripped = Chem.MolFromMolBlock(Chem.MolToMolBlock(temp_p))
    return dict(Chem.FindMolChiralCenters(round_tripped, includeUnassigned=True))


def get_chai_edit_mine(reac_mol, prod_mol):
    """Derive chirality edits between a reactant set and a product.

    Parameters
    ----------
    reac_mol: Chem.Mol,
        Atom-mapped reactant molecule
    prod_mol: Chem.Mol,
        Atom-mapped product molecule

    Returns
    -------
    List[str] of edits ``amap:0:0:label`` where ``label`` is the reactant-side
    CIP label. One edit is emitted for every center labelled differently on
    the two sides, and one for every assigned reactant center (label not '?')
    whose atom exists in the product but is not a center there.
    """
    reac_map_a = {atom.GetIdx(): atom.GetAtomMapNum() for atom in reac_mol.GetAtoms()}
    prod_map_a = {atom.GetIdx(): atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()}

    # Re-key the index-based center tables by atom-map number.
    reac_centers = {reac_map_a[idx]: label
                    for idx, label in get_chair_dict_without_atom_map(reac_mol).items()}
    prod_centers = {prod_map_a[idx]: label
                    for idx, label in get_chair_dict_without_atom_map(prod_mol).items()}

    chai_edits = []
    for amap_num, chiral in prod_centers.items():
        if amap_num in reac_centers and chiral != reac_centers[amap_num]:
            chai_edits.append(f"{amap_num}:0:0:{reac_centers[amap_num]}")

    prod_map_nums = set(prod_map_a.values())
    for amap_num, chiral in reac_centers.items():
        if amap_num not in prod_centers and amap_num in prod_map_nums and chiral != '?':
            chai_edits.append(f"{amap_num}:0:0:{chiral}")
    return chai_edits


def get_original_chair_edit(p, b):
    """Build chirality edits that copy the product's CIP labels onto ``b``.

    Parameters
    ----------
    p: str,
        Product SMILES
    b: Chem.Mol,
        Molecule to be edited (not modified here)

    Returns
    -------
    List[str] of ``(idx + 1):0:0:label`` for every atom index that is a chiral
    center in both molecules.
    NOTE(review): writing ``idx + 1`` as the atom map presumes map numbers
    equal atom index + 1 - confirm against how ``p`` is canonicalized.
    """
    b_dic = get_chair_dict_without_atom_map(b)
    p_dic = get_chair_dict_without_atom_map(Chem.MolFromSmiles(p))
    return ['{}:0:0:{}'.format(idx + 1, label)
            for idx, label in p_dic.items() if idx in b_dic]


def apply_chirality_change(prod_mol, chai_edits):
    """Return a copy of ``prod_mol`` with the requested CIP labels applied.

    Parameters
    ----------
    prod_mol: Chem.Mol,
        Atom-mapped molecule (left untouched)
    chai_edits: Iterable[str],
        Edits ending in ':R', ':S' or ':?'; the first field is the atom map

    For ':R' / ':S' the atom is first tagged counter-clockwise; if the
    resulting label is not the requested one the tag is switched to clockwise.
    ':?' clears the chiral tag. Edits with any other suffix are ignored.

    Raises
    ------
    KeyError
        If an R/S edit targets an atom that is not reported as a chiral center
        after tagging, or an atom map that is absent from ``prod_mol``.
    """
    p_amap_idx = {atom.GetAtomMapNum(): atom.GetIdx() for atom in prod_mol.GetAtoms()}
    prod_mol = copy.deepcopy(prod_mol)
    for chai_edit in chai_edits:
        amap = int(chai_edit.split(':')[0])
        suffix = chai_edit[-2:]
        if suffix in (':R', ':S'):
            atom = prod_mol.GetAtomWithIdx(p_amap_idx[amap])
            atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CCW)
            labels = get_chair_dict_without_atom_map(prod_mol)
            if labels[atom.GetIdx()] != suffix[1]:
                atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CW)
        elif suffix == ':?':
            atom = prod_mol.GetAtomWithIdx(p_amap_idx[amap])
            atom.SetChiralTag(Chem.ChiralType.CHI_UNSPECIFIED)
    return prod_mol
# Reactions with a side of this many atoms or more are rejected up front.
_MAX_RXN_ATOMS = 150

# Leaving-group "SMILES" that actually encode a formal-charge change.
_CHARGE_LG_TOKENS = ('-1', '1', '2')

# symbol -> {(total valence, formal charge): bond order} for the bond that
# re-attaches a leaving-group atom; anything not listed gets a single bond.
_LG_MULTI_BOND_RULES = {
    'O': {(0, 0): 2.0, (1, 1): 2.0},
    'S': {(0, 0): 2.0, (2, 0): 2.0, (4, 0): 2.0, (1, 1): 2.0},
    'P': {(3, 0): 2.0},
    'C': {(2, 0): 2.0, (1, 0): 3.0, (0, -1): 3.0},
    'N': {(2, 1): 2.0, (1, 0): 2.0, (0, -1): 2.0, (0, 0): 3.0},
    'Se': {(2, 0): 2.0},
    'Si': {(2, 0): 2.0},
    'Mn': {(5, 0): 2.0},
    'Cr': {(4, 0): 2.0},
}


def _is_too_large(r, p):
    """True when either side of the reaction has _MAX_RXN_ATOMS atoms or more."""
    return (Chem.MolFromSmiles(p).GetNumAtoms() >= _MAX_RXN_ATOMS
            or Chem.MolFromSmiles(r).GetNumAtoms() >= _MAX_RXN_ATOMS)


def _kekule_aligned_pair(r, p):
    """Return kekulized (reactant, product) mols with consistent kekule forms."""
    reac_mol, prod_mol = align_kekule_pairs(r, p)
    reac_mol = Chem.MolFromSmiles(Chem.MolToSmiles(reac_mol, kekuleSmiles=True), sanitize=False)
    reac_mol_temp = Chem.MolFromSmiles(Chem.MolToSmiles(reac_mol, kekuleSmiles=True))
    if (reac_mol_temp is not None
            and Chem.MolToSmiles(reac_mol_temp) == Chem.MolToSmiles(Chem.MolFromSmiles(r))):
        return reac_mol, prod_mol
    # The aligned reactant does not round-trip to the input: fall back to the
    # alternative alignment. (The original branched on count_kekule_d(r_k, p)
    # here, but both branches were identical.)
    r_k = get_kekule_aligned_r(r, p)
    reac_mol, prod_mol = Chem.MolFromSmiles(r_k), Chem.MolFromSmiles(p)
    Chem.Kekulize(reac_mol)
    Chem.Kekulize(prod_mol)
    return reac_mol, prod_mol


def _split_core_edits(core_edits, core_edits_add):
    """Return (core edits minus additions, order-change edits, bond-break edits)."""
    core_edits = [e for e in core_edits if e not in core_edits_add]
    edit_c = [e for e in core_edits if float(e.split(':')[-1]) > 0]
    edit_b = [e for e in core_edits if float(e.split(':')[-1]) == 0]
    return core_edits, edit_c, edit_b


def _make_synthons(prod_mol, edit_b, edit_c, core_edits_add):
    """Apply break / change / connect edits to the product; drop isolated H atoms."""
    frag_mol = apply_edits_to_mol_break(prod_mol, edit_b)
    frag_mol = apply_edits_to_mol_change(frag_mol, edit_c)
    frag_mol = apply_edits_to_mol_connect(frag_mol, core_edits_add)
    return remove_s_H(frag_mol)


def _normalise_lg_map(lg_map_lis_temp):
    """Re-order attachment maps of multi-attachment leaving groups by canonical rank."""
    lg_map_lis = []
    for lg, map_ in lg_map_lis_temp:
        lg, map_ = copy.deepcopy(lg), copy.deepcopy(map_)
        if lg.count(':') <= 1:
            lg_map_lis.append((lg, map_))
            continue
        lg = Chem.MolFromSmiles(lg)
        Chem.Kekulize(lg)
        # One slot per atom: the synthon map for attachment atoms, '*' otherwise.
        map_new = []
        for atom in lg.GetAtoms():
            if atom.GetAtomMapNum() == 0:
                map_new.append('*')
            else:
                map_new.append(map_.pop(0))
        lg_smiles = Chem.MolToSmiles(lg, kekuleSmiles=True)
        rank = list(Chem.CanonicalRankAtoms(lg, breakTies=False))
        map_new = sorted(map_new, key=lambda x: rank[map_new.index(x)])
        map_new = [i for i in map_new if i != '*']
        lg_map_lis.append((lg_smiles, map_new))
    return lg_map_lis


def _extract_edits(r, p, report_mismatch):
    """Compute every edit for the reaction ``r >> p``.

    Returns ``(canonical r, [p, core_edits, chai_edits, stereo_edits,
    charge_edits, core_edits_add, lg_map_lis], synthon mol)``. When
    ``report_mismatch`` is true, 'error type 0' is printed if synthons and
    reactants cannot be paired one-to-one (processing continues regardless).
    """
    r, p = cano_smiles_map(r), cano_smiles_map(p)
    reac_mol, prod_mol = _kekule_aligned_pair(r, p)

    core_edits = get_core_edit_mine(reac_mol, prod_mol)
    # Edits with old order 0 between two real atoms create a new bond.
    core_edits_add = [e for e in core_edits
                      if float(e.split(':')[2]) == 0 and float(e.split(':')[1]) != 0]
    core_edits, edit_c, edit_b = _split_core_edits(core_edits, core_edits_add)

    chai_edits = get_chai_edit_mine(Chem.MolFromSmiles(r), Chem.MolFromSmiles(p))
    stereo_edits = get_stereo_edit_mine(Chem.MolFromSmiles(r), Chem.MolFromSmiles(p))
    charge_edits = get_charge_edit_mine(reac_mol, prod_mol, core_edits)

    frag_mol = _make_synthons(prod_mol, edit_b, edit_c, core_edits_add)
    reac_mols = Chem.GetMolFrags(reac_mol, asMols=True, sanitizeFrags=False)
    frag_mols = Chem.GetMolFrags(frag_mol, asMols=True, sanitizeFrags=False)
    if len(reac_mols) != len(frag_mols):
        frag_mols = [m for m in frag_mols if Chem.MolToSmiles(m) != '[H]']
    if len(reac_mols) != len(frag_mols):
        frag_mols = [frag_mol]
    if len(reac_mols) == len(frag_mols):
        reac_mols, frag_mols = map_reac_and_frag(reac_mols, frag_mols)
    elif report_mismatch:
        print('error type 0')

    lg_map_lis = _normalise_lg_map(
        get_lg_map_lis(frag_mols[:], reac_mols[:], core_edits, prod_mol))
    edits = [p, core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add, lg_map_lis]
    return r, edits, frag_mol


def _lg_bond_order(atom):
    """Bond order used to re-attach leaving-group atom ``atom`` (default 1.0)."""
    rules = _LG_MULTI_BOND_RULES.get(atom.GetSymbol())
    if rules is None:
        return 1.0
    return rules.get((atom.GetTotalValence(), atom.GetFormalCharge()), 1.0)


def _attach_leaving_groups(frag_mol, lg_map_lis):
    """Bond every leaving group onto the synthons and apply charge tokens."""
    total_mol = frag_mol
    for lg_smile, map_nums in lg_map_lis:
        if lg_smile in _CHARGE_LG_TOKENS:
            amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in total_mol.GetAtoms()}
            atom = total_mol.GetAtomWithIdx(amap[map_nums[0]])
            atom.SetNumRadicalElectrons(0)
            atom.SetFormalCharge(int(atom.GetFormalCharge() + float(lg_smile)))
            continue

        lg = Chem.MolFromSmiles(lg_smile)
        max_map = max(atom.GetAtomMapNum() for atom in total_mol.GetAtoms())
        # Give fresh map numbers above max_map: attachment atoms (map 1)
        # first, then every unmapped atom.
        count = 1
        for atom in lg.GetAtoms():
            if atom.GetAtomMapNum() == 1:
                atom.SetAtomMapNum(max_map + count)
                count += 1
        for atom in lg.GetAtoms():
            if atom.GetAtomMapNum() == 0:
                atom.SetAtomMapNum(max_map + count)
                count += 1

        total_mol = Chem.CombineMols(total_mol, lg)
        amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in total_mol.GetAtoms()}
        new_mol = Chem.RWMol(total_mol)
        # One attachment atom per synthon atom; otherwise a single attachment
        # atom is bonded (with single bonds) to every listed synthon atom.
        one_to_one = lg_smile.count(':') == len(map_nums)
        for idx, map_num in enumerate(map_nums):
            lg_map = max_map + 1 + idx if one_to_one else max_map + 1
            atom = total_mol.GetAtomWithIdx(amap[lg_map])
            bond_float = _lg_bond_order(atom) if one_to_one else 1.0
            new_mol.AddBond(amap[map_num], amap[lg_map], BOND_FLOAT_TO_TYPE[bond_float])
        total_mol = new_mol.GetMol()
    return total_mol


def _rebuild_reactant_smiles(frag_mol, p, chai_edits, stereo_edits, charge_edits,
                             lg_map_lis, strip_maps):
    """Turn synthons plus edits back into a reactant SMILES.

    With ``strip_maps`` true, atom maps are removed and the SMILES is cleaned
    and re-canonicalized; otherwise the mapped SMILES is returned as written.
    """
    o_p_Chiral_dic = get_atom_map_chai_dic(Chem.MolFromSmiles(p))
    o_p_Stereo_dic = get_atom_map_stereo_dic(Chem.MolFromSmiles(p))

    total_mol = _attach_leaving_groups(frag_mol, lg_map_lis)
    total_mol = correct_mol_1(total_mol, is_nitrine_c=True)
    b = correct_mol(total_mol, keep_map=True)

    b_Chiral_dic = get_atom_map_chai_dic(b)
    b_Stereo_dic = get_atom_map_stereo_dic(b)
    dic_map_idx = {atom.GetAtomMapNum(): atom.GetIdx() for atom in b.GetAtoms()}

    # Centers whose label drifted from the product's, and that no chirality
    # edit targets, get their tag flipped back.
    for b_map, chiral in b_Chiral_dic.items():
        if (b_map in o_p_Chiral_dic and chiral != o_p_Chiral_dic[b_map]
                and b_map not in [int(e.split(':')[0]) for e in chai_edits]):
            atom = b.GetAtomWithIdx(dic_map_idx[b_map])
            if atom.GetChiralTag() == Chem.ChiralType.CHI_TETRAHEDRAL_CCW:
                atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CW)
            elif atom.GetChiralTag() == Chem.ChiralType.CHI_TETRAHEDRAL_CW:
                atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CCW)

    # Same for double-bond stereo: restore the product's value unless edited.
    for b_map, stereo in b_Stereo_dic.items():
        if (b_map in o_p_Stereo_dic and stereo != o_p_Stereo_dic[b_map]
                and b_map not in [tuple(int(x) for x in e.split(':')[:2]) for e in stereo_edits]):
            bond = b.GetBondBetweenAtoms(dic_map_idx[b_map[0]], dic_map_idx[b_map[1]])
            bond.SetStereo(o_p_Stereo_dic[b_map])

    b = apply_charge_change(b, charge_edits)
    if chai_edits == []:
        b = apply_chirality_change(b, get_original_chair_edit(p, b))
    else:
        b = apply_chirality_change(b, chai_edits)
    b = Chem.MolFromSmiles(Chem.MolToSmiles(b, canonical=False))
    b = apply_stereo_change(b, stereo_edits)

    if strip_maps:
        for atom in b.GetAtoms():
            atom.SetAtomMapNum(0)
    for bond in b.GetBonds():
        if bond.GetStereo() == Chem.rdchem.BondStereo.STEREONONE:
            bond.SetStereo(Chem.rdchem.BondStereo.STEREOANY)
    pre_smiles = Chem.MolToSmiles(b)
    if strip_maps:
        # NOTE(review): the last substitution rewrites '/C=N\' as 'C=C'
        # (nitrogen becomes carbon). Kept as in the original; confirm whether
        # 'C=N' was intended.
        pre_smiles = (pre_smiles.replace('[H]/C=C/', 'C=C')
                      .replace('[H]/C=C(\\', 'C=C(')
                      .replace('[H]/C=C(/', 'C=C(')
                      .replace('[MgH2]', '[Mg]')
                      .replace('/C=N\\', 'C=C'))
        pre_smiles = Chem.MolToSmiles(Chem.MolFromSmiles(pre_smiles))
    return pre_smiles


def _round_trip_matches(pre_smiles, r, core_edits_add, charge_edits):
    """Return (accepted?, canonical unmapped reactant SMILES)."""
    reac_mol = Chem.MolFromSmiles(r)
    for atom in reac_mol.GetAtoms():
        atom.SetAtomMapNum(0)
    reac_mol_smiles = Chem.MolToSmiles(reac_mol)
    reac_mol_smiles = Chem.MolToSmiles(Chem.MolFromSmiles(reac_mol_smiles))

    added_orders = [float(e[-3:]) for e in core_edits_add]
    if not added_orders:
        max_add = 0
    elif max(added_orders) == 1:
        max_add = 1
    else:
        max_add = 2
    # Only the last character of each charge edit is read, so the sign is
    # dropped and the lower-bound test below cannot fail (kept as in the original).
    charges = [int(e[-1]) for e in charge_edits] + [0]
    accepted = (pre_smiles == reac_mol_smiles and len(core_edits_add) <= 1
                and max_add <= 1 and max(charges) <= 1 and min(charges) >= -1)
    return accepted, reac_mol_smiles


def _apply_edits_backward(p, core_edits, chai_edits, stereo_edits, charge_edits,
                          core_edits_add, lg_map_lis, strip_maps):
    """Shared body of the two public backward functions."""
    prod_mol = Chem.MolFromSmiles(p)
    _, edit_c, edit_b = _split_core_edits(core_edits, core_edits_add)
    frag_mol = _make_synthons(prod_mol, edit_b, edit_c, core_edits_add)
    return _rebuild_reactant_smiles(frag_mol, p, chai_edits, stereo_edits,
                                    charge_edits, lg_map_lis, strip_maps)


def run_get_p_b_l(rxn_smi):
    """Extract the edits of ``rxn_smi`` and verify that they rebuild the reactants.

    Parameters
    ----------
    rxn_smi: str,
        Atom-mapped reaction SMILES ``reactants>>product``

    Returns
    -------
    ``[p, core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add,
    lg_map_lis]`` on success, otherwise one of the strings 'error type 3'
    (a side has 150 atoms or more), 'error type 1' (rebuilt reactants differ
    or the edits exceed the supported limits) or 'error type 2' (any
    exception). Diagnostics are printed as in the original.
    """
    try:
        r, p = rxn_smi.split(">>")
        if _is_too_large(r, p):
            print('error type 3')
            return 'error type 3'
        r, edits, frag_mol = _extract_edits(r, p, report_mismatch=True)
        p, core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add, lg_map_lis = edits
        pre_smiles = _rebuild_reactant_smiles(frag_mol, p, chai_edits, stereo_edits,
                                              charge_edits, lg_map_lis, strip_maps=True)
        accepted, reac_mol_smiles = _round_trip_matches(pre_smiles, r, core_edits_add, charge_edits)
        if accepted:
            return edits
        print(pre_smiles, reac_mol_smiles, chai_edits, stereo_edits)
        return 'error type 1'
    except Exception:
        print('error type 2')
        return 'error type 2'


def run_get_p_b_l_forward(rxn_smi):
    """Extract the edits of ``rxn_smi`` without verifying them.

    Returns the same 7-item list as ``run_get_p_b_l``, or 'error type 1' when
    a side has 150 atoms or more, or 'error type 2' on any exception.
    """
    try:
        r, p = rxn_smi.split(">>")
        if _is_too_large(r, p):
            return 'error type 1'
        _, edits, _ = _extract_edits(r, p, report_mismatch=False)
        return edits
    except Exception:
        return 'error type 2'


def run_get_p_b_l_backward(p,core_edits,chai_edits,stereo_edits,charge_edits,core_edits_add,lg_map_lis):
    """Apply the edits to product ``p``; return canonical reactant SMILES without atom maps."""
    return _apply_edits_backward(p, core_edits, chai_edits, stereo_edits, charge_edits,
                                 core_edits_add, lg_map_lis, strip_maps=True)


def run_get_p_b_l_backward_with_mapping(p,core_edits,chai_edits,stereo_edits,charge_edits,core_edits_add,lg_map_lis):
    """Apply the edits to product ``p``; return reactant SMILES that keeps atom maps."""
    return _apply_edits_backward(p, core_edits, chai_edits, stereo_edits, charge_edits,
                                 core_edits_add, lg_map_lis, strip_maps=False)


def run_get_p_b_l_check(rxn):
    """Run forward extraction, replay it backward and compare with the reactants.

    Returns the 7-tuple of edits on success, otherwise 'error type 3' (forward
    extraction failed), 'error type 5' (backward replay raised) or
    'error type 4' (rebuilt reactants differ or the edits exceed the limits).
    """
    forward = run_get_p_b_l_forward(rxn)
    if isinstance(forward, str):
        # The forward pass reports failures as an error string.
        return 'error type 3'
    p, core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add, lg_map_lis = forward
    try:
        pre_smiles = run_get_p_b_l_backward(p, core_edits, chai_edits, stereo_edits,
                                            charge_edits, core_edits_add, lg_map_lis)
    except Exception:
        return 'error type 5'
    r = rxn.split('>>')[0]
    accepted, _ = _round_trip_matches(pre_smiles, r, core_edits_add, charge_edits)
    if accepted:
        return p, core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add, lg_map_lis
    return 'error type 4'
def get_atom_pair_bond_idx_dic(concise_smiles):
    """Map each bond's atom pair to its position in the Indigo molfile.

    The molfile written by Indigo is split into lines; the bond lines are
    taken to start after 4 header lines plus one line per atom, and the first
    two 3-character fields of each bond line are read as atom numbers.

    Returns
    -------
    dict ``{(lower atom number, higher atom number): bond position}`` with
    atom numbers as written in the molfile and positions counted from 0.
    """
    mol_block_indigo = indigo.loadMolecule(concise_smiles).molfile()
    mol = Chem.MolFromSmiles(concise_smiles, sanitize=False)
    atom_num = len(mol.GetAtoms())
    bond_num = len(mol.GetBonds())
    first_bond_line = 4 + atom_num
    bond_lines = mol_block_indigo.split('\n')[first_bond_line:first_bond_line + bond_num]

    atom_pair_bond_idx_dic = {}
    for position, bond_line in enumerate(bond_lines):
        s_atom = int(bond_line[:3])
        e_atom = int(bond_line[3:6])
        atom_pair_bond_idx_dic[(min(s_atom, e_atom), max(s_atom, e_atom))] = position
    return atom_pair_bond_idx_dic


def get_rm_token_lis(concise_smiles, detailed_smiles):
    """List, per character of ``detailed_smiles``, what must be removed to get ``concise_smiles``.

    Returns a list as long as ``detailed_smiles``: ' ' where the character is
    kept, the character itself where it has to be dropped. If dropping
    characters cannot reproduce ``concise_smiles``, 'error' is printed and
    None is returned.
    """
    detailed_smiles_length = len(detailed_smiles)
    idx = 0
    rm_token_lis = []
    for _ in range(detailed_smiles_length):
        # Once the concise string is used up, every remaining detailed
        # character is surplus (the original indexed past the end here).
        surplus = idx >= len(concise_smiles) or detailed_smiles[idx] != concise_smiles[idx]
        if surplus:
            rm_token_lis.append(detailed_smiles[idx])
            detailed_smiles = detailed_smiles[:idx] + detailed_smiles[idx + 1:]
        else:
            idx += 1
            rm_token_lis.append(' ')
    if detailed_smiles == concise_smiles and len(rm_token_lis) == detailed_smiles_length:
        return rm_token_lis
    print('error')
    return None


def get_bond_token_lis(detailed_smiles):
    """Return one entry per character: the bond symbol, or ' ' for anything else.

    A character counts as a bond symbol when it is one of ``- = # : / \\`` and
    is not directly followed by ']' (which marks e.g. the charge in ``[O-]``).
    A bond symbol in last position has no following ']' and is therefore
    reported as a bond (the original raised IndexError on it).
    """
    bond_chars = ('-', '=', '#', ':', '/', '\\')
    last = len(detailed_smiles) - 1
    bond_token_lis = []
    for i, char in enumerate(detailed_smiles):
        is_bond = char in bond_chars and (i == last or detailed_smiles[i + 1] != ']')
        bond_token_lis.append(char if is_bond else ' ')
    return bond_token_lis


def get_bond_token_idx_dic(bond_token_lis):
    """Map the running bond count to the 1-based position of the last token seen with that count.

    NOTE(review): the assignment is placed at loop level (after the if/else);
    the collapsed source would also allow it inside the else branch - confirm.
    """
    bond_token_idx_dic = {}
    bond_idx = 0
    for token_idx, token in enumerate(bond_token_lis, start=1):
        if token != ' ':
            bond_idx += 1
        bond_token_idx_dic[bond_idx] = token_idx
    return bond_token_idx_dic


def rerank_special_bond(mol_block_indigo_lis, bond_idx):
    """Swap the two atom fields of one bond line of a molfile, in place.

    ``mol_block_indigo_lis`` is the molfile as a list of lines; the edited
    line is the ``bond_idx``-th after 4 header lines plus one line per atom.
    The same list object is returned.
    """
    mol = Chem.MolFromMolBlock('\n'.join(mol_block_indigo_lis), removeHs=False)
    line_no = mol.GetNumAtoms() + 4 + bond_idx
    bond_line = mol_block_indigo_lis[line_no]
    mol_block_indigo_lis[line_no] = bond_line[3:6] + bond_line[:3] + bond_line[6:]
    return mol_block_indigo_lis


def get_caption_r(caption):
    """Replace every ``{...}`` group in ``caption`` by a numbered ``[<n>Au]`` placeholder.

    Numbering starts at 401 and follows the order of appearance; each group
    replaces only the first remaining occurrence of its text.

    Returns
    -------
    (caption with placeholders, list of the ``{...}`` groups in order)
    """
    words = ['{' + inner + '}' for inner in re.findall(r'[{](.*?)[}]', caption)]
    caption_r = caption
    for count, word in enumerate(words, start=401):
        caption_r = caption_r.replace(word, '[{}Au]'.format(count), 1)
    return caption_r, words
b_smiles.replace('/','/-').replace('\\','\\-') b_smiles = b_smiles.replace('-!','!').replace('-?','?') mol_tmp = Chem.MolFromSmiles(smiles,sanitize = False) detailed_smiles = Chem.MolToSmiles(mol_tmp,canonical = False,allBondsExplicit = True) detailed_smiles = detailed_smiles.replace('/','/-').replace('\\','\\-') # for i in range(len(detailed_smiles)): if detailed_smiles[i] != b_smiles[i]: if b_smiles[i] in ['!','_',';','^','&','{','}','。','《','》']: pass else: b_smiles = b_smiles[:i] + detailed_smiles[i] + b_smiles[i:] else: pass return b_smiles,detailed_smiles def get_bond_dic(b_smiles,detailed_smiles): b_smiles = b_smiles.replace('-]',']') detailed_smiles = detailed_smiles.replace('-]',']') count = 0 bond_dic = {} for i,j in zip(detailed_smiles,b_smiles): if i != j: bond_dic[count] = j if i in ['-','=','#',':']: count += 1 return bond_dic def get_t_smiles(e_smiles,o_smiles): e_smiles_r = e_smiles.replace('!','-').replace('_','-').replace(';','-').replace('^','-').replace('&','=').replace('{','=').replace('}','=').replace('。','=').replace('《','=').replace('》','=') mol_r = Chem.MolFromSmiles(e_smiles_r,sanitize = False) a = Chem.MolFromSmiles(o_smiles,sanitize = False) for atom in a.GetAtoms(): atom.SetAtomMapNum(0) for atom in mol_r.GetAtoms(): if atom.GetIsotope() != 0: a.GetAtomWithIdx(atom.GetIdx()).SetIsotope(atom.GetIsotope()) t_smiles = Chem.MolToSmiles(a,canonical = False) return t_smiles def get_b_smiles(p_b): o_smiles = p_b[0] core_edits = p_b[1] chai_edits = p_b[2] stereo_edits = p_b[3] charge_edits = p_b[4] core_edits_add = p_b[5] atom_idx_mark_dic = {} for edit in core_edits: b = int(edit.split(':')[0]) e = int(edit.split(':')[1]) new_b = edit.split(':')[3] if min([b,e]) == 0: atom_map = max([b,e]) if new_b == '0.0': atom_idx_mark_dic[atom_map] = 9 else: pass for edit in chai_edits: edit_l = edit.split(':') if edit_l[3] == 'R': if int(edit_l[0]) not in atom_idx_mark_dic.keys(): atom_idx_mark_dic[int(edit_l[0])] = 10 else: 
atom_idx_mark_dic[int(edit_l[0])] = 10 + atom_idx_mark_dic[int(edit_l[0])] elif edit_l[3] == 'S': if int(edit_l[0]) not in atom_idx_mark_dic.keys(): atom_idx_mark_dic[int(edit_l[0])] = 20 else: atom_idx_mark_dic[int(edit_l[0])] = 20 + atom_idx_mark_dic[int(edit_l[0])] elif edit_l[3] == '?': if int(edit_l[0]) not in atom_idx_mark_dic.keys(): atom_idx_mark_dic[int(edit_l[0])] = 30 else: atom_idx_mark_dic[int(edit_l[0])] = 30 + atom_idx_mark_dic[int(edit_l[0])] for edit in charge_edits: edit_l = edit.split(':') if edit_l[3] == '1': if int(edit_l[0]) not in atom_idx_mark_dic.keys(): atom_idx_mark_dic[int(edit_l[0])] = 200 else: atom_idx_mark_dic[int(edit_l[0])] = 200 + atom_idx_mark_dic[int(edit_l[0])] pass elif edit_l[3] == '0': if int(edit_l[0]) not in atom_idx_mark_dic.keys(): atom_idx_mark_dic[int(edit_l[0])] = 400 else: atom_idx_mark_dic[int(edit_l[0])] = 400 + atom_idx_mark_dic[int(edit_l[0])] elif edit_l[3] == '-1': if int(edit_l[0]) not in atom_idx_mark_dic.keys(): atom_idx_mark_dic[int(edit_l[0])] = 600 else: atom_idx_mark_dic[int(edit_l[0])] = 600 + atom_idx_mark_dic[int(edit_l[0])] for edit in core_edits_add: edit_l = edit.split(':') if int(edit_l[0]) not in atom_idx_mark_dic.keys(): atom_idx_mark_dic[int(edit_l[0])] = 100 else: atom_idx_mark_dic[int(edit_l[0])] = 100 + atom_idx_mark_dic[int(edit_l[0])] if int(edit_l[1]) not in atom_idx_mark_dic.keys(): atom_idx_mark_dic[int(edit_l[1])] = 100 else: atom_idx_mark_dic[int(edit_l[1])] = 100 + atom_idx_mark_dic[int(edit_l[1])] a = Chem.MolFromSmiles(o_smiles,sanitize = False) for atom in a.GetAtoms(): if atom.GetAtomMapNum() in atom_idx_mark_dic.keys(): atom_map = atom.GetAtomMapNum() atom.SetIsotope(atom_idx_mark_dic[atom_map]) else: pass atom.SetAtomMapNum(0) mol = copy.deepcopy(a) detailed_smiles = Chem.MolToSmiles(mol,canonical = False,allBondsExplicit = True,kekuleSmiles=True) concise_smiles = Chem.MolToSmiles(mol,canonical = False,kekuleSmiles=True) concise_smiles_no_chirality = 
Chem.MolToSmiles(mol,canonical = False,isomericSmiles = False,kekuleSmiles=True) atom_pair_bond_idx_dic = get_atom_pair_bond_idx_dic(concise_smiles_no_chirality) rm_token_lis = get_rm_token_lis(concise_smiles,detailed_smiles) bond_token_lis = get_bond_token_lis(detailed_smiles) bond_token_idx_dic = get_bond_token_idx_dic(bond_token_lis) bond_idx_mark_dic = {} for edit in core_edits: b = int(edit.split(':')[0]) e = int(edit.split(':')[1]) org_b = edit.split(':')[2] new_b = edit.split(':')[3] if min([b,e]) != 0: bond_idx = atom_pair_bond_idx_dic[min([b,e]),max([b,e])] if new_b == '0.0': mark = '!' elif new_b == '1.0': mark = '_' elif new_b == '2.0': mark = ';' elif new_b == '3.0': mark = '^' bond_idx_mark_dic[bond_idx] = mark else: pass for edit in stereo_edits: b = int(edit.split(':')[0]) e = int(edit.split(':')[1]) new_b = edit.split(':')[3] if min([b,e]) != 0: bond_idx = atom_pair_bond_idx_dic[min([b,e]),max([b,e])] if bond_idx not in bond_idx_mark_dic.keys(): if new_b == 'a': mark = '&' elif new_b == 'e': mark = '{' elif new_b == 'z': mark = '}' bond_idx_mark_dic[bond_idx] = mark else: bond_idx in bond_idx_mark_dic.keys() if new_b == 'a': mark = '。' elif new_b == 'e': mark = '《' elif new_b == 'z': mark = '》' bond_idx_mark_dic[bond_idx] = mark else: pass for bond_idx,mark in bond_idx_mark_dic.items(): token_idx = bond_token_idx_dic[bond_idx] rm_token_lis[token_idx] = mark new_smiles_lis = [] for i in range(len(rm_token_lis)): if rm_token_lis[i] == ' ': new_smiles_lis.append(detailed_smiles[i]) elif rm_token_lis[i][-1] in ['!','_',';','^','&','{','}','。','《','》']: new_smiles_lis.append(rm_token_lis[i]) else: pass caption = ''.join(new_smiles_lis) out_b_smiles_lis.append(caption) caption_r = caption t_smiles = get_t_smiles(caption_r,o_smiles) b_smiles,detailed_smiles = get_b_smiles_detailed_smiles(caption_r,t_smiles) bond_dic = get_bond_dic(b_smiles,detailed_smiles) atom_pair_bond_idx = {} for atom_pair,bond_idx in get_atom_pair_bond_idx_dic(o_smiles).items(): 
atom_pair_bond_idx[bond_idx] = atom_pair mol = Chem.MolFromSmiles(t_smiles) Chem.Kekulize(mol) core_edits_ = [] chai_edits_ = [] stereo_edits_ = [] charge_edits_ = [] core_edits_add_ = [] for bond_idx,mark in bond_dic.items(): b,e = atom_pair_bond_idx[bond_idx] o_bond = mol.GetBondBetweenAtoms(b-1,e-1).GetBondTypeAsDouble() if mark == '!': n_bond = '0.0' core_edits_.append('{}:{}:{}:{}'.format(b,e,o_bond,n_bond)) elif mark == '_': n_bond = '1.0' core_edits_.append('{}:{}:{}:{}'.format(b,e,o_bond,n_bond)) elif mark == ';': n_bond = '2.0' core_edits_.append('{}:{}:{}:{}'.format(b,e,o_bond,n_bond)) elif mark == '^': n_bond = '3.0' core_edits_.append('{}:{}:{}:{}'.format(b,e,o_bond,n_bond)) elif mark == '&': stereo_edits_.append('{}:{}:{}:{}'.format(b,e,0,'a')) elif mark == '{': stereo_edits_.append('{}:{}:{}:{}'.format(b,e,0,'e')) elif mark == '}': stereo_edits_.append('{}:{}:{}:{}'.format(b,e,0,'z')) elif mark == '。': n_bond = '2.0' core_edits_.append('{}:{}:{}:{}'.format(b,e,o_bond,n_bond)) stereo_edits_.append('{}:{}:{}:{}'.format(b,e,0,'a')) elif mark == '《': n_bond = '2.0' core_edits_.append('{}:{}:{}:{}'.format(b,e,o_bond,n_bond)) stereo_edits_.append('{}:{}:{}:{}'.format(b,e,0,'e')) elif mark == '》': n_bond = '2.0' core_edits_.append('{}:{}:{}:{}'.format(b,e,o_bond,n_bond)) stereo_edits_.append('{}:{}:{}:{}'.format(b,e,0,'z')) core_edits_add_atom_lis = [] for atom in mol.GetAtoms(): Isotope = atom.GetIsotope() g_w = Isotope % 10 s_w = Isotope % 100 //10 b_w = Isotope // 100 if g_w == 9: core_edits_.append('{}:{}:{}:{}'.format(atom.GetIdx()+1,0,'1.0','0.0')) else: pass if s_w == 1: chai_edits_.append('{}:{}:{}:{}'.format(atom.GetIdx()+1,0,'0','R')) elif s_w == 2: chai_edits_.append('{}:{}:{}:{}'.format(atom.GetIdx()+1,0,'0','S')) elif s_w == 3: chai_edits_.append('{}:{}:{}:{}'.format(atom.GetIdx()+1,0,'0','?')) if b_w == 2 or b_w == 3: charge_edits_.append('{}:{}:{}:{}'.format(atom.GetIdx()+1,0,'0',1)) elif b_w == 4 or b_w == 5: 
charge_edits_.append('{}:{}:{}:{}'.format(atom.GetIdx()+1,0,'0',0)) elif b_w == 6 or b_w == 7: charge_edits_.append('{}:{}:{}:{}'.format(atom.GetIdx()+1,0,'0',-1)) if b_w % 2 == 1: core_edits_add_atom_lis.append(atom.GetIdx()+1) if core_edits_add_atom_lis != []: core_edits_add_.append('{}:{}:{}:{}'.format(core_edits_add_atom_lis[0],core_edits_add_atom_lis[1],'0.0','1.0')) else: pass if sorted(core_edits_) != sorted(core_edits) or sorted(chai_edits_) != sorted(chai_edits) or sorted(stereo_edits_) != sorted(stereo_edits) or sorted(charge_edits_) != sorted(charge_edits) or sorted(core_edits_add_) != sorted(core_edits_add): print(core_edits_,core_edits) print(chai_edits_,chai_edits) print(core_edits_add_,core_edits_add) return 'error' else: return caption pass def get_b_smiles_forward(p_b): o_smiles = p_b[0] core_edits = p_b[1] chai_edits = p_b[2] stereo_edits = p_b[3] charge_edits = p_b[4] core_edits_add = p_b[5] atom_idx_mark_dic = {} for edit in core_edits: b = int(edit.split(':')[0]) e = int(edit.split(':')[1]) new_b = edit.split(':')[3] if min([b,e]) == 0: atom_map = max([b,e]) if new_b == '0.0': atom_idx_mark_dic[atom_map] = 9 else: pass for edit in chai_edits: edit_l = edit.split(':') if edit_l[3] == 'R': if int(edit_l[0]) not in atom_idx_mark_dic.keys(): atom_idx_mark_dic[int(edit_l[0])] = 10 else: atom_idx_mark_dic[int(edit_l[0])] = 10 + atom_idx_mark_dic[int(edit_l[0])] elif edit_l[3] == 'S': if int(edit_l[0]) not in atom_idx_mark_dic.keys(): atom_idx_mark_dic[int(edit_l[0])] = 20 else: atom_idx_mark_dic[int(edit_l[0])] = 20 + atom_idx_mark_dic[int(edit_l[0])] elif edit_l[3] == '?': if int(edit_l[0]) not in atom_idx_mark_dic.keys(): atom_idx_mark_dic[int(edit_l[0])] = 30 else: atom_idx_mark_dic[int(edit_l[0])] = 30 + atom_idx_mark_dic[int(edit_l[0])] for edit in charge_edits: edit_l = edit.split(':') if edit_l[3] == '1': if int(edit_l[0]) not in atom_idx_mark_dic.keys(): atom_idx_mark_dic[int(edit_l[0])] = 200 else: atom_idx_mark_dic[int(edit_l[0])] = 200 + 
atom_idx_mark_dic[int(edit_l[0])] pass elif edit_l[3] == '0': if int(edit_l[0]) not in atom_idx_mark_dic.keys(): atom_idx_mark_dic[int(edit_l[0])] = 400 else: atom_idx_mark_dic[int(edit_l[0])] = 400 + atom_idx_mark_dic[int(edit_l[0])] elif edit_l[3] == '-1': if int(edit_l[0]) not in atom_idx_mark_dic.keys(): atom_idx_mark_dic[int(edit_l[0])] = 600 else: atom_idx_mark_dic[int(edit_l[0])] = 600 + atom_idx_mark_dic[int(edit_l[0])] for edit in core_edits_add: edit_l = edit.split(':') if int(edit_l[0]) not in atom_idx_mark_dic.keys(): atom_idx_mark_dic[int(edit_l[0])] = 100 else: atom_idx_mark_dic[int(edit_l[0])] = 100 + atom_idx_mark_dic[int(edit_l[0])] if int(edit_l[1]) not in atom_idx_mark_dic.keys(): atom_idx_mark_dic[int(edit_l[1])] = 100 else: atom_idx_mark_dic[int(edit_l[1])] = 100 + atom_idx_mark_dic[int(edit_l[1])] a = Chem.MolFromSmiles(o_smiles,sanitize = False) for atom in a.GetAtoms(): if atom.GetAtomMapNum() in atom_idx_mark_dic.keys(): atom_map = atom.GetAtomMapNum() atom.SetIsotope(atom_idx_mark_dic[atom_map]) else: pass atom.SetAtomMapNum(0) mol = copy.deepcopy(a) detailed_smiles = Chem.MolToSmiles(mol,canonical = False,allBondsExplicit = True,kekuleSmiles=True) concise_smiles = Chem.MolToSmiles(mol,canonical = False,kekuleSmiles=True) concise_smiles_no_chirality = Chem.MolToSmiles(mol,canonical = False,isomericSmiles = False,kekuleSmiles=True) atom_pair_bond_idx_dic = get_atom_pair_bond_idx_dic(concise_smiles_no_chirality) rm_token_lis = get_rm_token_lis(concise_smiles,detailed_smiles) bond_token_lis = get_bond_token_lis(detailed_smiles) bond_token_idx_dic = get_bond_token_idx_dic(bond_token_lis) bond_idx_mark_dic = {} for edit in core_edits: b = int(edit.split(':')[0]) e = int(edit.split(':')[1]) org_b = edit.split(':')[2] new_b = edit.split(':')[3] if min([b,e]) != 0: bond_idx = atom_pair_bond_idx_dic[min([b,e]),max([b,e])] if new_b == '0.0': mark = '!' 
elif new_b == '1.0': mark = '_' elif new_b == '2.0': mark = ';' elif new_b == '3.0': mark = '^' bond_idx_mark_dic[bond_idx] = mark else: pass for edit in stereo_edits: b = int(edit.split(':')[0]) e = int(edit.split(':')[1]) new_b = edit.split(':')[3] if min([b,e]) != 0: bond_idx = atom_pair_bond_idx_dic[min([b,e]),max([b,e])] if bond_idx not in bond_idx_mark_dic.keys(): if new_b == 'a': mark = '&' elif new_b == 'e': mark = '{' elif new_b == 'z': mark = '}' bond_idx_mark_dic[bond_idx] = mark else: bond_idx in bond_idx_mark_dic.keys() if new_b == 'a': mark = '。' elif new_b == 'e': mark = '《' elif new_b == 'z': mark = '》' bond_idx_mark_dic[bond_idx] = mark else: pass for bond_idx,mark in bond_idx_mark_dic.items(): token_idx = bond_token_idx_dic[bond_idx] rm_token_lis[token_idx] = mark new_smiles_lis = [] for i in range(len(rm_token_lis)): if rm_token_lis[i] == ' ': new_smiles_lis.append(detailed_smiles[i]) elif rm_token_lis[i][-1] in ['!','_',';','^','&','{','}','。','《','》']: new_smiles_lis.append(rm_token_lis[i]) else: pass return ''.join(new_smiles_lis) def get_b_smiles_backward(caption_r,o_smiles): t_smiles = get_t_smiles(caption_r,o_smiles) b_smiles,detailed_smiles = get_b_smiles_detailed_smiles(caption_r,t_smiles) bond_dic = get_bond_dic(b_smiles,detailed_smiles) atom_pair_bond_idx = {} for atom_pair,bond_idx in get_atom_pair_bond_idx_dic(o_smiles).items(): atom_pair_bond_idx[bond_idx] = atom_pair mol = Chem.MolFromSmiles(t_smiles) Chem.Kekulize(mol) core_edits_ = [] chai_edits_ = [] stereo_edits_ = [] charge_edits_ = [] core_edits_add_ = [] for bond_idx,mark in bond_dic.items(): b,e = atom_pair_bond_idx[bond_idx] o_bond = mol.GetBondBetweenAtoms(b-1,e-1).GetBondTypeAsDouble() if mark == '!': n_bond = '0.0' core_edits_.append('{}:{}:{}:{}'.format(b,e,o_bond,n_bond)) elif mark == '_': n_bond = '1.0' core_edits_.append('{}:{}:{}:{}'.format(b,e,o_bond,n_bond)) elif mark == ';': n_bond = '2.0' core_edits_.append('{}:{}:{}:{}'.format(b,e,o_bond,n_bond)) elif mark == 
'^': n_bond = '3.0' core_edits_.append('{}:{}:{}:{}'.format(b,e,o_bond,n_bond)) elif mark == '&': stereo_edits_.append('{}:{}:{}:{}'.format(b,e,0,'a')) elif mark == '{': stereo_edits_.append('{}:{}:{}:{}'.format(b,e,0,'e')) elif mark == '}': stereo_edits_.append('{}:{}:{}:{}'.format(b,e,0,'z')) elif mark == '。': n_bond = '2.0' core_edits_.append('{}:{}:{}:{}'.format(b,e,o_bond,n_bond)) stereo_edits_.append('{}:{}:{}:{}'.format(b,e,0,'a')) #any elif mark == '《': n_bond = '2.0' core_edits_.append('{}:{}:{}:{}'.format(b,e,o_bond,n_bond)) stereo_edits_.append('{}:{}:{}:{}'.format(b,e,0,'e')) elif mark == '》': n_bond = '2.0' core_edits_.append('{}:{}:{}:{}'.format(b,e,o_bond,n_bond)) stereo_edits_.append('{}:{}:{}:{}'.format(b,e,0,'z')) core_edits_add_atom_lis = [] for atom in mol.GetAtoms(): Isotope = atom.GetIsotope() g_w = Isotope % 10 s_w = Isotope % 100 //10 b_w = Isotope // 100 if g_w == 9: core_edits_.append('{}:{}:{}:{}'.format(atom.GetIdx()+1,0,'1.0','0.0')) else: pass if s_w == 1: chai_edits_.append('{}:{}:{}:{}'.format(atom.GetIdx()+1,0,'0','R')) elif s_w == 2: chai_edits_.append('{}:{}:{}:{}'.format(atom.GetIdx()+1,0,'0','S')) elif s_w == 3: chai_edits_.append('{}:{}:{}:{}'.format(atom.GetIdx()+1,0,'0','?')) if b_w == 2 or b_w == 3: charge_edits_.append('{}:{}:{}:{}'.format(atom.GetIdx()+1,0,'0',1)) elif b_w == 4 or b_w == 5: charge_edits_.append('{}:{}:{}:{}'.format(atom.GetIdx()+1,0,'0',0)) elif b_w == 6 or b_w == 7: charge_edits_.append('{}:{}:{}:{}'.format(atom.GetIdx()+1,0,'0',-1)) if b_w % 2 == 1: core_edits_add_atom_lis.append(atom.GetIdx()+1) if core_edits_add_atom_lis != []: core_edits_add_.append('{}:{}:{}:{}'.format(core_edits_add_atom_lis[0],core_edits_add_atom_lis[1],'0.0','1.0')) else: pass return core_edits_,chai_edits_,stereo_edits_,charge_edits_,core_edits_add_ def get_b_smiles_check(p_b): p,core_edits,chai_edits,stereo_edits,charge_edits,core_edits_add,lg_map_lis = p_b b_smiles = get_b_smiles_forward(p_b) 
core_edits_,chai_edits_,stereo_edits_,charge_edits_,core_edits_add_ = get_b_smiles_backward(b_smiles,p_b[0]) if sorted(core_edits_) != sorted(core_edits) or sorted(chai_edits_) != sorted(chai_edits) or sorted(stereo_edits_) != sorted(stereo_edits) or sorted(charge_edits_) != sorted(charge_edits) or sorted(core_edits_add_) != sorted(core_edits_add): print(core_edits_,core_edits) print(chai_edits_,chai_edits) print(core_edits_add_,core_edits_add) return 'error' else: return b_smiles import re def replacenth(string, sub, wanted, n): where = [m.start() for m in re.finditer(sub, string)][n-1] before = string[:where] after = string[where:] after = after.replace(sub, wanted, 1) newString = before + after return newString def cano_smiles_map(smiles): atom_map_lis = [] mol = Chem.MolFromSmiles(smiles,sanitize = False) for atom in mol.GetAtoms(): atom_map_lis.append(atom.GetAtomMapNum()) atom.SetAtomMapNum(0) smiles = Chem.MolToSmiles(mol,canonical = False,kekuleSmiles=True) mol = Chem.MolFromSmiles(smiles,sanitize = False) for atom in mol.GetAtoms(): atom.SetAtomMapNum(atom_map_lis[atom.GetIdx()]) smiles = Chem.MolToSmiles(mol,canonical = False,kekuleSmiles=True) return smiles def get_lg_forward(core_edits,lg_map): attach_idx = [] for core_edit in core_edits: core_edit = core_edit.split(':') if float(core_edit[2])-float(core_edit[3]) > 0: attach_idx.append(int(core_edit[0])) attach_idx.append(int(core_edit[1])) attach_idx = sorted(list(set(attach_idx))) attach_idx = [i for i in attach_idx if i != 0] lg_lis = [()]*len(attach_idx) for lg,map_lis in lg_map: if len(map_lis) == 1: map_ = map_lis[0] id_ = attach_idx.index(map_) lg_lis[id_] = tuple(list(lg_lis[id_]) +[lg]) elif len(map_lis) != 1 and len(set(map_lis)) == 1: map_ = map_lis[0] id_ = attach_idx.index(map_) lg_lis[id_] = tuple(list(lg_lis[id_]) +[lg]) elif len(map_lis) != 1 and len(set(map_lis)) != 1 and lg.count(':') == 1: for map_ in map_lis: id_ = attach_idx.index(map_) lg_lis[id_] = tuple(list(lg_lis[id_]) +[lg + 
"*"]) elif len(map_lis) != 1 and len(set(map_lis)) != 1 and lg.count(':') == 2: if map_lis[0] 0: attach_idx.append(int(core_edit[0])) attach_idx.append(int(core_edit[1])) attach_idx = [i for i in attach_idx if i != 0] attach_idx = sorted(list(set(attach_idx))) lg_map_new = [] for id_,lg_ in zip(attach_idx,lg_lis): for lg in list(lg_): if lg.count(':') > 1: lg_map_new.append((lg,[id_]*lg.count(':'))) else: lg_map_new.append((lg,[id_])) dic_t = {} for i,j in lg_map_new: if '*' in i: dic_t.setdefault(i,[]).append(j[0]) else: pass lg_map_new_k =[] for i,j in lg_map_new: if '*' not in i: lg_map_new_k.append((i,j)) else: pass for i,j in dic_t.items(): if ':2' not in i: lg_map_new_k.append((i.replace('*',''),j)) elif i.index(':1') <= i.index(':2'): lg_map_new_k.append((i.replace('*','').replace(':2',':1'),j)) else: j.reverse() lg_map_new_k.append((i.replace('*','').replace(':2',':1'),j)) lg_map_new = lg_map_new_k return lg_map_new dic_str_to_num = {} for l in range(4,0,-1): for a,i in zip([0,200,400,600,100,300,500,700],['','α','β','γ','δ','αδ','βδ','γδ']): for b,j in zip([0,10,20,30],['','r','s','?']): for c,k in zip([0,9],['','~']): if len(k+j+i) == l: dic_str_to_num[k+j+i] = str(a+b+c) dic_num_to_str = {} for l in range(3,0,-1): for a,i in zip([0,200,400,600,100,300,500,700],['','α','β','γ','δ','αδ','βδ','γδ']): for b,j in zip([0,10,20,30],['','r','s','?']): for c,k in zip([0,9],['','~']): if len(str(a+b+c)) == l and len(k+j+i) != 0: dic_num_to_str[str(a+b+c)] = k+j+i def iso_to_symbo(txt,dic_num_to_str): for i,j in dic_num_to_str.items(): i = '[' + i j = '[' + j txt = txt.replace(i,j) txt = txt.replace('。',';&').replace('》',';}').replace('《',';{') return txt def symbo_to_iso(txt,dic_str_to_num): for i,j in dic_str_to_num.items(): i = '[' + i j = '[' + j txt = txt.replace(i,j) txt = txt.replace(';&','。').replace(';}','》').replace(';{','《') return txt def merge_smiles_only(text): text = symbo_to_iso(text,dic_str_to_num) o_smiles = text.split('>>>')[0] b_smiles = 
text.split('>>>')[1].split('<')[0] lg_lis = [] for i in re.findall(r"[<](.*?)[>]", text): if i == '': lg_lis.append(tuple()) else: lg_lis.append(tuple(i.split(','))) core_edits,chai_edits,stereo_edits,charge_edits,core_edits_add = get_b_smiles_backward(b_smiles,o_smiles) lg_map_lis = get_lg_backward(core_edits,lg_lis) p = Chem.MolFromSmiles(o_smiles,sanitize = False) for atom in p.GetAtoms(): atom.SetAtomMapNum(atom.GetIdx()+1) p = Chem.MolToSmiles(p) pre_smiles = run_get_p_b_l_backward(p,core_edits,chai_edits,stereo_edits,charge_edits,core_edits_add,lg_map_lis) return pre_smiles def merge_smiles_with_mapping_only(text): text = symbo_to_iso(text,dic_str_to_num) o_smiles = text.split('>>>')[0] b_smiles = text.split('>>>')[1].split('<')[0] lg_lis = [] for i in re.findall(r"[<](.*?)[>]", text): if i == '': lg_lis.append(tuple()) else: lg_lis.append(tuple(i.split(','))) core_edits,chai_edits,stereo_edits,charge_edits,core_edits_add = get_b_smiles_backward(b_smiles,o_smiles) lg_map_lis = get_lg_backward(core_edits,lg_lis) p = Chem.MolFromSmiles(o_smiles,sanitize = False) for atom in p.GetAtoms(): atom.SetAtomMapNum(atom.GetIdx()+1) p = Chem.MolToSmiles(p) pre_smiles = run_get_p_b_l_backward_with_mapping(p,core_edits,chai_edits,stereo_edits,charge_edits,core_edits_add,lg_map_lis) return pre_smiles def merge_smiles(text): try: return merge_smiles_only(text) except: return "" def merge_smiles_with_mapping(text): try: return merge_smiles_with_mapping_only(text) except: return "" def get_e_smiles(rxn): p_b = run_get_p_b_l_forward(rxn) b_smiles = get_b_smiles_check(p_b) lg_lis = get_lg_forward(p_b[1],p_b[6]) k = p_b b = b_smiles c = lg_lis a = Chem.MolFromSmiles(k[0],sanitize = False) for atom in a.GetAtoms(): atom.SetAtomMapNum(0) a = Chem.MolToSmiles(a,canonical = False) str_ = '' for i in c: str_ = str_ + '<{}>'.format(','.join(i)) txt = a +'>>>'+ b+str_ return iso_to_symbo(txt,dic_num_to_str) def get_e_smiles_with_check(rxn): p_b = run_get_p_b_l_check(rxn) b_smiles = 
get_b_smiles_check(p_b) lg_lis = get_lg_forward(p_b[1],p_b[6]) k = p_b b = b_smiles c = lg_lis a = Chem.MolFromSmiles(k[0],sanitize = False) for atom in a.GetAtoms(): atom.SetAtomMapNum(0) a = Chem.MolToSmiles(a,canonical = False) str_ = '' for i in c: str_ = str_ + '<{}>'.format(','.join(i)) txt = a +'>>>'+ b+str_ return iso_to_symbo(txt,dic_num_to_str) def get_edit_from_e_smiles(text): text = symbo_to_iso(text,dic_str_to_num) o_smiles = text.split('>>>')[0] b_smiles = text.split('>>>')[1].split('<')[0] lg_lis = [] for i in re.findall(r"[<](.*?)[>]", text): if i == '': lg_lis.append(tuple()) else: lg_lis.append(tuple(i.split(','))) core_edits,chai_edits,stereo_edits,charge_edits,core_edits_add = get_b_smiles_backward(b_smiles,o_smiles) lg_map_lis = get_lg_backward(core_edits,lg_lis) return core_edits,chai_edits,stereo_edits,charge_edits,core_edits_add,lg_map_lis