|
import numpy as np |
|
from rdkit import Chem |
|
import os |
|
import sys |
|
import copy |
|
import re |
|
from typing import List, Any |
|
from indigo import * |
|
indigo = Indigo() |
|
import rdkit |
|
from rdkit import RDLogger |
|
RDLogger.DisableLog('rdApp.*') |
|
print('rdkit version:' + rdkit.__version__) |
|
|
|
|
|
# RDKit bond types by position: index 0 means "no bond", 1-3 are
# single/double/triple and index 4 is aromatic.
BOND_TYPES = [
    None,
    Chem.rdchem.BondType.SINGLE,
    Chem.rdchem.BondType.DOUBLE,
    Chem.rdchem.BondType.TRIPLE,
    Chem.rdchem.BondType.AROMATIC,
]

# Bond order expressed as a float -> RDKit bond type (1.5 stands for aromatic,
# 0.0 maps to None).
BOND_FLOAT_TO_TYPE = dict(zip((0.0, 1.0, 2.0, 3.0, 1.5), BOND_TYPES))
|
|
|
|
|
|
|
def get_bond_info(mol: Chem.Mol):
    """Collect the bond order and bond index of every bond in a molecule.

    Parameters
    ----------
    mol: Chem.Mol
        Molecule to inspect; ``None`` is accepted.

    Returns
    -------
    dict
        Keys are the sorted pair of atom-map numbers of the two bonded atoms,
        values are ``[bond order as float, bond index]``. An empty dict is
        returned when ``mol`` is ``None``.
    """
    if mol is None:
        return {}

    # If two bonds share the same pair of map numbers, the later one wins.
    return {
        tuple(sorted([bond.GetBeginAtom().GetAtomMapNum(),
                      bond.GetEndAtom().GetAtomMapNum()])):
            [bond.GetBondTypeAsDouble(), bond.GetIdx()]
        for bond in mol.GetBonds()
    }
|
|
|
|
|
def map_reac_and_frag(reac_mols: List[Chem.Mol], frag_mols: List[Chem.Mol]):
    """Pair each fragment with the reactant sharing the most atom-map numbers.

    Parameters
    ----------
    reac_mols: List[Chem.Mol],
        List of reactant mols
    frag_mols: List[Chem.Mol],
        List of fragment mols

    Returns
    -------
    tuple
        ``(new_reac, new_frag)``: copies of the fragments in their original
        order and, at the same positions, copies of the best-overlapping
        reactants. When the two lists differ in length the inputs are
        returned untouched.
    """
    if len(reac_mols) != len(frag_mols):
        return reac_mols, frag_mols

    reac_map_sets = [
        {atom.GetAtomMapNum() for atom in mol.GetAtoms()} for mol in reac_mols
    ]

    best_reactant = []
    for frag in frag_mols:
        frag_maps = {atom.GetAtomMapNum() for atom in frag.GetAtoms()}
        shared = [len(frag_maps & reac_maps) for reac_maps in reac_map_sets]
        # index() returns the first reactant reaching the maximum overlap.
        best_reactant.append(shared.index(max(shared)))

    new_frag = [Chem.Mol(frag) for frag in frag_mols]
    new_reac = [Chem.Mol(reac_mols[pos]) for pos in best_reactant]
    return new_reac, new_frag
|
|
|
|
|
def remove_s_H(frag_mol):
    """Remove every isolated hydrogen atom (atomic number 1, degree 0).

    Parameters
    ----------
    frag_mol: Chem.Mol
        Molecule to clean up. It is not modified.

    Returns
    -------
    Chem.Mol
        ``frag_mol`` itself when it has no isolated hydrogens, otherwise a
        new molecule with those atoms removed.
    """
    lone_hydrogens = [
        atom.GetIdx()
        for atom in frag_mol.GetAtoms()
        if atom.GetAtomicNum() == 1 and atom.GetDegree() == 0
    ]
    if not lone_hydrogens:
        return frag_mol

    edit_mol = Chem.RWMol(frag_mol)
    # Removing an atom shifts the indices of all atoms after it, so delete
    # from the highest index downwards to keep the remaining indices valid.
    for idx in sorted(lone_hydrogens, reverse=True):
        edit_mol.RemoveAtom(idx)
    return edit_mol.GetMol()
|
|
|
|
|
|
|
def _adjust_explicit_hs(atom, prev_bo, new_bo):
    """Shift the explicit-H count of ``atom`` by ``prev_bo - new_bo``.

    Falls back to an explicit-H count of 0 when the computed value cannot be
    parsed or is rejected by RDKit.
    """
    try:
        atom.SetNumExplicitHs(
            int(atom.GetNumExplicitHs() + float(prev_bo) - float(new_bo))
        )
    except Exception:
        # Best effort, as before, but no longer swallowing
        # KeyboardInterrupt / SystemExit like the former bare ``except:``.
        atom.SetNumExplicitHs(0)


def apply_edits_to_mol_change(mol, edits):
    """Apply bond-order change edits to a copy of the molecular graph.

    Parameters
    ----------
    mol: Chem.Mol,
        RDKit mol object. It is not modified.
    edits: Iterable[str],
        Iterable of edits to apply. An edit is structured as a1:a2:b1:b2,
        where a1, a2 are atom maps of the participating atoms and b1, b2 are
        the previous and new bond orders. Both atom maps must be present in
        ``mol``; edits whose new bond order is not positive are ignored.

    Returns
    -------
    Chem.Mol
        New molecule with the bonds replaced and the explicit hydrogen counts
        of both end atoms shifted by ``b1 - b2``.
    """
    new_mol = Chem.RWMol(mol)
    amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in new_mol.GetAtoms()}

    for edit in edits:
        x, y, prev_bo, new_bo = edit.split(":")
        idx_x, idx_y = amap[int(x)], amap[int(y)]
        new_bo = float(new_bo)

        bond = new_mol.GetBondBetweenAtoms(idx_x, idx_y)

        # NOTE(review): the hydrogen adjustment is assumed to belong to this
        # branch; confirm against the upstream file.
        if new_bo > 0:
            if bond is not None:
                new_mol.RemoveBond(idx_x, idx_y)
            new_mol.AddBond(idx_x, idx_y, BOND_FLOAT_TO_TYPE[new_bo])

            _adjust_explicit_hs(new_mol.GetAtomWithIdx(idx_x), prev_bo, new_bo)
            _adjust_explicit_hs(new_mol.GetAtomWithIdx(idx_y), prev_bo, new_bo)

    return new_mol.GetMol()
|
|
|
|
|
def apply_edits_to_mol_break(mol, edits):
    """Break the bonds named by ``edits`` on a hydrogen-expanded copy.

    Parameters
    ----------
    mol: Chem.Mol,
        RDKit mol object
    edits: Iterable[str],
        Iterable of edits to apply. An edit is structured as a1:a2:b1:b2,
        where a1, a2 are atom maps. When a2 = 0 the bond from a1 to its first
        hydrogen neighbour is removed; otherwise the a1-a2 bond is removed if
        it exists. The bond-order fields are parsed but do not influence the
        result.

    Returns
    -------
    Chem.Mol
        Kekulized molecule with the bonds removed, passed through
        ``Chem.RemoveHs`` without sanitization.
    """
    h_mol = Chem.AddHs(mol)
    Chem.Kekulize(h_mol)
    for atom in h_mol.GetAtoms():
        atom.SetNoImplicit(True)

    editable = Chem.RWMol(h_mol)
    map_to_idx = {a.GetAtomMapNum(): a.GetIdx() for a in editable.GetAtoms()}

    for edit in edits:
        first, second, _prev_bo, target_bo = edit.split(":")
        first, second = int(first), int(second)
        float(target_bo)  # value unused; kept so malformed edits still raise

        if second == 0:
            # Neighbours are read from the hydrogen-expanded molecule, not
            # from the copy being edited.
            centre = h_mol.GetAtomWithIdx(map_to_idx[first])
            first_h = next(
                (nbr for nbr in centre.GetNeighbors() if nbr.GetAtomicNum() == 1),
                None,
            )
            if first_h is not None:
                editable.RemoveBond(map_to_idx[first], first_h.GetIdx())
            continue

        if editable.GetBondBetweenAtoms(map_to_idx[first], map_to_idx[second]) is not None:
            editable.RemoveBond(map_to_idx[first], map_to_idx[second])

    return Chem.RemoveHs(editable.GetMol(), sanitize=False)
|
|
|
|
|
|
|
def find_reac_edit(frag_mols_1,reac_mols_1,core_edits):
    """Derive the edits that turn one fragment into its matching reactant.

    Parameters
    ----------
    frag_mols_1: Chem.Mol
        Fragment molecule.
    reac_mols_1: Chem.Mol
        Reactant molecule paired with the fragment.
    core_edits: list of str
        Edits in ``a1:a2:b1:b2`` form. The list itself is not modified; a new
        list containing each edit plus its atom-swapped twin is used.

    Returns
    -------
    list of str
        Edits in ``a1:a2:b1:b2`` form: bonds from the attachment atom to atoms
        that exist only in the reactant, or, when the reactant has no extra
        atoms, hydrogen-count / formal-charge edits on the attachment atom.

    NOTE(review): the original indentation of this function was lost; the
    nesting below (everything after the attachment test runs once per matching
    core edit, and the charge checks belong to the "no extra atoms" branch)
    is the most plausible reading and should be confirmed.
    """
    reac_mol_map_num = [i.GetAtomMapNum() for i in reac_mols_1.GetAtoms()]
    frag_mol_map_num = [i.GetAtomMapNum() for i in frag_mols_1.GetAtoms()]
    # Map numbers that occur in the reactant but not in the fragment.
    lg_map_num = [i for i in reac_mol_map_num if i not in frag_mol_map_num]
    attach_map_num = 0

    reac_edit = []

    # Append a copy of every core edit with its two atom fields swapped, so
    # the test on field 0 below sees both end atoms of each edit.
    core_edits = core_edits + [':'.join([i.split(':')[1],i.split(':')[0],i.split(':')[2],i.split(':')[3]]) for i in core_edits]

    for core_edit in core_edits:
        core_edit_ = core_edit.split(':')
        # The first atom becomes the attachment atom when it belongs to the
        # fragment and the edit either ends at order 0 or lowers the order.
        if float(core_edit_[3]) == 0 and int(core_edit_[0]) in frag_mol_map_num:
            attach_map_num = int(core_edit_[0])
        elif float(core_edit_[2]) - float(core_edit_[3]) > 0 and int(core_edit_[0]) in frag_mol_map_num:
            attach_map_num = int(core_edit_[0])
        else:
            continue

        if str(attach_map_num) != '0' and str(attach_map_num) != core_edit_[0]:
            continue

        frag_mols_1_amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in frag_mols_1.GetAtoms()}
        reac_mols_1_amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in reac_mols_1.GetAtoms()}

        # Explicit-H counts of the attachment atom on both sides.
        frag_attach_H = frag_mols_1.GetAtomWithIdx(frag_mols_1_amap[attach_map_num]).GetNumExplicitHs()
        reac_attach_H = reac_mols_1.GetAtomWithIdx(reac_mols_1_amap[attach_map_num]).GetNumExplicitHs()

        # Formal charges of the attachment atom on both sides.
        frag_attach_charge = frag_mols_1.GetAtomWithIdx(frag_mols_1_amap[attach_map_num]).GetFormalCharge()
        reac_attach_charge = reac_mols_1.GetAtomWithIdx(reac_mols_1_amap[attach_map_num]).GetFormalCharge()

        if lg_map_num != []:
            # Record every reactant bond joining the attachment atom to a
            # reactant-only atom, attachment atom first, new order 0.0.
            for bond in reac_mols_1.GetBonds():
                EndMapNum = bond.GetEndAtom().GetAtomMapNum()
                BeginMapNum = bond.GetBeginAtom().GetAtomMapNum()

                if (BeginMapNum == attach_map_num) and (EndMapNum in lg_map_num):
                    reac_edit.append("{}:{}:{}:{}".format(BeginMapNum,EndMapNum,bond.GetBondTypeAsDouble(),0.0))
                elif (EndMapNum == attach_map_num) and (BeginMapNum in lg_map_num):

                    reac_edit.append("{}:{}:{}:{}".format(EndMapNum,BeginMapNum,bond.GetBondTypeAsDouble(),0.0))

        elif lg_map_num == []:

            # Identical SMILES on both sides: emit a 0.0 -> 0.0 edit.
            if Chem.MolToSmiles(reac_mols_1) == Chem.MolToSmiles(frag_mols_1):
                reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,0.0))
            # Reactant carries one / two more explicit H at unchanged charge.
            if (reac_attach_H - frag_attach_H) == 1 and (reac_attach_charge - frag_attach_charge) == 0:
                reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,1.0,0.0))
            if (reac_attach_H - frag_attach_H) == 2 and (reac_attach_charge - frag_attach_charge) == 0:
                reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,2.0,0.0))

            # Charge differences of -1 / +1 are recorded at most once each.
            if (reac_attach_charge - frag_attach_charge) == -1:
                if "{}:{}:{}:{}".format(attach_map_num,0,0.0,-1.0) not in reac_edit:
                    reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,-1.0))

            if (reac_attach_charge - frag_attach_charge) == 1:
                if "{}:{}:{}:{}".format(attach_map_num,0,0.0,1.0) not in reac_edit:
                    reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,1.0))

    return reac_edit
|
|
|
|
|
|
|
|
|
|
|
def correct_mol_1(mol,is_nitrine_c):
    """Return a deep copy of ``mol`` with per-atom flags reset.

    On the copy every atom gets zero radical electrons, its aromatic flag
    cleared and implicit hydrogens re-enabled. When ``is_nitrine_c == True``,
    neutral nitrogens whose bond orders sum to 4 and that have no aromatic
    (1.5) bond are additionally given a formal charge of +1.

    Parameters
    ----------
    mol: Chem.Mol
        Input molecule; it is not modified.
    is_nitrine_c: bool
        Enables the nitrogen charge correction.

    Returns
    -------
    Chem.Mol
        The corrected copy.
    """
    fixed = copy.deepcopy(mol)
    for atom in fixed.GetAtoms():
        if is_nitrine_c == True and atom.GetAtomicNum() == 7:
            orders = [bond.GetBondTypeAsDouble() for bond in atom.GetBonds()]
            if sum(orders) == 4 and 1.5 not in orders and atom.GetFormalCharge() == 0:
                atom.SetFormalCharge(1)

        atom.SetNumRadicalElectrons(0)
        atom.SetIsAromatic(False)
        atom.SetNoImplicit(False)

    return fixed
|
|
|
|
|
def correct_mol(mol_,keep_map):
    """Rebuild a kekulized, unsanitized copy of ``mol_`` via a mol block.

    On a deep copy, radicals, atom maps and aromatic flags are cleared and
    implicit hydrogens are re-enabled; the copy is then written to a mol
    block (kekulized) and read back without sanitization or H removal.

    Parameters
    ----------
    mol_: Chem.Mol
        Input molecule; it is not modified.
    keep_map: bool
        When true, the original atom-map numbers are restored on the rebuilt
        molecule, and neutral phosphorus atoms with explicit valence 5 and no
        aromatic bond get their original explicit-H count back with implicit
        hydrogens disabled.

    Returns
    -------
    Chem.Mol
        The rebuilt molecule, carrying the chiral tags of ``mol_``.
    """
    work = copy.deepcopy(mol_)
    saved_maps = [atom.GetAtomMapNum() for atom in work.GetAtoms()]
    saved_explicit_hs = {}

    for atom in work.GetAtoms():
        # The original also tested for a neutral nitrogen with bond orders
        # summing to 4, but that branch did nothing, so only the phosphorus
        # case is kept.
        if (atom.GetAtomicNum() == 15
                and atom.GetExplicitValence() == 5
                and 1.5 not in [bond.GetBondTypeAsDouble() for bond in atom.GetBonds()]
                and atom.GetFormalCharge() == 0):
            saved_explicit_hs[atom.GetIdx()] = atom.GetNumExplicitHs()
        atom.SetNumRadicalElectrons(0)
        atom.SetNoImplicit(False)
        atom.SetAtomMapNum(0)

    for atom in work.GetAtoms():
        atom.SetIsAromatic(False)

    block = Chem.MolToMolBlock(work, kekulize=True)
    rebuilt = Chem.MolFromMolBlock(block, removeHs=False, sanitize=False)

    if keep_map:
        for atom in rebuilt.GetAtoms():
            idx = atom.GetIdx()
            atom.SetAtomMapNum(saved_maps[idx])
            if idx in saved_explicit_hs:
                atom.SetNoImplicit(True)
                atom.SetNumExplicitHs(saved_explicit_hs[idx])

    # Chiral tags are copied by atom index from the untouched input.
    for atom in rebuilt.GetAtoms():
        atom.SetChiralTag(mol_.GetAtomWithIdx(atom.GetIdx()).GetChiralTag())

    # The result is discarded; the call is kept exactly as in the original.
    Chem.FindMolChiralCenters(rebuilt)

    return rebuilt
|
|
|
|
|
def get_atom_map_chai_dic(mol):
    """Map the atom-map number of each chiral centre to its chirality label.

    The centres and labels are whatever ``Chem.FindMolChiralCenters(mol)``
    reports with its default arguments.
    """
    return {
        mol.GetAtomWithIdx(atom_idx).GetAtomMapNum(): label
        for atom_idx, label in Chem.FindMolChiralCenters(mol)
    }
|
|
|
|
|
def get_atom_map_stereo_dic(mol):
    """Map each bond to its stereo descriptor, keyed by atom-map numbers.

    Returns
    -------
    dict
        Keys are the sorted pair of atom-map numbers of the bonded atoms,
        values are the result of ``bond.GetStereo()``.
    """
    idx_to_map = {atom.GetIdx(): atom.GetAtomMapNum() for atom in mol.GetAtoms()}
    return {
        tuple(sorted([idx_to_map[bond.GetBeginAtomIdx()],
                      idx_to_map[bond.GetEndAtomIdx()]])): bond.GetStereo()
        for bond in mol.GetBonds()
    }
|
|
|
|
|
def cano_smiles_map(smiles):
    """Re-write a mapped SMILES through an unmapped, kekulized round trip.

    The SMILES is parsed without sanitization, its atom maps are stripped,
    the molecule is written and re-parsed (non-canonical, kekulized), and the
    saved map numbers are put back by atom index before the final SMILES is
    written with the same options.
    """
    parsed = Chem.MolFromSmiles(smiles, sanitize=False)
    saved_maps = []
    for atom in parsed.GetAtoms():
        saved_maps.append(atom.GetAtomMapNum())
        atom.SetAtomMapNum(0)

    unmapped = Chem.MolToSmiles(parsed, canonical=False, kekuleSmiles=True)
    reparsed = Chem.MolFromSmiles(unmapped, sanitize=False)
    for atom in reparsed.GetAtoms():
        atom.SetAtomMapNum(saved_maps[atom.GetIdx()])

    return Chem.MolToSmiles(reparsed, canonical=False, kekuleSmiles=True)
|
|
|
|
|
|
|
def get_stereo_edit_mine(reac_mol,prod_mol):
    """List double bonds whose stereo descriptor differs between the two sides.

    A reactant double bond is considered only when, on each end atom, the
    (up to two) other substituents have distinct canonical ranks; missing
    substituents count as ``'H'``.

    Parameters
    ----------
    reac_mol: Chem.Mol
        Atom-mapped reactant molecule. It is no longer modified: the
        canonical ranking that needs map numbers cleared runs on a copy.
    prod_mol: Chem.Mol
        Atom-mapped product molecule.

    Returns
    -------
    list of str
        Edits ``a1:a2:0:s`` with the sorted atom-map pair and ``s`` the
        reactant stereo: ``'a'`` for STEREONONE, ``'e'`` for STEREOE, ``'z'``
        for STEREOZ; any other descriptor is formatted as-is.
    """
    reac_map_a = {atom.GetIdx(): atom.GetAtomMapNum() for atom in reac_mol.GetAtoms()}
    prod_map_a = {atom.GetIdx(): atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()}

    # Rank on a map-free copy so the caller's reactant keeps its atom maps.
    rank_mol = Chem.Mol(reac_mol)
    for atom in rank_mol.GetAtoms():
        atom.SetAtomMapNum(0)
    r_rank = list(Chem.CanonicalRankAtoms(rank_mol, breakTies=False))

    p_stereo_dic = {}
    for bond in prod_mol.GetBonds():
        b_map, e_map = prod_map_a[bond.GetBeginAtomIdx()], prod_map_a[bond.GetEndAtomIdx()]
        p_stereo_dic[tuple(sorted([b_map, e_map]))] = bond.GetStereo()

    r_stereo_dic = {}
    for bond in reac_mol.GetBonds():
        if bond.GetBondTypeAsDouble() != 2.0:
            continue

        b_atom, e_atom = bond.GetBeginAtom(), bond.GetEndAtom()
        b_neis_rank = [r_rank[n.GetIdx()] for n in b_atom.GetNeighbors()
                       if n.GetIdx() != e_atom.GetIdx()]
        e_neis_rank = [r_rank[n.GetIdx()] for n in e_atom.GetNeighbors()
                       if n.GetIdx() != b_atom.GetIdx()]

        # Pad with 'H' up to two substituents per end; two identical entries
        # mean that end cannot carry E/Z information.
        b_neis_rank = b_neis_rank + ['H'] * (2 - len(b_neis_rank))
        e_neis_rank = e_neis_rank + ['H'] * (2 - len(e_neis_rank))

        if len(b_neis_rank) == len(set(b_neis_rank)) and len(e_neis_rank) == len(set(e_neis_rank)):
            b_map, e_map = reac_map_a[bond.GetBeginAtomIdx()], reac_map_a[bond.GetEndAtomIdx()]
            r_stereo_dic[tuple(sorted([b_map, e_map]))] = bond.GetStereo()

    stereo_edits = []
    for atom_pair, stereo in r_stereo_dic.items():
        if atom_pair in p_stereo_dic and stereo != p_stereo_dic[atom_pair]:
            if stereo == Chem.rdchem.BondStereo.STEREONONE:
                stereo = 'a'
            elif stereo == Chem.rdchem.BondStereo.STEREOE:
                stereo = 'e'
            elif stereo == Chem.rdchem.BondStereo.STEREOZ:
                stereo = 'z'
            stereo_edits.append('{}:{}:{}:{}'.format(atom_pair[0], atom_pair[1], 0, stereo))
    return stereo_edits
|
|
|
|
|
|
|
def _highest_cip_neighbor(mol, amap_idx, neighbor_maps):
    """Return the map number in ``neighbor_maps`` with the largest ``_CIPRank``.

    Starts from the first entry with a rank floor of 0; on ties the later
    entry wins. Raises IndexError when ``neighbor_maps`` is empty.
    """
    chosen = neighbor_maps[0]
    top_rank = 0
    for amap in neighbor_maps:
        rank = int(mol.GetAtomWithIdx(amap_idx[amap]).GetProp('_CIPRank'))
        if rank >= top_rank:
            chosen, top_rank = amap, rank
    return chosen


def apply_stereo_change(prod_mol,stereo_edits):
    """Apply double-bond stereo edits to a deep copy of ``prod_mol``.

    Parameters
    ----------
    prod_mol: Chem.Mol
        Atom-mapped molecule; it is not modified. Every neighbour of the
        edited bond atoms must carry a ``_CIPRank`` property.
    stereo_edits: Iterable[str]
        Edits whose first two ``:``-separated fields are the atom maps of the
        bond and whose last two characters are ``:e``, ``:z`` or ``:a``.
        Edits with any other ending leave the bond untouched.

    Returns
    -------
    Chem.Mol
        Copy with STEREOE / STEREOZ (plus stereo atoms) or STEREOANY set on
        the named bonds.
    """
    p_amap_idx = {atom.GetAtomMapNum(): atom.GetIdx() for atom in prod_mol.GetAtoms()}

    prod_mol = copy.deepcopy(prod_mol)
    # Separate copy used only for reading the _CIPRank properties.
    prod_mol_t = copy.deepcopy(prod_mol)

    for stereo_edit in stereo_edits:
        fields = stereo_edit.split(':')
        b_map, e_map = int(fields[0]), int(fields[1])
        b_idx, e_idx = p_amap_idx[b_map], p_amap_idx[e_map]

        # Substituents on each end, excluding the two bond atoms themselves.
        b_n = [n.GetAtomMapNum() for n in prod_mol.GetAtomWithIdx(b_idx).GetNeighbors()]
        b_n = [m for m in b_n if m not in [b_map, e_map]]
        e_n = [n.GetAtomMapNum() for n in prod_mol.GetAtomWithIdx(e_idx).GetNeighbors()]
        e_n = [m for m in e_n if m not in [b_map, e_map]]

        f_b_n = _highest_cip_neighbor(prod_mol_t, p_amap_idx, b_n)
        f_e_n = _highest_cip_neighbor(prod_mol_t, p_amap_idx, e_n)

        suffix = stereo_edit[-2:]
        if suffix in (':e', ':z'):
            bond = prod_mol.GetBondBetweenAtoms(b_idx, e_idx)
            if suffix == ':e':
                bond.SetStereo(Chem.rdchem.BondStereo.STEREOE)
            else:
                bond.SetStereo(Chem.rdchem.BondStereo.STEREOZ)
            try:
                bond.SetStereoAtoms(p_amap_idx[f_b_n], p_amap_idx[f_e_n])
            except Exception:
                # Retry with the reference atoms swapped, as before, but
                # without the former bare ``except:`` that also trapped
                # KeyboardInterrupt / SystemExit.
                bond.SetStereoAtoms(p_amap_idx[f_e_n], p_amap_idx[f_b_n])
        elif suffix == ':a':
            bond = prod_mol.GetBondBetweenAtoms(b_idx, e_idx)
            bond.SetStereo(Chem.rdchem.BondStereo.STEREOANY)

    return prod_mol
|
|
|
|
|
def add_Cl(mol):
    """Attach a chlorine atom to the molecule's single S(=O)(=O) centre.

    A sulfur qualifies when its bond orders are exactly one single and two
    double bonds and both double bonds go to oxygen. If exactly one sulfur
    qualifies, a new molecule is returned with a Cl atom (atom map = largest
    existing map + 1) single-bonded to it; otherwise ``mol`` is returned
    unchanged.
    """
    candidates = []
    for atom in mol.GetAtoms():
        if atom.GetAtomicNum() != 16:
            continue
        if sorted([bond.GetBondTypeAsDouble() for bond in atom.GetBonds()]) != [1, 2, 2]:
            continue

        double_bonded_o = 0
        for nbr in atom.GetNeighbors():
            if nbr.GetAtomicNum() != 8:
                continue
            link = mol.GetBondBetweenAtoms(atom.GetIdx(), nbr.GetIdx())
            if link.GetBondTypeAsDouble() == 2:
                double_bonded_o += 1

        if double_bonded_o == 2:
            candidates.append(atom.GetIdx())

    if len(candidates) != 1:
        return mol

    map_nums = [atom.GetAtomMapNum() for atom in mol.GetAtoms()]
    cl_idx = len(map_nums)  # index the appended atom receives
    editable = Chem.RWMol(mol)
    editable.AddAtom(Chem.Atom(17))
    editable.GetAtomWithIdx(cl_idx).SetAtomMapNum(max(map_nums) + 1)
    editable.AddBond(candidates[0], cl_idx, BOND_FLOAT_TO_TYPE[1])
    return editable.GetMol()
|
|
|
|
|
|
|
def neu_sulf_charge(mol):
    """Neutralize O(-1) atoms whose only neighbour is a valence-4 sulfur.

    The molecule is modified in place and also returned.
    """
    for atom in mol.GetAtoms():
        if atom.GetAtomicNum() != 8 or atom.GetFormalCharge() != -1:
            continue

        nbrs = atom.GetNeighbors()
        if len(nbrs) != 1:
            continue
        only_nbr = nbrs[0]
        if only_nbr.GetAtomicNum() == 16 and only_nbr.GetExplicitValence() == 4:
            atom.SetFormalCharge(0)

    return mol
|
|
|
|
|
|
|
def align_kekule_pairs(r: str, p: str) :
    """Kekulize reactants and product so that unchanged bonds agree.

    Bonds that had the same order on both sides before kekulization are
    given, on the reactant side, the product's kekulized order.

    Parameters
    ----------
    r: str,
        SMILES string representing the reactants
    p: str,
        SMILES string representing the product

    Returns
    -------
    tuple
        ``(reactant mol, product mol)``, both kekulized. Reactant atoms that
        had no atom map receive fresh map numbers above the largest one.
    """
    reac_mol = Chem.MolFromSmiles(r)
    last_amap = max([atom.GetAtomMapNum() for atom in reac_mol.GetAtoms()])
    for atom in reac_mol.GetAtoms():
        if atom.GetAtomMapNum() == 0:
            last_amap += 1
            atom.SetAtomMapNum(last_amap)

    prod_mol = Chem.MolFromSmiles(p)

    # Bond tables before ("prev") and after ("new") kekulization.
    prod_prev = get_bond_info(prod_mol)
    Chem.Kekulize(prod_mol)
    prod_new = get_bond_info(prod_mol)

    reac_prev = get_bond_info(reac_mol)
    Chem.Kekulize(reac_mol)
    reac_new = get_bond_info(reac_mol)

    realigned = []
    for pair in prod_new:
        if pair not in reac_new:
            continue
        if prod_prev[pair][0] != reac_prev[pair][0]:
            continue
        # Rewrite when the kekulized orders disagree, and always for bonds
        # that were aromatic on the reactant side.
        if reac_new[pair][0] != prod_new[pair][0] or reac_prev[pair][0] == 1.5:
            reac_new[pair][0] = prod_new[pair][0]
            realigned.append(pair)

    editable = Chem.RWMol(reac_mol)
    idx_of = {atom.GetAtomMapNum(): atom.GetIdx() for atom in editable.GetAtoms()}

    for pair in realigned:
        first, second = idx_of[pair[0]], idx_of[pair[1]]
        editable.RemoveBond(first, second)
        editable.AddBond(first, second, BOND_FLOAT_TO_TYPE[reac_new[pair][0]])

    return editable.GetMol(), prod_mol
|
|
|
|
|
def count_kekule_d(r,p):
    """Count unchanged bonds whose as-written bond orders disagree.

    Each SMILES is parsed twice: sanitized and unsanitized. A bond is counted
    when it exists on both sides with equal sanitized order but different
    unsanitized order.

    Parameters
    ----------
    r: str
        Atom-mapped reactant SMILES.
    p: str
        Atom-mapped product SMILES.

    Returns
    -------
    int
        Number of such bonds.
    """
    prod_s = get_bond_info(Chem.MolFromSmiles(p))
    prod_k = get_bond_info(Chem.MolFromSmiles(p, sanitize=False))
    reac_s = get_bond_info(Chem.MolFromSmiles(r))
    reac_k = get_bond_info(Chem.MolFromSmiles(r, sanitize=False))

    return sum(
        1
        for pair, info in reac_s.items()
        if pair in prod_s
        and info[0] == prod_s[pair][0]
        and reac_k[pair][0] != prod_k[pair][0]
    )
|
|
|
|
|
def get_kekule_aligned_r(r,p):
    """Pick, per reactant, the Kekule form that best matches the product.

    If ``count_kekule_d(r, p)`` is already 0 the input ``r`` is returned as
    is. Otherwise each '.'-separated component of ``r`` is enumerated with
    ``Chem.ResonanceMolSupplier(..., Chem.KEKULE_ALL)`` and the structure with
    the smallest ``count_kekule_d`` against ``p`` is kept (the last one on
    ties; an empty string if none scores 1000 or less).

    Returns
    -------
    str
        The chosen kekulized SMILES joined with '.'.
    """
    if count_kekule_d(r, p) == 0:
        return r

    chosen = []
    for component in r.split('.'):
        best_count, best_smiles = 1000, ''

        forms = Chem.ResonanceMolSupplier(Chem.MolFromSmiles(component), Chem.KEKULE_ALL)
        for k in range(len(forms)):
            candidate = Chem.MolToSmiles(forms[k], kekuleSmiles=True)
            mismatch = count_kekule_d(candidate, p)
            if mismatch <= best_count:
                best_smiles, best_count = candidate, mismatch

        chosen.append(best_smiles)

    return '.'.join(chosen)
|
|
|
|
|
def apply_edits_to_mol_connect(mol, edits):
    """Add the bonds described by ``edits`` to a copy of the molecule.

    Parameters
    ----------
    mol: Chem.Mol,
        RDKit mol object. It is not modified.
    edits: Iterable[str],
        Iterable of edits to apply. An edit is structured as a1:a2:b1:b2,
        where a1, a2 are atom maps of the atoms to connect and b2 is the
        order of the bond to add (looked up in BOND_FLOAT_TO_TYPE); b1 is
        ignored.

    Returns
    -------
    Chem.Mol
        New molecule containing the added bonds.
    """
    editable = Chem.RWMol(mol)
    map_to_idx = {atom.GetAtomMapNum(): atom.GetIdx() for atom in editable.GetAtoms()}

    for edit in edits:
        first, second, _prev_bo, target_bo = edit.split(":")
        editable.AddBond(
            map_to_idx[int(first)],
            map_to_idx[int(second)],
            BOND_FLOAT_TO_TYPE[float(target_bo)],
        )

    return editable.GetMol()
|
|
|
|
|
def get_charge_edit_mine(reac_mol, prod_mol,core_edits):
    """List formal-charge differences between product and reactant atoms.

    Atoms taking part in a core edit whose bond order decreases (third field
    larger than fourth) are skipped.

    Parameters
    ----------
    reac_mol, prod_mol:
        Atom-mapped reactant and product molecules.
    core_edits: Iterable[str]
        Edits in ``a1:a2:b1:b2`` form.

    Returns
    -------
    list of str
        One ``map:0:0:reactant_charge`` entry per product atom whose charge
        differs from the same-mapped reactant atom, in product order.
    """
    excluded = set()
    for core_edit in core_edits:
        first, second, old_bo, new_bo = core_edit.split(':')
        if float(old_bo) - float(new_bo) > 0:
            excluded.update((int(first), int(second)))
    excluded.discard(0)  # map number 0 is never treated as excluded

    reac_charges = {atom.GetAtomMapNum(): atom.GetFormalCharge() for atom in reac_mol.GetAtoms()}
    prod_charges = {atom.GetAtomMapNum(): atom.GetFormalCharge() for atom in prod_mol.GetAtoms()}

    return [
        f"{atom_map}:{0}:{0}:{reac_charges[atom_map]}"
        for atom_map, charge in prod_charges.items()
        if atom_map in reac_charges
        and reac_charges[atom_map] != charge
        and atom_map not in excluded
    ]
|
|
|
|
|
|
|
|
|
def get_atom_map_charge_dic(mol):
    """Return ``{atom-map number: formal charge}`` for every atom of ``mol``.

    When several atoms share a map number the last one wins.
    """
    return {atom.GetAtomMapNum(): atom.GetFormalCharge() for atom in mol.GetAtoms()}
|
|
|
|
|
def apply_charge_change(mol,charge_edits):
    """Set formal charges on ``mol`` in place according to ``charge_edits``.

    Parameters
    ----------
    mol: Chem.Mol
        Atom-mapped molecule; modified in place.
    charge_edits: Iterable[str]
        Edits with four ``:``-separated fields; the first is the atom map of
        the atom to change and the last is the new charge as an integer
        string.

    Returns
    -------
    Chem.Mol
        The same ``mol`` object.
    """
    map_to_idx = {atom.GetAtomMapNum(): atom.GetIdx() for atom in mol.GetAtoms()}
    for edit in charge_edits:
        atom_map, _second, _old_charge, target_charge = edit.split(":")
        target = mol.GetAtomWithIdx(map_to_idx[int(atom_map)])
        target.SetFormalCharge(int(target_charge))
    return mol
|
|
|
|
|
def get_core_edit_mine(reac_mol, prod_mol):
    """List the bond and hydrogen edits separating product from reactants.

    NOTE(review): this name is defined again further down in the file; at
    import time the later definition replaces this one.

    Parameters
    ----------
    reac_mol, prod_mol: Chem.Mol
        Atom-mapped reactant and product molecules.

    Returns
    -------
    list of str
        Edits ``a1:a2:b1:b2`` (b1 = product order, b2 = reactant order):
        first bonds whose order differs or that exist only in the product,
        then reactant-only bonds between atoms present in the product
        (b1 = 0.0), then ``a:0:1.0:0.0`` entries for atoms whose total
        hydrogen count differs.
    """
    prod_bonds = get_bond_info(prod_mol)
    reac_bonds = get_bond_info(reac_mol)

    formed_in_prod = set()    # atoms of bonds present only in the product
    lost_from_reac = set()    # atoms of bonds present only in the reactants
    core_edits = []

    prod_map_nums = {atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()}
    reac_idx_of = {atom.GetAtomMapNum(): atom.GetIdx() for atom in reac_mol.GetAtoms()}

    for pair, (prod_bo, _) in prod_bonds.items():
        a_start, a_end = pair
        if pair in reac_bonds:
            reac_bo = reac_bonds[pair][0]
            if prod_bo != reac_bo:
                a_start, a_end = sorted([a_start, a_end])
                core_edits.append(f"{a_start}:{a_end}:{prod_bo}:{reac_bo}")
        else:
            reac_bo = 0.0
            core_edits.append(f"{a_start}:{a_end}:{prod_bo}:{reac_bo}")
            formed_in_prod.update([a_start, a_end])

    for pair, (reac_bo, _) in reac_bonds.items():
        if pair in prod_bonds:
            continue
        amap1, amap2 = pair
        lost_from_reac.update([amap1, amap2])
        if amap1 in prod_map_nums and amap2 in prod_map_nums:
            a_start, a_end = sorted([amap1, amap2])
            core_edits.append(f"{a_start}:{a_end}:{0.0}:{reac_bo}")

    # Hydrogen edits are only looked for on atoms that lost a reactant bond
    # and do not take part in a product-only bond.
    for atom in prod_mol.GetAtoms():
        amap_num = atom.GetAtomMapNum()
        if amap_num in formed_in_prod or amap_num not in lost_from_reac:
            continue
        numHs_prod = atom.GetTotalNumHs()
        numHs_reac = reac_mol.GetAtomWithIdx(reac_idx_of[amap_num]).GetTotalNumHs()
        if numHs_prod != numHs_reac:
            core_edits.append(f"{amap_num}:{0}:{1.0}:{0.0}")

    return core_edits
|
|
|
|
|
|
|
def get_chai_edit_mine(reac_mol, prod_mol):
    """List chirality differences between product and reactant atoms.

    NOTE(review): this name is defined again further down in the file; at
    import time the last definition wins.

    Returns
    -------
    list of str
        ``map:0:0:label`` entries carrying the reactant's label: first for
        centres present on both sides with different labels, then for
        reactant centres with a label other than ``'?'`` whose atom exists in
        the product but is not reported as a centre there.
    """
    reac_idx_to_map = {atom.GetIdx(): atom.GetAtomMapNum() for atom in reac_mol.GetAtoms()}
    prod_idx_to_map = {atom.GetIdx(): atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()}

    reac_centers = {
        reac_idx_to_map[idx]: label
        for idx, label in Chem.FindMolChiralCenters(reac_mol, includeUnassigned=True)
    }
    prod_centers = {
        prod_idx_to_map[idx]: label
        for idx, label in Chem.FindMolChiralCenters(prod_mol, includeUnassigned=True)
    }

    chai_edits = [
        f"{amap_num}:{0}:{0}:{reac_centers[amap_num]}"
        for amap_num, label in prod_centers.items()
        if amap_num in reac_centers and label != reac_centers[amap_num]
    ]

    prod_map_nums = prod_idx_to_map.values()
    for amap_num, label in reac_centers.items():
        if amap_num in prod_centers or amap_num not in prod_map_nums:
            continue
        if label != '?':
            chai_edits.append(f"{amap_num}:{0}:{0}:{label}")

    return chai_edits
|
|
|
|
|
|
|
|
|
|
|
def get_chai_edit_mine(reac_mol, prod_mol):
    """List chirality differences between product and reactant atoms.

    Unlike the other definitions of this name in the file, this variant also
    reports reactant-only centres whose label is ``'?'``.

    NOTE(review): this name is defined again further down in the file; at
    import time the last definition wins.

    Returns
    -------
    list of str
        ``map:0:0:label`` entries carrying the reactant's label.
    """
    def centers_by_map(mol):
        # idx -> map lookup is built before the chiral-centre search runs.
        idx_to_map = {atom.GetIdx(): atom.GetAtomMapNum() for atom in mol.GetAtoms()}
        return idx_to_map, idx_to_map.values()

    reac_lookup, _ = centers_by_map(reac_mol)
    prod_lookup, prod_map_nums = centers_by_map(prod_mol)

    reac_centers = dict(
        (reac_lookup[idx], label)
        for idx, label in Chem.FindMolChiralCenters(reac_mol, includeUnassigned=True)
    )
    prod_centers = dict(
        (prod_lookup[idx], label)
        for idx, label in Chem.FindMolChiralCenters(prod_mol, includeUnassigned=True)
    )

    chai_edits = []

    # Centres on both sides whose labels disagree.
    for amap_num, label in prod_centers.items():
        if amap_num not in reac_centers:
            continue
        if label != reac_centers[amap_num]:
            chai_edits.append(f"{amap_num}:{0}:{0}:{reac_centers[amap_num]}")

    # Reactant centres whose atom is in the product but is no centre there.
    for amap_num, label in reac_centers.items():
        if amap_num not in prod_centers and amap_num in prod_map_nums:
            chai_edits.append(f"{amap_num}:{0}:{0}:{label}")

    return chai_edits
|
|
|
|
|
|
|
def get_chai_edit_mine(reac_mol, prod_mol):
    """List chirality differences between product and reactant atoms.

    Chiral centres are taken from ``Chem.FindMolChiralCenters`` with
    ``includeUnassigned=True`` and keyed by atom-map number.

    Parameters
    ----------
    reac_mol, prod_mol: Chem.Mol
        Atom-mapped reactant and product molecules.

    Returns
    -------
    list of str
        ``map:0:0:label`` entries carrying the reactant's label: first the
        centres found on both sides with different labels (product order),
        then reactant centres labelled other than ``'?'`` whose atom exists
        in the product without being a centre there (reactant order).
    """
    reac_map_of = {atom.GetIdx(): atom.GetAtomMapNum() for atom in reac_mol.GetAtoms()}
    prod_map_of = {atom.GetIdx(): atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()}

    reac_labels = {}
    for atom_idx, label in Chem.FindMolChiralCenters(reac_mol, includeUnassigned=True):
        reac_labels[reac_map_of[atom_idx]] = label

    prod_labels = {}
    for atom_idx, label in Chem.FindMolChiralCenters(prod_mol, includeUnassigned=True):
        prod_labels[prod_map_of[atom_idx]] = label

    changed = [
        f"{amap_num}:{0}:{0}:{reac_labels[amap_num]}"
        for amap_num, label in prod_labels.items()
        if amap_num in reac_labels and label != reac_labels[amap_num]
    ]

    maps_in_product = prod_map_of.values()
    vanished = [
        f"{amap_num}:{0}:{0}:{label}"
        for amap_num, label in reac_labels.items()
        if amap_num not in prod_labels
        and amap_num in maps_in_product
        and label != '?'
    ]

    return changed + vanished
|
|
|
|
|
|
|
|
|
def get_lg_map_lis(frag_mols,reac_mols,core_edits,prod_mol):
    """Collect (leaving-group SMILES, attachment atom maps) pairs.

    For every fragment/reactant pair the edits from ``find_reac_edit`` are
    split into charge/no-op edits and bond-breaking edits. Charge edits yield
    the pseudo leaving groups ``'-1.0'`` / ``'1.0'``; bond-breaking edits are
    applied to the reactant and every resulting piece that is not the
    fragment itself is written out as a leaving-group SMILES.

    Parameters
    ----------
    frag_mols, reac_mols:
        Sequences of fragment and reactant molecules, paired by position.
    core_edits: list of str
        Core edits in ``a1:a2:b1:b2`` form, forwarded to ``find_reac_edit``.
    prod_mol: Chem.Mol
        Product molecule; only its atom-map numbers are used.

    Returns
    -------
    list of tuple
        ``(lg_smiles, attach_map_num_list)`` entries.

    NOTE(review): the original indentation of this function was lost; the
    nesting below is the most plausible reading and should be confirmed
    against the upstream file before relying on it.
    """

    lg_map_lis = []
    prod_map_num_lis = [i.GetAtomMapNum() for i in prod_mol.GetAtoms()]

    for frag_mols_1,reac_mols_1 in zip(frag_mols[:],reac_mols[:]):
        reac_edits = find_reac_edit(frag_mols_1,reac_mols_1,core_edits)

        # reac_edits_a: no-op and charge edits; reac_edits_b: everything else
        # (these are the edits passed to apply_edits_to_mol_break below).
        reac_edits_a = []
        reac_edits_b = []
        for reac_edit in reac_edits:
            if reac_edit[:3] == '0:0':
                reac_edits_a.append(reac_edit)
            elif reac_edit[-7:] == '0.0:0.0':
                reac_edits_a.append(reac_edit)
            elif reac_edit[-10:] == '0:0.0:-1.0':
                reac_edits_a.append(reac_edit)
            elif reac_edit[-9:] == '0:0.0:1.0':
                reac_edits_a.append(reac_edit)
            else:
                reac_edits_b.append(reac_edit)

        # Charge edits become pseudo leaving groups named '-1.0' / '1.0',
        # attached to whichever of the edit's atoms exist in the product.
        for reac_edit in reac_edits_a:
            if reac_edit[:3] == '0:0':
                pass
            elif reac_edit[-7:] == '0.0:0.0':
                pass
            elif reac_edit[-10:] == '0:0.0:-1.0':
                edit_map_num_lis = reac_edit.split(':')[:2]
                attach_map_num_1 = [int(i) for i in edit_map_num_lis if int(i) in prod_map_num_lis]
                lg_smiles = '-1.0'
                lg_map_lis.append((lg_smiles,attach_map_num_1))
            elif reac_edit[-9:] == '0:0.0:1.0':
                edit_map_num_lis = reac_edit.split(':')[:2]
                attach_map_num_1 = [int(i) for i in edit_map_num_lis if int(i) in prod_map_num_lis]
                lg_smiles = '1.0'
                lg_map_lis.append((lg_smiles,attach_map_num_1))

        # Break the remaining edits on the reactant and split it into pieces.
        frag_1_map_num_lis = [i.GetAtomMapNum() for i in frag_mols_1.GetAtoms() if i.GetAtomMapNum() != 0]
        reac_frag_mol = apply_edits_to_mol_break(reac_mols_1 , reac_edits_b)
        reac_frag_mols = Chem.GetMolFrags(reac_frag_mol,asMols=True,sanitizeFrags = False)

        # Edits already consumed by an earlier piece are not reused.
        reac_edit_added = []
        for reac_frag_mol in reac_frag_mols[:]:

            reac_frag_map_num_lis = [i.GetAtomMapNum() for i in reac_frag_mol.GetAtoms() if i.GetAtomMapNum() != 0]

            # A piece with exactly the fragment's map numbers is the fragment
            # itself, not a leaving group.
            if set(reac_frag_map_num_lis) == set(frag_1_map_num_lis):
                pass
            else:
                attach_map_num_1 = []
                for reac_edit in reac_edits:
                    if reac_edit in reac_edit_added:
                        continue
                    else:
                        pass

                    b,e = int(reac_edit.split(':')[0]),int(reac_edit.split(':')[1])
                    if e in reac_frag_map_num_lis and b in frag_1_map_num_lis:

                        # Tag the piece's attachment atom by adding 500 to its
                        # map number; `atom` is reused below and refers to the
                        # tagged atom (or the last atom visited if none matched).
                        for atom in reac_frag_mol.GetAtoms():
                            if atom.GetAtomMapNum() == int(e):
                                atom.SetAtomMapNum(500+atom.GetAtomMapNum())
                                break
                            else:
                                pass
                        reac_edit_added.append(reac_edit)

                        # With one attachment already recorded, whether `b` is
                        # put before or after it depends on whether that
                        # attachment is also bonded to this atom and whether
                        # this atom holds the piece's largest map number.
                        if len(attach_map_num_1) == 1:

                            if [str(attach_map_num_1[0]),str(atom.GetAtomMapNum()-500)] in [i.split(':')[:2] for i in reac_edits ]:
                                if atom.GetAtomMapNum() == max([i.GetAtomMapNum() for i in reac_frag_mol.GetAtoms()]):
                                    attach_map_num_1 = [b] + attach_map_num_1
                                else:
                                    attach_map_num_1.append(b)
                            else:
                                if atom.GetAtomMapNum() == max([i.GetAtomMapNum() for i in reac_frag_mol.GetAtoms()]):
                                    attach_map_num_1.append(b)
                                else:
                                    attach_map_num_1 = [b] + attach_map_num_1
                        elif len(attach_map_num_1) == 0:
                            attach_map_num_1.append(b)

                    else:
                        pass

                    # A piece starting with a hydrogen stops scanning edits
                    # once it has one attachment point.
                    if reac_frag_mol.GetAtomWithIdx(0).GetAtomicNum() == 1 and len(attach_map_num_1) == 1:
                        break

                # Write the piece as kekulized SMILES, re-parse it, and reduce
                # the maps to 1 (tagged attachment atoms) or 0 (all others).
                lg_smiles = Chem.MolToSmiles(reac_frag_mol,kekuleSmiles = True)
                lg = Chem.MolFromSmiles(lg_smiles)
                Chem.Kekulize(lg)
                for atom in lg.GetAtoms():
                    if atom.GetAtomMapNum() >= 500:
                        atom.SetAtomMapNum(1)
                        pass
                    else:
                        atom.SetAtomMapNum(0)
                lg_smiles = Chem.MolToSmiles(lg,canonical = False,kekuleSmiles = True)

                # Pieces without any attachment point are dropped.
                if attach_map_num_1 != []:
                    lg_map_lis.append((lg_smiles,attach_map_num_1))

    return lg_map_lis
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_core_edit_mine(reac_mol, prod_mol):
    """Compare product and reactant bonds and list the reaction-core edits.

    Every edit is a colon-separated string ``"a:b:prod_order:reac_order"``
    built from atom-map numbers. Bond-level edits are emitted first (changed
    order, bond only in the product, bond only in the reactants); the
    atom-level edits that follow use ``0`` for ``b`` and the fixed orders
    ``1.0:0.0``.

    Parameters
    ----------
    reac_mol: Chem.Mol
        Atom-mapped reactant molecule
    prod_mol: Chem.Mol
        Atom-mapped product molecule

    Returns
    -------
    list of str
        Core edits in the order they were found
    """
    product_bonds = get_bond_info(prod_mol)
    reactant_bonds = get_bond_info(reac_mol)

    only_in_product = set()    # atoms on bonds the reactants do not have
    only_in_reactant = set()   # atoms on bonds the product does not have
    touched = set()            # every atom already covered by an edit
    edits = []

    product_maps = {atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()}
    reactant_idx = {atom.GetAtomMapNum(): atom.GetIdx() for atom in reac_mol.GetAtoms()}

    for pair in product_bonds:
        prod_order = product_bonds[pair][0]
        if pair not in reactant_bonds:
            # Bond exists only in the product: reactant-side order is 0.0.
            first, second = pair
            edits.append(f"{first}:{second}:{prod_order}:{0.0}")
            touched.update([first, second])
            only_in_product.update([first, second])
            continue
        reac_order = reactant_bonds[pair][0]
        if prod_order != reac_order:
            low, high = sorted(pair)
            edits.append(f"{low}:{high}:{prod_order}:{reac_order}")
            touched.update([low, high])

    for pair in reactant_bonds:
        if pair in product_bonds:
            continue
        left, right = pair
        only_in_reactant.update([left, right])
        # Only report the lost bond when both ends survive in the product.
        if left in product_maps and right in product_maps:
            low, high = sorted([left, right])
            reac_order = reactant_bonds[pair][0]
            edits.append(f"{low}:{high}:{0.0}:{reac_order}")
            touched.update([low, high])

    # Atoms that lost a bond (and gained none) whose hydrogen count changed.
    for atom in prod_mol.GetAtoms():
        amap = atom.GetAtomMapNum()
        if amap in only_in_product or amap not in only_in_reactant:
            continue
        hs_in_product = atom.GetTotalNumHs()
        hs_in_reactant = reac_mol.GetAtomWithIdx(reactant_idx[amap]).GetTotalNumHs()
        if hs_in_product != hs_in_reactant:
            edits.append(f"{amap}:{0}:{1.0}:{0.0}")
            touched.add(amap)

    # Atoms not covered so far that have exactly one neighbour fewer in the product.
    for atom in prod_mol.GetAtoms():
        amap = atom.GetAtomMapNum()
        if amap in touched:
            continue
        degree_in_product = atom.GetDegree()
        degree_in_reactant = reac_mol.GetAtomWithIdx(reactant_idx[amap]).GetDegree()
        if degree_in_product - degree_in_reactant == -1:
            edits.append(f"{amap}:{0}:{1.0}:{0.0}")
            touched.add(amap)

    return edits
|
|
|
|
|
|
|
def find_reac_edit(frag_mols_1,reac_mols_1,core_edits):
    """Collect the edits that separate one fragment from its matching reactant.

    Every returned edit is a string ``"attach:other:x:y"`` built from atom-map
    numbers: bond edits ``attach:lg_atom:bond_type:0.0``, hydrogen edits
    ``attach:0:1.0:0.0`` / ``attach:0:2.0:0.0``, the marker
    ``attach:0:0.0:0.0`` and charge edits ``attach:0:0.0:<charge difference>``.

    Parameters
    ----------
    frag_mols_1: Chem.Mol
        Fragment molecule
    reac_mols_1: Chem.Mol
        Reactant molecule paired with the fragment
    core_edits: list of str
        Core edits in ``"a:b:prod_order:reac_order"`` form

    Returns
    -------
    list of str
        Edits found for this fragment/reactant pair; may contain repeats
    """
    reac_mol_map_num = [i.GetAtomMapNum() for i in reac_mols_1.GetAtoms()]
    frag_mol_map_num = [i.GetAtomMapNum() for i in frag_mols_1.GetAtoms()]
    # Map numbers present in the reactant but absent from the fragment.
    lg_map_num = [i for i in reac_mol_map_num if i not in frag_mol_map_num]
    attach_map_num = 0

    reac_edit = []

    # Add each core edit again with its two atom fields swapped, so either end
    # of an edit can be examined as the first field below.
    core_edits = core_edits + [':'.join([i.split(':')[1],i.split(':')[0],i.split(':')[2],i.split(':')[3]]) for i in core_edits]

    for core_edit in core_edits:
        core_edit_ = core_edit.split(':')

        # The first atom becomes the attachment atom when it belongs to the
        # fragment and the edit either has a last field of 0 or a third field
        # larger than the fourth; all other edits are skipped.
        if float(core_edit_[3]) == 0 and int(core_edit_[0]) in frag_mol_map_num:
            attach_map_num = int(core_edit_[0])
        elif float(core_edit_[2]) - float(core_edit_[3]) > 0 and int(core_edit_[0]) in frag_mol_map_num:
            attach_map_num = int(core_edit_[0])
        else:
            continue

        if str(attach_map_num) != '0' and str(attach_map_num) != core_edit_[0]:
            continue

        # NOTE(review): the nesting from here to the end of the loop was
        # reconstructed from data flow; confirm it against the upstream file.
        frag_mols_1_amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in frag_mols_1.GetAtoms()}
        reac_mols_1_amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in reac_mols_1.GetAtoms()}

        # Explicit hydrogen count and formal charge of the attachment atom on each side.
        frag_attach_H = frag_mols_1.GetAtomWithIdx(frag_mols_1_amap[attach_map_num]).GetNumExplicitHs()
        reac_attach_H = reac_mols_1.GetAtomWithIdx(reac_mols_1_amap[attach_map_num]).GetNumExplicitHs()

        frag_attach_charge = frag_mols_1.GetAtomWithIdx(frag_mols_1_amap[attach_map_num]).GetFormalCharge()
        reac_attach_charge = reac_mols_1.GetAtomWithIdx(reac_mols_1_amap[attach_map_num]).GetFormalCharge()

        if lg_map_num != []:
            # Record every reactant bond joining the attachment atom to an
            # atom the fragment lacks, attachment atom first.
            for bond in reac_mols_1.GetBonds():
                EndMapNum = bond.GetEndAtom().GetAtomMapNum()
                BeginMapNum = bond.GetBeginAtom().GetAtomMapNum()
                if (BeginMapNum == attach_map_num) and (EndMapNum in lg_map_num):
                    reac_edit.append("{}:{}:{}:{}".format(BeginMapNum,EndMapNum,bond.GetBondTypeAsDouble(),0.0))
                elif (EndMapNum == attach_map_num) and (BeginMapNum in lg_map_num):
                    reac_edit.append("{}:{}:{}:{}".format(EndMapNum,BeginMapNum,bond.GetBondTypeAsDouble(),0.0))

        elif lg_map_num == []:
            # Fragment and reactant carry the same map numbers: compare the
            # SMILES and the explicit hydrogens of the attachment atom.
            if Chem.MolToSmiles(reac_mols_1) == Chem.MolToSmiles(frag_mols_1):
                reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,0.0))
            if (reac_attach_H - frag_attach_H) == 1 and (reac_attach_charge - frag_attach_charge) == 0:
                reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,1.0,0.0))
            if (reac_attach_H - frag_attach_H) == 2 and (reac_attach_charge - frag_attach_charge) == 0:
                reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,2.0,0.0))

        # NOTE(review): the charge checks are placed at loop level (not inside
        # the ``elif`` above) -- confirm. Each charge edit is added only once.
        if (reac_attach_charge - frag_attach_charge) == -1:
            if "{}:{}:{}:{}".format(attach_map_num,0,0.0,-1.0) not in reac_edit:
                reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,-1.0))

        if (reac_attach_charge - frag_attach_charge) == 1:
            if "{}:{}:{}:{}".format(attach_map_num,0,0.0,1.0) not in reac_edit:
                reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,1.0))

        if (reac_attach_charge - frag_attach_charge) == 2:
            if "{}:{}:{}:{}".format(attach_map_num,0,0.0,2.0) not in reac_edit:
                reac_edit.append("{}:{}:{}:{}".format(attach_map_num,0,0.0,2.0))

    return reac_edit
|
|
|
|
|
|
|
|
|
def get_lg_map_lis(frag_mols,reac_mols,core_edits,prod_mol):
    """List the leaving groups of a reaction with the atoms they attach to.

    Parameters
    ----------
    frag_mols: List[Chem.Mol]
        Fragment molecules, paired positionally with ``reac_mols``
    reac_mols: List[Chem.Mol]
        Reactant molecules
    core_edits: list of str
        Core edits, forwarded to ``find_reac_edit``
    prod_mol: Chem.Mol
        Product molecule; only its atom-map numbers are read

    Returns
    -------
    list of tuple
        ``(lg_smiles, attach_map_nums)`` pairs. ``lg_smiles`` is either one of
        the tags ``'-1'``, ``'1'``, ``'2'`` (produced from charge edits) or a
        kekulized SMILES in which attachment atoms carry map number 1 and all
        other atoms map number 0.
    """
    lg_map_lis = []
    prod_map_num_lis = [i.GetAtomMapNum() for i in prod_mol.GetAtoms()]

    for frag_mols_1,reac_mols_1 in zip(frag_mols[:],reac_mols[:]):
        reac_edits = find_reac_edit(frag_mols_1,reac_mols_1,core_edits)

        # Split the edits by their textual ending: group "a" holds the marker
        # and charge edits, group "b" everything else (used to break bonds).
        reac_edits_a = []
        reac_edits_b = []
        for reac_edit in reac_edits:
            if reac_edit[:3] == '0:0':
                reac_edits_a.append(reac_edit)
            elif reac_edit[-7:] == '0.0:0.0':
                reac_edits_a.append(reac_edit)
            elif reac_edit[-10:] == '0:0.0:-1.0':
                reac_edits_a.append(reac_edit)
            elif reac_edit[-9:] == '0:0.0:1.0':
                reac_edits_a.append(reac_edit)

            elif reac_edit[-9:] == '0:0.0:2.0':
                reac_edits_a.append(reac_edit)

            else:
                reac_edits_b.append(reac_edit)

        # Charge edits become tag entries; the attachment list keeps only the
        # edit's atom fields that occur among the product's map numbers.
        for reac_edit in reac_edits_a:
            if reac_edit[:3] == '0:0':
                pass
            elif reac_edit[-7:] == '0.0:0.0':
                pass
            elif reac_edit[-10:] == '0:0.0:-1.0':
                edit_map_num_lis = reac_edit.split(':')[:2]
                attach_map_num_1 = [int(i) for i in edit_map_num_lis if int(i) in prod_map_num_lis]
                lg_smiles = '-1'
                lg_map_lis.append((lg_smiles,attach_map_num_1))
            elif reac_edit[-9:] == '0:0.0:1.0':
                edit_map_num_lis = reac_edit.split(':')[:2]
                attach_map_num_1 = [int(i) for i in edit_map_num_lis if int(i) in prod_map_num_lis]
                lg_smiles = '1'
                lg_map_lis.append((lg_smiles,attach_map_num_1))

            elif reac_edit[-9:] == '0:0.0:2.0':
                edit_map_num_lis = reac_edit.split(':')[:2]
                attach_map_num_1 = [int(i) for i in edit_map_num_lis if int(i) in prod_map_num_lis]
                lg_smiles = '2'
                lg_map_lis.append((lg_smiles,attach_map_num_1))

        # Apply the group "b" edits to the reactant and split the result into pieces.
        frag_1_map_num_lis = [i.GetAtomMapNum() for i in frag_mols_1.GetAtoms() if i.GetAtomMapNum() != 0]
        reac_frag_mol = apply_edits_to_mol_break(reac_mols_1 , reac_edits_b)
        reac_frag_mols = Chem.GetMolFrags(reac_frag_mol,asMols=True,sanitizeFrags = False)

        # Edits already assigned to a piece; they are skipped for later pieces.
        reac_edit_added = []
        for reac_frag_mol in reac_frag_mols[:]:

            reac_frag_map_num_lis = [i.GetAtomMapNum() for i in reac_frag_mol.GetAtoms() if i.GetAtomMapNum() != 0]

            # The piece with the fragment's own map numbers is not a leaving group.
            if set(reac_frag_map_num_lis) == set(frag_1_map_num_lis):
                pass
            else:
                attach_map_num_1 = []
                for reac_edit in reac_edits:
                    if reac_edit in reac_edit_added:
                        continue
                    else:
                        pass

                    b,e = int(reac_edit.split(':')[0]),int(reac_edit.split(':')[1])
                    if e in reac_frag_map_num_lis and b in frag_1_map_num_lis:

                        # Tag the piece's attachment atom by adding 500 to its
                        # map number; ``atom`` stays bound to it after the break.
                        for atom in reac_frag_mol.GetAtoms():
                            if atom.GetAtomMapNum() == int(e):
                                atom.SetAtomMapNum(500+atom.GetAtomMapNum())
                                break
                            else:
                                pass
                        reac_edit_added.append(reac_edit)

                        # With one attachment already recorded, ``b`` goes in
                        # front of or behind it depending on whether the first
                        # recorded atom shares an edit with the atom just
                        # tagged and whether that atom now has the largest map
                        # number in the piece.
                        if len(attach_map_num_1) == 1:

                            if [str(attach_map_num_1[0]),str(atom.GetAtomMapNum()-500)] in [i.split(':')[:2] for i in reac_edits ]:
                                if atom.GetAtomMapNum() == max([i.GetAtomMapNum() for i in reac_frag_mol.GetAtoms()]):
                                    attach_map_num_1 = [b] + attach_map_num_1
                                else:
                                    attach_map_num_1.append(b)
                            else:
                                if atom.GetAtomMapNum() == max([i.GetAtomMapNum() for i in reac_frag_mol.GetAtoms()]):
                                    attach_map_num_1.append(b)
                                else:
                                    attach_map_num_1 = [b] + attach_map_num_1
                        elif len(attach_map_num_1) == 0:
                            attach_map_num_1.append(b)

                    else:
                        pass

                    # Stop after the first attachment when the piece's first atom is a hydrogen.
                    if reac_frag_mol.GetAtomWithIdx(0).GetAtomicNum() == 1 and len(attach_map_num_1) == 1:
                        break

                # Re-parse the piece and reduce its map numbers to 1 (tagged
                # attachment atoms) and 0 (everything else).
                lg_smiles = Chem.MolToSmiles(reac_frag_mol,kekuleSmiles = True)
                lg = Chem.MolFromSmiles(lg_smiles)
                Chem.Kekulize(lg)
                for atom in lg.GetAtoms():
                    if atom.GetAtomMapNum() >= 500:
                        atom.SetAtomMapNum(1)
                        pass
                    else:
                        atom.SetAtomMapNum(0)
                lg_smiles = Chem.MolToSmiles(lg,canonical = False,kekuleSmiles = True)

                # Pieces without any attachment atom are not reported.
                if attach_map_num_1 != []:
                    lg_map_lis.append((lg_smiles,attach_map_num_1))

    return lg_map_lis
|
|
|
|
|
|
|
def get_chai_edit_mine(reac_mol, prod_mol):
    """List the chirality differences between reactants and product.

    Chiral centres are detected on map-free copies of both molecules (after a
    MolBlock round-trip) and keyed by the original atom-map numbers.

    Parameters
    ----------
    reac_mol: Chem.Mol
        Atom-mapped reactant molecule
    prod_mol: Chem.Mol
        Atom-mapped product molecule

    Returns
    -------
    list of str
        Edits ``"amap:0:0:label"`` where ``label`` is the reactant-side label
    """
    reac_idx_to_map = {atom.GetIdx(): atom.GetAtomMapNum() for atom in reac_mol.GetAtoms()}
    prod_idx_to_map = {atom.GetIdx(): atom.GetAtomMapNum() for atom in prod_mol.GetAtoms()}

    reac_copy = copy.deepcopy(reac_mol)
    prod_copy = copy.deepcopy(prod_mol)
    for mol_copy in (reac_copy, prod_copy):
        for atom in mol_copy.GetAtoms():
            atom.SetAtomMapNum(0)

    def centres_by_map(mol_copy, idx_to_map):
        # Chiral centres of the round-tripped molecule, keyed by map number.
        reparsed = Chem.MolFromMolBlock(Chem.MolToMolBlock(mol_copy))
        found = Chem.FindMolChiralCenters(reparsed, includeUnassigned=True)
        return {idx_to_map[centre[0]]: centre[1] for centre in found}

    reac_centres = centres_by_map(reac_copy, reac_idx_to_map)
    prod_centres = centres_by_map(prod_copy, prod_idx_to_map)

    edits = []

    # Centres present on both sides whose label differs.
    for amap, prod_label in prod_centres.items():
        if amap not in reac_centres:
            continue
        reac_label = reac_centres[amap]
        if prod_label != reac_label:
            edits.append(f"{amap}:{0}:{0}:{reac_label}")

    # Assigned reactant centres whose atom exists in the product but is not a
    # centre there.
    prod_map_nums = set(prod_idx_to_map.values())
    for amap, reac_label in reac_centres.items():
        if amap in prod_centres or amap not in prod_map_nums:
            continue
        if reac_label != '?':
            edits.append(f"{amap}:{0}:{0}:{reac_label}")

    return edits
|
|
|
|
|
|
|
def get_original_chair_edit(p,b):
    """Emit the product's chiral labels for centres that also exist in ``b``.

    Parameters
    ----------
    p: str
        Product SMILES
    b: Chem.Mol
        Rebuilt molecule; it is copied, not modified

    Returns
    -------
    list of str
        ``"<atom index + 1>:0:0:<label>"`` for every chiral-centre index of
        ``p`` that is also a chiral-centre index of ``b``
    """
    def centre_labels(mol):
        # Clear the map numbers, round-trip through a MolBlock, map index -> label.
        for atom in mol.GetAtoms():
            atom.SetAtomMapNum(0)
        reparsed = Chem.MolFromMolBlock(Chem.MolToMolBlock(mol))
        return dict(Chem.FindMolChiralCenters(reparsed, includeUnassigned=True))

    rebuilt_centres = centre_labels(copy.deepcopy(b))
    product_centres = centre_labels(Chem.MolFromSmiles(p))

    # NOTE(review): index + 1 is written where the other edits carry an
    # atom-map number -- presumably map numbers equal index + 1 here; confirm.
    return [
        '{}:0:0:{}'.format(idx + 1, label)
        for idx, label in product_centres.items()
        if idx in rebuilt_centres
    ]
|
|
|
|
|
|
|
|
|
def apply_chirality_change(prod_mol,chai_edits):
    """Return a copy of ``prod_mol`` with the requested chiral labels applied.

    Parameters
    ----------
    prod_mol: Chem.Mol
        Atom-mapped molecule; it is not modified
    chai_edits: list of str
        Edits ``"amap:0:0:label"``. Labels ``R`` and ``S`` set the tetrahedral
        tag that yields that label, ``?`` clears the tag, and any other label
        leaves the atom untouched.

    Returns
    -------
    Chem.Mol
        Deep copy of ``prod_mol`` carrying the edited chiral tags
    """
    idx_by_map = {atom.GetAtomMapNum(): atom.GetIdx() for atom in prod_mol.GetAtoms()}
    edited = copy.deepcopy(prod_mol)

    for chai_edit in chai_edits:
        amap = int(chai_edit.split(':')[0])
        suffix = chai_edit[-2:]

        if suffix in (':R', ':S'):
            wanted = suffix[1]
            atom = edited.GetAtomWithIdx(idx_by_map[amap])
            # Try counter-clockwise first and keep it only when it produces
            # the wanted label; otherwise switch to clockwise.
            atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CCW)
            labels = get_chair_dict_without_atom_map(edited)
            if labels[atom.GetIdx()] != wanted:
                atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CW)
        elif suffix == ':?':
            atom = edited.GetAtomWithIdx(idx_by_map[amap])
            atom.SetChiralTag(Chem.ChiralType.CHI_UNSPECIFIED)

    return edited
|
|
|
|
|
|
|
|
|
def get_chair_dict_without_atom_map(temp_p):
    """Map atom index to chiral label for a map-free copy of ``temp_p``.

    The input is deep-copied, every atom-map number on the copy is set to 0,
    and the copy is round-tripped through a MolBlock before the chiral centres
    (unassigned ones included) are collected.
    """
    unmapped = copy.deepcopy(temp_p)
    for atom in unmapped.GetAtoms():
        atom.SetAtomMapNum(0)
    reparsed = Chem.MolFromMolBlock(Chem.MolToMolBlock(unmapped))
    centres = Chem.FindMolChiralCenters(reparsed, includeUnassigned=True)
    return dict(centres)
|
|
|
|
|
|
|
def run_get_p_b_l(rxn_smi):
    """Extract the edits of a mapped reaction and verify that they round-trip.

    The product is broken into fragments with the core edits, the leaving
    groups are re-attached, stereo and charge edits are applied, and the
    resulting SMILES is compared with the map-free reactant SMILES.

    Parameters
    ----------
    rxn_smi: str
        Atom-mapped reaction SMILES of the form ``"reactants>>product"``

    Returns
    -------
    list or str
        ``[p, core_edits, chai_edits, stereo_edits, charge_edits,
        core_edits_add, lg_map_lis]`` when the rebuilt reactants match and the
        edit limits are respected; ``'error type 3'`` when either side has 150
        atoms or more; ``'error type 1'`` when the check fails;
        ``'error type 2'`` when any exception is raised.
    """
    # (symbol, total valence, formal charge) of a leaving group's attachment
    # atom -> order of the bond used to re-attach it. Any other atom, and
    # every atom attached more than once, gets a single bond.
    lg_bond_orders = {
        ('O', 0, 0): 2.0, ('O', 1, 1): 2.0,
        ('S', 0, 0): 2.0, ('S', 2, 0): 2.0, ('S', 4, 0): 2.0, ('S', 1, 1): 2.0,
        ('P', 3, 0): 2.0,
        ('C', 2, 0): 2.0,
        ('N', 2, 1): 2.0, ('N', 1, 0): 2.0, ('N', 0, -1): 2.0,
        ('Se', 2, 0): 2.0, ('Si', 2, 0): 2.0,
        ('Mn', 5, 0): 2.0, ('Cr', 4, 0): 2.0,
        ('N', 0, 0): 3.0, ('C', 1, 0): 3.0, ('C', 0, -1): 3.0,
    }
    lg_symbols = {key[0] for key in lg_bond_orders}
    # get_lg_map_lis writes charge-only entries as '-1', '1', '2'; the float
    # spellings this function used to test for are still accepted.
    charge_tags = ('-1', '1', '2', '-1.0', '1.0', '2.0')

    try:
        r, p = rxn_smi.split(">>")

        if Chem.MolFromSmiles(p).GetNumAtoms() >= 150 or Chem.MolFromSmiles(r).GetNumAtoms() >= 150:
            print('error type 3')
            return 'error type 3'

        r, p = cano_smiles_map(r), cano_smiles_map(p)

        reac_mol, prod_mol = align_kekule_pairs(r, p)
        reac_mol = Chem.MolFromSmiles(Chem.MolToSmiles(reac_mol, kekuleSmiles=True), sanitize=False)

        reac_smiles_temp = Chem.MolToSmiles(reac_mol, kekuleSmiles=True)
        reac_mol_temp = Chem.MolFromSmiles(reac_smiles_temp)

        # Fall back to the explicitly aligned Kekule form when the aligned
        # reactant cannot be parsed or no longer matches the input reactant.
        if reac_mol_temp is None or Chem.MolToSmiles(reac_mol_temp) != Chem.MolToSmiles(Chem.MolFromSmiles(r)):
            r_k = get_kekule_aligned_r(r, p)
            # The result never changed what happens next; the call is kept so
            # that an exception raised by it still ends the run as before.
            count_kekule_d(r_k, p)
            reac_mol, prod_mol = Chem.MolFromSmiles(r_k), Chem.MolFromSmiles(p)
            Chem.Kekulize(reac_mol)
            Chem.Kekulize(prod_mol)

        # Previously never assigned here, so every call ended in the except
        # branch; computed the same way as in run_get_p_b_l_forward.
        core_edits = get_core_edit_mine(reac_mol, prod_mol)
        core_edits_add = [i for i in core_edits if (float(i.split(':')[2]) == 0) and (float(i.split(':')[1]) != 0)]
        core_edits = [i for i in core_edits if i not in core_edits_add]

        edit_c = [i for i in core_edits if (float(i.split(':')[-1]) > 0)]
        edit_b = [i for i in core_edits if (float(i.split(':')[-1]) == 0)]

        chai_edits = get_chai_edit_mine(Chem.MolFromSmiles(r), Chem.MolFromSmiles(p))
        stereo_edits = get_stereo_edit_mine(Chem.MolFromSmiles(r), Chem.MolFromSmiles(p))
        charge_edits = get_charge_edit_mine(reac_mol, prod_mol, core_edits)

        o_p_Chiral_dic = get_atom_map_chai_dic(Chem.MolFromSmiles(p))
        o_p_Stereo_dic = get_atom_map_stereo_dic(Chem.MolFromSmiles(p))

        frag_mol = apply_edits_to_mol_break(prod_mol, edit_b)
        frag_mol = apply_edits_to_mol_change(frag_mol, edit_c)
        frag_mol = apply_edits_to_mol_connect(frag_mol, core_edits_add)
        frag_mol = remove_s_H(frag_mol)

        reac_mols = Chem.GetMolFrags(reac_mol, asMols=True, sanitizeFrags=False)
        frag_mols = Chem.GetMolFrags(frag_mol, asMols=True, sanitizeFrags=False)

        # Reconcile the piece counts: first drop lone hydrogens, then fall
        # back to the unsplit fragment molecule.
        if len(reac_mols) != len(frag_mols):
            frag_mols = [piece for piece in frag_mols if Chem.MolToSmiles(piece) != '[H]']
        if len(reac_mols) != len(frag_mols):
            frag_mols = [frag_mol]

        if len(reac_mols) == len(frag_mols):
            reac_mols, frag_mols = map_reac_and_frag(reac_mols, frag_mols)
        else:
            print('error type 0')

        lg_map_lis_temp = get_lg_map_lis(frag_mols[:], reac_mols[:], core_edits, prod_mol)

        # Leaving groups with more than one attachment atom are rewritten so
        # that their attachment list follows the canonical atom ranking.
        lg_map_lis = []
        for lg, map_ in lg_map_lis_temp:
            lg, map_ = copy.deepcopy(lg), copy.deepcopy(map_)
            map_new = []
            if lg.count(':') > 1:
                lg = Chem.MolFromSmiles(lg)
                Chem.Kekulize(lg)
                for atom in lg.GetAtoms():
                    if atom.GetAtomMapNum() == 0:
                        map_new.append('*')
                    else:
                        map_new.append(map_.pop(0))

                lg_smiles = Chem.MolToSmiles(lg, kekuleSmiles=True)
                rank = list(Chem.CanonicalRankAtoms(lg, breakTies=False))
                map_new = sorted(map_new, key=lambda x: rank[map_new.index(x)])
                map_new = [i for i in map_new if i != '*']

                lg_map_lis.append((lg_smiles, map_new))
            else:
                lg_map_lis.append((lg, map_))

        total_mol = frag_mol

        for lg_smile, map_nums in lg_map_lis[:]:
            if lg_smile not in charge_tags:
                lg = Chem.MolFromSmiles(lg_smile)

                # Give the leaving-group atoms fresh map numbers above the
                # current maximum: attachment atoms (map 1) first, then the rest.
                max_map = max([i.GetAtomMapNum() for i in total_mol.GetAtoms()])
                count = 1
                for atom in lg.GetAtoms():
                    if atom.GetAtomMapNum() == 1:
                        atom.SetAtomMapNum(max_map + count)
                        count += 1
                for atom in lg.GetAtoms():
                    if atom.GetAtomMapNum() == 0:
                        atom.SetAtomMapNum(max_map + count)
                        count += 1

                total_mol = Chem.CombineMols(total_mol, lg)

                amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in total_mol.GetAtoms()}
                new_mol = Chem.RWMol(total_mol)

                for idx, map_num in enumerate(map_nums):
                    if lg_smile.count(':') == len(map_nums):
                        # One attachment atom per target atom.
                        lg_map = max_map + 1 + idx
                        is_multi_bond = 0
                    else:
                        # A single attachment atom bonded to several targets.
                        lg_map = max_map + 1
                        is_multi_bond = 1
                    atom = total_mol.GetAtomWithIdx(amap[lg_map])

                    bond_float = 1.0
                    symbol = atom.GetSymbol()
                    if symbol in lg_symbols:
                        state = (symbol, atom.GetTotalValence(), atom.GetFormalCharge())
                        if is_multi_bond == 0:
                            bond_float = lg_bond_orders.get(state, 1.0)

                    new_mol.AddBond(amap[map_num], amap[lg_map], BOND_FLOAT_TO_TYPE[bond_float])
                    total_mol = new_mol.GetMol()
            else:
                # Charge-only entry: shift the formal charge of the target atom.
                map_num = map_nums[0]
                amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in total_mol.GetAtoms()}
                atom = total_mol.GetAtomWithIdx(amap[map_num])
                atom.SetNumRadicalElectrons(0)
                atom.SetFormalCharge(int(atom.GetFormalCharge() + float(lg_smile)))

        total_mol = correct_mol_1(total_mol, is_nitrine_c=True)
        b = correct_mol(total_mol, keep_map=True)

        b_Chiral_dic = get_atom_map_chai_dic(b)
        b_Stereo_dic = get_atom_map_stereo_dic(b)
        dic_map_idx = {i.GetAtomMapNum(): i.GetIdx() for i in b.GetAtoms()}

        # Flip the tag of centres whose label drifted from the product's and
        # that no chirality edit is going to set anyway.
        for b_map, Chiral in b_Chiral_dic.items():
            if b_map not in o_p_Chiral_dic.keys():
                continue
            if Chiral != o_p_Chiral_dic[b_map] and b_map not in [int(i.split(':')[0]) for i in chai_edits]:
                atom = b.GetAtomWithIdx(dic_map_idx[b_map])
                if atom.GetChiralTag() == Chem.ChiralType.CHI_TETRAHEDRAL_CCW:
                    atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CW)
                elif atom.GetChiralTag() == Chem.ChiralType.CHI_TETRAHEDRAL_CW:
                    atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CCW)

        # Restore the product's double-bond stereo where no stereo edit applies.
        for b_map, Stereo in b_Stereo_dic.items():
            if b_map not in o_p_Stereo_dic.keys():
                continue
            if Stereo != o_p_Stereo_dic[b_map] and b_map not in [tuple([int(i) for i in i.split(':')[:2]]) for i in stereo_edits]:
                bond = b.GetBondBetweenAtoms(dic_map_idx[b_map[0]], dic_map_idx[b_map[1]])
                bond.SetStereo(o_p_Stereo_dic[b_map])

        b = apply_charge_change(b, charge_edits)

        if chai_edits == []:
            o_chai_edits = get_original_chair_edit(p, b)
            b = apply_chirality_change(b, o_chai_edits)
        else:
            b = apply_chirality_change(b, chai_edits)

        b = Chem.MolFromSmiles(Chem.MolToSmiles(b, canonical=False))
        b = apply_stereo_change(b, stereo_edits)

        for atom in b.GetAtoms():
            atom.SetAtomMapNum(0)

        for bond in b.GetBonds():
            if bond.GetStereo() == Chem.rdchem.BondStereo.STEREONONE:
                bond.SetStereo(Chem.rdchem.BondStereo.STEREOANY)

        pre_smiles = Chem.MolToSmiles(b)
        # NOTE(review): the last replacement turns a '/C=N\' pattern into
        # 'C=C' (nitrogen becomes carbon); kept as found -- confirm intent.
        pre_smiles = pre_smiles.replace('[H]/C=C/','C=C').replace('[H]/C=C(\\','C=C(').replace('[H]/C=C(/','C=C(').replace('[MgH2]','[Mg]').replace('/C=N\\','C=C')
        pre_smiles = Chem.MolToSmiles(Chem.MolFromSmiles(pre_smiles))

        reac_mol = Chem.MolFromSmiles(r)
        for atom in reac_mol.GetAtoms():
            atom.SetAtomMapNum(0)
        reac_mol_smiles = Chem.MolToSmiles(reac_mol)
        reac_mol_smiles = Chem.MolToSmiles(Chem.MolFromSmiles(reac_mol_smiles))

        # Largest bond order among the added bonds, read from the last three
        # characters of each edit and bucketed into 0, 1 or 2.
        add_orders = [float(i[-3:]) for i in core_edits_add]
        if not add_orders:
            max_add = 0
        elif max(add_orders) == 1:
            max_add = 1
        else:
            max_add = 2

        charges = [int(i[-1]) for i in charge_edits] + [0]

        if pre_smiles == reac_mol_smiles and len(core_edits_add) <= 1 and max_add <= 1 and max(charges) <= 1 and min(charges) >= -1:
            return ([p, core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add, lg_map_lis])
        else:
            print(pre_smiles, reac_mol_smiles, chai_edits, stereo_edits)
            return 'error type 1'

    except Exception:
        # Best effort per reaction: any failure is reported as an error code.
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        print('error type 2')
        return 'error type 2'
|
|
|
|
|
|
|
def run_get_p_b_l_forward(rxn_smi):
    """Extract the edits of a mapped reaction without rebuilding the reactants.

    Parameters
    ----------
    rxn_smi: str
        Atom-mapped reaction SMILES of the form ``"reactants>>product"``

    Returns
    -------
    list or str
        ``[p, core_edits, chai_edits, stereo_edits, charge_edits,
        core_edits_add, lg_map_lis]`` on success; ``'error type 1'`` when
        either side has 150 atoms or more; ``'error type 2'`` when any
        exception is raised.
    """
    try:
        r, p = rxn_smi.split(">>")

        if Chem.MolFromSmiles(p).GetNumAtoms() >= 150 or Chem.MolFromSmiles(r).GetNumAtoms() >= 150:
            return 'error type 1'

        r, p = cano_smiles_map(r), cano_smiles_map(p)
        reac_mol, prod_mol = align_kekule_pairs(r, p)
        reac_mol = Chem.MolFromSmiles(Chem.MolToSmiles(reac_mol, kekuleSmiles=True), sanitize=False)

        reac_smiles_temp = Chem.MolToSmiles(reac_mol, kekuleSmiles=True)
        reac_mol_temp = Chem.MolFromSmiles(reac_smiles_temp)

        # Fall back to the explicitly aligned Kekule form when the aligned
        # reactant cannot be parsed or no longer matches the input reactant.
        if reac_mol_temp is None or Chem.MolToSmiles(reac_mol_temp) != Chem.MolToSmiles(Chem.MolFromSmiles(r)):
            r_k = get_kekule_aligned_r(r, p)
            # The result never changed what happens next; the call is kept so
            # that an exception raised by it still ends the run as before.
            count_kekule_d(r_k, p)
            reac_mol, prod_mol = Chem.MolFromSmiles(r_k), Chem.MolFromSmiles(p)
            Chem.Kekulize(reac_mol)
            Chem.Kekulize(prod_mol)

        core_edits = get_core_edit_mine(reac_mol, prod_mol)
        core_edits_add = [i for i in core_edits if (float(i.split(':')[2]) == 0) and (float(i.split(':')[1]) != 0)]
        core_edits = [i for i in core_edits if i not in core_edits_add]

        edit_c = [i for i in core_edits if (float(i.split(':')[-1]) > 0)]
        edit_b = [i for i in core_edits if (float(i.split(':')[-1]) == 0)]

        chai_edits = get_chai_edit_mine(Chem.MolFromSmiles(r), Chem.MolFromSmiles(p))
        stereo_edits = get_stereo_edit_mine(Chem.MolFromSmiles(r), Chem.MolFromSmiles(p))
        charge_edits = get_charge_edit_mine(reac_mol, prod_mol, core_edits)

        # Evaluated as before although the values are not used afterwards.
        o_p_Chiral_dic = get_atom_map_chai_dic(Chem.MolFromSmiles(p))
        o_p_Stereo_dic = get_atom_map_stereo_dic(Chem.MolFromSmiles(p))

        frag_mol = apply_edits_to_mol_break(prod_mol, edit_b)
        frag_mol = apply_edits_to_mol_change(frag_mol, edit_c)
        frag_mol = apply_edits_to_mol_connect(frag_mol, core_edits_add)
        frag_mol = remove_s_H(frag_mol)

        reac_mols = Chem.GetMolFrags(reac_mol, asMols=True, sanitizeFrags=False)
        frag_mols = Chem.GetMolFrags(frag_mol, asMols=True, sanitizeFrags=False)

        # Reconcile the piece counts: first drop lone hydrogens, then fall
        # back to the unsplit fragment molecule.
        if len(reac_mols) != len(frag_mols):
            frag_mols = [piece for piece in frag_mols if Chem.MolToSmiles(piece) != '[H]']
        if len(reac_mols) != len(frag_mols):
            frag_mols = [frag_mol]

        if len(reac_mols) == len(frag_mols):
            reac_mols, frag_mols = map_reac_and_frag(reac_mols, frag_mols)

        lg_map_lis_temp = get_lg_map_lis(frag_mols[:], reac_mols[:], core_edits, prod_mol)

        # Leaving groups with more than one attachment atom are rewritten so
        # that their attachment list follows the canonical atom ranking.
        lg_map_lis = []
        for lg, map_ in lg_map_lis_temp:
            lg, map_ = copy.deepcopy(lg), copy.deepcopy(map_)
            map_new = []
            if lg.count(':') > 1:
                lg = Chem.MolFromSmiles(lg)
                Chem.Kekulize(lg)
                for atom in lg.GetAtoms():
                    if atom.GetAtomMapNum() == 0:
                        map_new.append('*')
                    else:
                        map_new.append(map_.pop(0))

                lg_smiles = Chem.MolToSmiles(lg, kekuleSmiles=True)
                rank = list(Chem.CanonicalRankAtoms(lg, breakTies=False))
                map_new = sorted(map_new, key=lambda x: rank[map_new.index(x)])
                map_new = [i for i in map_new if i != '*']

                lg_map_lis.append((lg_smiles, map_new))
            else:
                lg_map_lis.append((lg, map_))

        return ([p, core_edits, chai_edits, stereo_edits, charge_edits, core_edits_add, lg_map_lis])

    except Exception:
        # Best effort per reaction: any failure is reported as an error code.
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        return 'error type 2'
|
|
|
|
|
|
|
def run_get_p_b_l_backward(p,core_edits,chai_edits,stereo_edits,charge_edits,core_edits_add,lg_map_lis):
    """Rebuild the reactant SMILES from a product and its recorded edits.

    Parameters
    ----------
    p: str
        Atom-mapped product SMILES
    core_edits: list of str
        Bond edits ``"a:b:prod_order:reac_order"``
    chai_edits: list of str
        Chirality edits
    stereo_edits: list of str
        Double-bond stereo edits
    charge_edits: list of str
        Charge edits
    core_edits_add: list of str
        Edits describing bonds to add
    lg_map_lis: list of tuple
        ``(lg_smiles, attach_map_nums)`` pairs; ``lg_smiles`` may also be one
        of the tags ``'-1'``, ``'1'``, ``'2'``

    Returns
    -------
    str
        SMILES of the rebuilt reactants without atom-map numbers
    """
    # (symbol, total valence, formal charge) of a leaving group's attachment
    # atom -> order of the bond used to re-attach it. Any other atom, and
    # every atom attached more than once, gets a single bond.
    order_by_state = {
        ('O', 0, 0): 2.0, ('O', 1, 1): 2.0,
        ('S', 0, 0): 2.0, ('S', 2, 0): 2.0, ('S', 4, 0): 2.0, ('S', 1, 1): 2.0,
        ('P', 3, 0): 2.0,
        ('C', 2, 0): 2.0,
        ('N', 2, 1): 2.0, ('N', 1, 0): 2.0, ('N', 0, -1): 2.0,
        ('Se', 2, 0): 2.0, ('Si', 2, 0): 2.0,
        ('Mn', 5, 0): 2.0, ('Cr', 4, 0): 2.0,
        ('N', 0, 0): 3.0, ('C', 1, 0): 3.0, ('C', 0, -1): 3.0,
    }
    listed_symbols = {state[0] for state in order_by_state}

    prod_mol = Chem.MolFromSmiles(p)

    core_edits = [edit for edit in core_edits if edit not in core_edits_add]
    edit_c = [edit for edit in core_edits if (float(edit.split(':')[-1]) > 0)]
    edit_b = [edit for edit in core_edits if (float(edit.split(':')[-1]) == 0)]

    o_p_Chiral_dic = get_atom_map_chai_dic(Chem.MolFromSmiles(p))
    o_p_Stereo_dic = get_atom_map_stereo_dic(Chem.MolFromSmiles(p))

    frag_mol = apply_edits_to_mol_break(prod_mol, edit_b)
    frag_mol = apply_edits_to_mol_change(frag_mol, edit_c)
    frag_mol = apply_edits_to_mol_connect(frag_mol, core_edits_add)
    frag_mol = remove_s_H(frag_mol)

    total_mol = frag_mol

    for lg_smile, map_nums in lg_map_lis[:]:
        if lg_smile in ['-1','1','2']:
            # Charge-only entry: shift the formal charge of the target atom.
            map_num = map_nums[0]
            amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in total_mol.GetAtoms()}
            atom = total_mol.GetAtomWithIdx(amap[map_num])
            atom.SetNumRadicalElectrons(0)
            atom.SetFormalCharge(int(atom.GetFormalCharge()+float(lg_smile)))
            continue

        lg = Chem.MolFromSmiles(lg_smile)

        # Give the leaving-group atoms fresh map numbers above the current
        # maximum: attachment atoms (map 1) first, then the rest.
        max_map = max([i.GetAtomMapNum() for i in total_mol.GetAtoms()])
        count = 1
        for atom in lg.GetAtoms():
            if atom.GetAtomMapNum() == 1:
                atom.SetAtomMapNum(max_map + count)
                count += 1
        for atom in lg.GetAtoms():
            if atom.GetAtomMapNum() == 0:
                atom.SetAtomMapNum(max_map + count)
                count += 1

        total_mol = Chem.CombineMols(total_mol, lg)

        amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in total_mol.GetAtoms()}
        new_mol = Chem.RWMol(total_mol)

        for idx, map_num in enumerate(map_nums):
            if lg_smile.count(':') == len(map_nums):
                # One attachment atom per target atom.
                lg_map = max_map + 1 + idx
                is_multi_bond = 0
            else:
                # A single attachment atom bonded to several targets.
                lg_map = max_map + 1
                is_multi_bond = 1
            atom = total_mol.GetAtomWithIdx(amap[lg_map])

            bond_float = 1.0
            symbol = atom.GetSymbol()
            if symbol in listed_symbols:
                state = (symbol, atom.GetTotalValence(), atom.GetFormalCharge())
                if is_multi_bond == 0:
                    bond_float = order_by_state.get(state, 1.0)

            new_mol.AddBond(amap[map_num], amap[lg_map], BOND_FLOAT_TO_TYPE[bond_float])
            total_mol = new_mol.GetMol()

    total_mol = correct_mol_1(total_mol, is_nitrine_c = True)
    b = correct_mol(total_mol, keep_map = True)

    b_Chiral_dic = get_atom_map_chai_dic(b)
    b_Stereo_dic = get_atom_map_stereo_dic(b)
    dic_map_idx = {i.GetAtomMapNum(): i.GetIdx() for i in b.GetAtoms()}

    # Flip the tag of centres whose label drifted from the product's and that
    # no chirality edit is going to set anyway.
    for b_map, Chiral in b_Chiral_dic.items():
        if b_map not in o_p_Chiral_dic.keys():
            continue
        if Chiral != o_p_Chiral_dic[b_map] and b_map not in [int(i.split(':')[0]) for i in chai_edits]:
            atom = b.GetAtomWithIdx(dic_map_idx[b_map])
            current_tag = atom.GetChiralTag()
            if current_tag == Chem.ChiralType.CHI_TETRAHEDRAL_CCW:
                atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CW)
            elif current_tag == Chem.ChiralType.CHI_TETRAHEDRAL_CW:
                atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CCW)

    # Restore the product's double-bond stereo where no stereo edit applies.
    for b_map, Stereo in b_Stereo_dic.items():
        if b_map not in o_p_Stereo_dic.keys():
            continue
        if Stereo != o_p_Stereo_dic[b_map] and b_map not in [tuple([int(i) for i in i.split(':')[:2]]) for i in stereo_edits]:
            bond = b.GetBondBetweenAtoms(dic_map_idx[b_map[0]], dic_map_idx[b_map[1]])
            bond.SetStereo(o_p_Stereo_dic[b_map])

    b = apply_charge_change(b, charge_edits)

    # Without explicit chirality edits, fall back to the product's own labels.
    if chai_edits == []:
        b = apply_chirality_change(b, get_original_chair_edit(p, b))
    else:
        b = apply_chirality_change(b, chai_edits)

    b = Chem.MolFromSmiles(Chem.MolToSmiles(b, canonical = False))
    b = apply_stereo_change(b, stereo_edits)

    for atom in b.GetAtoms():
        atom.SetAtomMapNum(0)

    for bond in b.GetBonds():
        if bond.GetStereo() == Chem.rdchem.BondStereo.STEREONONE:
            bond.SetStereo(Chem.rdchem.BondStereo.STEREOANY)

    pre_smiles = Chem.MolToSmiles(b)
    # Textual clean-ups, applied in this order.
    for old, new in (
        ('[H]/C=C/', 'C=C'),
        ('[H]/C=C(\\', 'C=C('),
        ('[H]/C=C(/', 'C=C('),
        ('[MgH2]', '[Mg]'),
        ('/C=N\\', 'C=C'),
    ):
        pre_smiles = pre_smiles.replace(old, new)
    pre_smiles = Chem.MolToSmiles(Chem.MolFromSmiles(pre_smiles))
    return pre_smiles
|
|
|
|
|
|
|
|
|
# Valence patterns of a leaving-group attachment atom that call for a multiple
# bond to the core atom: symbol -> {(total valence, formal charge): bond order}.
# Any atom state that is not listed here is attached with a single bond.
_LG_ATTACH_BOND_ORDERS = {
    'O': {(0, 0): 2.0, (1, 1): 2.0},
    'S': {(0, 0): 2.0, (2, 0): 2.0, (4, 0): 2.0, (1, 1): 2.0},
    'P': {(3, 0): 2.0},
    'C': {(2, 0): 2.0, (1, 0): 3.0, (0, -1): 3.0},
    'N': {(2, 1): 2.0, (1, 0): 2.0, (0, -1): 2.0, (0, 0): 3.0},
    'Se': {(2, 0): 2.0},
    'Si': {(2, 0): 2.0},
    'Mn': {(5, 0): 2.0},
    'Cr': {(4, 0): 2.0},
}


def _lg_attachment_bond_order(atom, is_multi_bond):
    """Return the bond order (1.0, 2.0 or 3.0) used to attach ``atom`` to the core.

    Parameters
    ----------
    atom:
        Leaving-group atom that will be bonded to a core atom.
    is_multi_bond:
        Truthy when the same leaving-group atom is bonded to several core
        atoms; such attachments are always single bonds.
    """
    patterns = _LG_ATTACH_BOND_ORDERS.get(atom.GetSymbol())
    if patterns is None:
        return 1.0
    # The valence is only queried for elements that appear in the table.
    state = (atom.GetTotalValence(), atom.GetFormalCharge())
    if is_multi_bond:
        return 1.0
    return patterns.get(state, 1.0)


def run_get_p_b_l_backward_with_mapping(p,core_edits,chai_edits,stereo_edits,charge_edits,core_edits_add,lg_map_lis):
    """Rebuild a reactant SMILES (atom maps kept) from a product and its edits.

    Parameters
    ----------
    p:
        Atom-mapped product SMILES.
    core_edits:
        Bond edits as ``'a:b:old:new'`` strings. Entries also present in
        ``core_edits_add`` are ignored here; the rest are split into bond
        breaks (new order 0) and bond-order changes (new order > 0).
    chai_edits:
        Chirality edits; the first ``:``-field is the atom map number. When
        this is exactly ``[]`` the edits are derived with
        ``get_original_chair_edit(p, b)`` instead.
    stereo_edits:
        Double-bond stereo edits; the first two ``:``-fields are the atom
        map numbers of the bond.
    charge_edits:
        Charge edits, forwarded to ``apply_charge_change``.
    core_edits_add:
        Bond additions, forwarded to ``apply_edits_to_mol_connect``.
    lg_map_lis:
        Iterable of ``(lg_smile, map_nums)`` pairs. ``lg_smile`` is either a
        leaving-group SMILES that is attached to the atoms mapped in
        ``map_nums``, or one of ``'-1'``, ``'1'``, ``'2'``, which is added to
        the formal charge of the atom mapped ``map_nums[0]``.

    Returns
    -------
    str
        SMILES written by ``Chem.MolToSmiles`` for the rebuilt molecule.
    """
    prod_mol = Chem.MolFromSmiles(p)

    core_edits = [i for i in core_edits if i not in core_edits_add]
    edit_c = [i for i in core_edits if (float(i.split(':')[-1]) > 0)]
    edit_b = [i for i in core_edits if (float(i.split(':')[-1]) == 0)]

    # Reference chirality / stereo labels of the untouched product, keyed by
    # atom map number(s). Parsed from fresh molecules so that later edits to
    # prod_mol cannot affect them.
    o_p_Chiral_dic = get_atom_map_chai_dic(Chem.MolFromSmiles(p))
    o_p_Stereo_dic = get_atom_map_stereo_dic(Chem.MolFromSmiles(p))

    frag_mol = apply_edits_to_mol_break(prod_mol,edit_b)
    frag_mol = apply_edits_to_mol_change(frag_mol,edit_c)
    frag_mol = apply_edits_to_mol_connect(frag_mol, core_edits_add)
    frag_mol = remove_s_H(frag_mol)

    total_mol = frag_mol

    for lg_smile,map_nums in lg_map_lis:
        if lg_smile in ['-1','1','2']:
            # Pseudo leaving group: a charge delta for one core atom.
            amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in total_mol.GetAtoms()}
            atom = total_mol.GetAtomWithIdx(amap[map_nums[0]])
            atom.SetNumRadicalElectrons(0)
            atom.SetFormalCharge(int(atom.GetFormalCharge()+float(lg_smile)))
            continue

        lg = Chem.MolFromSmiles(lg_smile)
        max_map = max([i.GetAtomMapNum() for i in total_mol.GetAtoms()])

        # Give every leaving-group atom a fresh map number above the ones in
        # use: attachment atoms (mapped 1) first, then the unmapped atoms.
        count = 1
        for atom in lg.GetAtoms():
            if atom.GetAtomMapNum() == 1:
                atom.SetAtomMapNum(max_map + count)
                count += 1
        for atom in lg.GetAtoms():
            if atom.GetAtomMapNum() == 0:
                atom.SetAtomMapNum(max_map + count)
                count += 1

        total_mol = Chem.CombineMols(total_mol,lg)
        amap = {atom.GetAtomMapNum(): atom.GetIdx() for atom in total_mol.GetAtoms()}
        new_mol = Chem.RWMol(total_mol)

        # One ':' per attachment site means each site has its own
        # leaving-group atom; otherwise a single leaving-group atom is bonded
        # to every site.
        one_lg_atom_per_site = lg_smile.count(':') == len(map_nums)
        for idx, map_num in enumerate(map_nums):
            if one_lg_atom_per_site:
                lg_map = max_map + 1 + idx
                is_multi_bond = 0
            else:
                lg_map = max_map + 1
                is_multi_bond = 1
            atom = total_mol.GetAtomWithIdx(amap[lg_map])
            bond_float = _lg_attachment_bond_order(atom, is_multi_bond)
            new_mol.AddBond(amap[map_num],amap[lg_map],BOND_FLOAT_TO_TYPE[bond_float])
        total_mol = new_mol.GetMol()

    total_mol = correct_mol_1(total_mol,is_nitrine_c = True)
    b = correct_mol(total_mol,keep_map = True)

    b_Chiral_dic = get_atom_map_chai_dic(b)
    b_Stereo_dic = get_atom_map_stereo_dic(b)
    dic_map_idx = {i.GetAtomMapNum(): i.GetIdx() for i in b.GetAtoms()}

    # Atoms whose label drifted from the product's, and that are not the
    # subject of an explicit chirality edit, get their CW/CCW tag inverted.
    edited_chiral_maps = {int(i.split(':')[0]) for i in chai_edits}
    for b_map in b_Chiral_dic:
        if b_map not in o_p_Chiral_dic:
            continue
        if b_Chiral_dic[b_map] == o_p_Chiral_dic[b_map] or b_map in edited_chiral_maps:
            continue
        atom = b.GetAtomWithIdx(dic_map_idx[b_map])
        if atom.GetChiralTag() == Chem.ChiralType.CHI_TETRAHEDRAL_CCW:
            atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CW)
        elif atom.GetChiralTag() == Chem.ChiralType.CHI_TETRAHEDRAL_CW:
            atom.SetChiralTag(Chem.ChiralType.CHI_TETRAHEDRAL_CCW)

    # Same idea for bonds: restore the product's stereo on bonds that drifted
    # and have no explicit stereo edit.
    edited_stereo_bonds = {tuple([int(j) for j in i.split(':')[:2]]) for i in stereo_edits}
    for b_map,Stereo in b_Stereo_dic.items():
        if b_map not in o_p_Stereo_dic:
            continue
        if Stereo == o_p_Stereo_dic[b_map] or b_map in edited_stereo_bonds:
            continue
        bond = b.GetBondBetweenAtoms(dic_map_idx[b_map[0]],dic_map_idx[b_map[1]])
        bond.SetStereo(o_p_Stereo_dic[b_map])

    b = apply_charge_change(b,charge_edits)

    if chai_edits == []:
        o_chai_edits = get_original_chair_edit(p,b)
        b = apply_chirality_change(b,o_chai_edits)
    else:
        b = apply_chirality_change(b,chai_edits)

    # Round-trip through a non-canonical SMILES before applying stereo edits.
    b = Chem.MolFromSmiles(Chem.MolToSmiles(b,canonical = False))
    b = apply_stereo_change(b,stereo_edits)

    for bond in b.GetBonds():
        if bond.GetStereo() == Chem.rdchem.BondStereo.STEREONONE:
            bond.SetStereo(Chem.rdchem.BondStereo.STEREOANY)

    pre_smiles = Chem.MolToSmiles(b)
    return pre_smiles
|
|
|
|
|
|
|
def run_get_p_b_l_check(rxn):
    """Extract the edits of a reaction and verify that they rebuild the reactants.

    Parameters
    ----------
    rxn:
        Reaction SMILES of the form ``reactants>>products``.

    Returns
    -------
    tuple or str
        The 7-tuple ``(p, core_edits, chai_edits, stereo_edits, charge_edits,
        core_edits_add, lg_map_lis)`` when the round trip succeeds, otherwise
        one of the strings:

        * ``'error type 3'`` - ``run_get_p_b_l_forward`` raised,
        * ``'error type 5'`` - ``run_get_p_b_l_backward`` raised,
        * ``'error type 4'`` - the rebuilt SMILES differs from the reactants,
          or the edits fall outside the accepted range (more than one added
          bond, an added bond whose order is not 1, or a charge outside
          ``[-1, 1]``).
    """
    # `except Exception` keeps KeyboardInterrupt / SystemExit propagating.
    try:
        p,core_edits,chai_edits,stereo_edits,charge_edits,core_edits_add,lg_map_lis = run_get_p_b_l_forward(rxn)
    except Exception:
        return 'error type 3'

    try:
        pre_smiles = run_get_p_b_l_backward(p,core_edits,chai_edits,stereo_edits,charge_edits,core_edits_add,lg_map_lis)
    except Exception:
        return 'error type 5'

    # Reference: the reactant side with atom maps removed, written twice so
    # that it is in the same form as the rebuilt SMILES.
    r = rxn.split('>>')[0]
    reac_mol = Chem.MolFromSmiles(r)
    for atom in reac_mol.GetAtoms():
        atom.SetAtomMapNum(0)
    reac_mol_smiles = Chem.MolToSmiles(reac_mol)
    reac_mol_smiles = Chem.MolToSmiles(Chem.MolFromSmiles(reac_mol_smiles))

    # Read the last ':'-field instead of slicing a fixed number of characters.
    added_orders = [float(i.split(':')[-1]) for i in core_edits_add]
    if added_orders == []:
        max_add = 0
    elif max(added_orders) == 1:
        max_add = 1
    else:
        max_add = 2

    # Parse the whole last field so the sign and every digit of the charge
    # are kept ('-1' is -1, not 1).
    charges = [int(i.split(':')[-1]) for i in charge_edits] + [0]

    if pre_smiles == reac_mol_smiles and len(core_edits_add) <= 1 and max_add <=1 and max(charges)<=1 and min(charges)>=-1:
        return p,core_edits,chai_edits,stereo_edits,charge_edits,core_edits_add,lg_map_lis
    return 'error type 4'
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_atom_pair_bond_idx_dic(concise_smiles):
    """Map each bonded atom pair to the position of its line in Indigo's molfile.

    Parameters
    ----------
    concise_smiles:
        SMILES string, loaded by Indigo (to write the molfile) and by RDKit
        without sanitization (to count atoms and bonds).

    Returns
    -------
    dict
        ``{(low, high): k}`` where ``low``/``high`` are the two atom numbers
        read from the first two 3-character fields of the ``k``-th bond line
        (``k`` starts at 0).
    """
    molfile_lines = indigo.loadMolecule(concise_smiles).molfile().split('\n')

    rd_mol = Chem.MolFromSmiles(concise_smiles,sanitize = False)
    n_atoms = len(rd_mol.GetAtoms())
    n_bonds = len(rd_mol.GetBonds())

    # Four header lines, then one line per atom, then the bond lines.
    first_bond_line = 4 + n_atoms
    bond_lines = molfile_lines[first_bond_line:first_bond_line + n_bonds]

    atom_pair_bond_idx_dic = {}
    for position, bond_line in enumerate(bond_lines):
        ends = (int(bond_line[:3]), int(bond_line[3:6]))
        atom_pair_bond_idx_dic[(min(ends), max(ends))] = position
    return atom_pair_bond_idx_dic
|
|
|
|
|
|
|
|
|
def get_rm_token_lis(concise_smiles,detailed_smiles):
    """Align two SMILES strings and list what must be dropped from the longer.

    ``detailed_smiles`` is walked once, left to right. A character equal to
    the next unmatched character of ``concise_smiles`` is kept; any other
    character counts as removed.

    Parameters
    ----------
    concise_smiles:
        The shorter string.
    detailed_smiles:
        The longer string, expected to be ``concise_smiles`` with extra
        characters inserted.

    Returns
    -------
    list or None
        One entry per character of ``detailed_smiles``: ``' '`` for a kept
        character, the character itself for a removed one. When the kept
        characters do not spell ``concise_smiles``, ``'error'`` is printed
        and ``None`` is returned.
    """
    rm_token_lis = []
    matched = 0
    n_concise = len(concise_smiles)
    for char in detailed_smiles:
        # The bounds check lets extra characters trail the last concise
        # character instead of raising IndexError.
        if matched < n_concise and char == concise_smiles[matched]:
            matched += 1
            rm_token_lis.append(' ')
        else:
            rm_token_lis.append(char)

    if matched == n_concise:
        return rm_token_lis
    print('error')
    return None
|
|
|
|
|
def get_bond_token_lis(detailed_smiles):
    """Mark the positions of bond symbols in a SMILES string.

    Parameters
    ----------
    detailed_smiles:
        SMILES string to scan.

    Returns
    -------
    list
        One entry per character: the character itself when it is one of
        ``- = # : / \\`` and is not immediately followed by ``]``, otherwise
        ``' '``. The ``]`` test is the only filter applied, so it excludes a
        sign that closes a bracket atom (``[O-]``) and nothing else.
    """
    bond_symbols = ('-', '=', '#', ':', '/', '\\')
    last = len(detailed_smiles) - 1

    bond_token_lis = []
    for i, char in enumerate(detailed_smiles):
        # A symbol in the last position has no follower to inspect; treat it
        # as a bond token rather than indexing past the end of the string.
        is_bond = char in bond_symbols and (i == last or detailed_smiles[i+1] != ']')
        bond_token_lis.append(char if is_bond else ' ')
    return bond_token_lis
|
|
|
|
|
def get_bond_token_idx_dic(bond_token_lis):
    """Map the running bond count to the number of tokens consumed so far.

    After every token the entry for the current bond count is overwritten
    with the 1-based count of tokens read, so each key ends up holding the
    token count reached just before the next bond token (or the end of the
    list).

    Parameters
    ----------
    bond_token_lis:
        Sequence in which ``' '`` means "not a bond" and anything else is a
        bond token.

    Returns
    -------
    dict
        ``{bond_count: token_count}``.
    """
    positions = {}
    bonds_seen = 0
    for tokens_read, token in enumerate(bond_token_lis, start=1):
        if token != ' ':
            bonds_seen += 1
        positions[bonds_seen] = tokens_read
    return positions
|
|
|
|
|
def rerank_special_bond(mol_block_indigo_lis,bond_idx):
    """Swap the two atom fields of one bond line of a molfile, in place.

    Parameters
    ----------
    mol_block_indigo_lis:
        Molfile as a list of lines; the list is modified and returned.
    bond_idx:
        0-based position of the bond line within the bond block.

    Returns
    -------
    list
        The same list object, with characters ``[0:3]`` and ``[3:6]`` of the
        selected bond line exchanged.
    """
    mol = Chem.MolFromMolBlock('\n'.join(mol_block_indigo_lis),removeHs = False)

    # Bond lines start after 4 header lines and one line per atom.
    row = mol.GetNumAtoms() + 4 + bond_idx
    line = mol_block_indigo_lis[row]
    mol_block_indigo_lis[row] = line[3:6] + line[:3] + line[6:]
    return mol_block_indigo_lis
|
|
|
def get_caption_r(caption):
    """Replace every ``{...}`` group of a caption by a numbered gold atom.

    Parameters
    ----------
    caption:
        Text that may contain ``{...}`` groups.

    Returns
    -------
    tuple
        ``(caption_r, words)``: the caption with the groups replaced, in
        order of appearance, by ``[401Au]``, ``[402Au]``, ...; and the list
        of the original groups including their braces.
    """
    words = ['{' + inner + '}' for inner in re.findall(r'[{](.*?)[}]', caption)]

    caption_r = caption
    for label, word in enumerate(words, start=401):
        # Only the first remaining occurrence is replaced, so repeated
        # groups each receive their own number.
        caption_r = caption_r.replace(word, '[{}Au]'.format(label), 1)
    return caption_r,words
|
|
|
|
|
def get_b_smiles_detailed_smiles(caption_r,smiles):
    """Expand an edit caption so that it lines up with the all-bonds-explicit SMILES.

    Parameters
    ----------
    caption_r:
        Caption string in which some bond symbols are replaced by edit marks.
    smiles:
        SMILES of the same molecule, parsed without sanitization.

    Returns
    -------
    tuple
        ``(b_smiles, detailed_smiles)``. ``detailed_smiles`` is the
        non-canonical SMILES with every bond written explicitly and ``/``,
        ``\\`` expanded to ``/-``, ``\\-``. ``b_smiles`` is the caption with
        the characters of ``detailed_smiles`` inserted wherever the two
        strings differ and the caption does not hold an edit mark.
    """
    edit_marks = ['!','_',';','^','&','{','}','。','《','》']

    b_smiles = caption_r.replace('/','/-').replace('\\','\\-')
    b_smiles = b_smiles.replace('-!','!').replace('-?','?')

    mol_tmp = Chem.MolFromSmiles(smiles,sanitize = False)
    detailed_smiles = Chem.MolToSmiles(mol_tmp,canonical = False,allBondsExplicit = True)
    detailed_smiles = detailed_smiles.replace('/','/-').replace('\\','\\-')

    for pos, reference_char in enumerate(detailed_smiles):
        current = b_smiles[pos]
        if current == reference_char or current in edit_marks:
            continue
        # The caption lacks this character: insert it so both strings stay
        # aligned position by position.
        b_smiles = b_smiles[:pos] + reference_char + b_smiles[pos:]

    return b_smiles,detailed_smiles
|
|
|
|
|
def get_bond_dic(b_smiles,detailed_smiles):
    """Collect the characters that differ between two aligned SMILES strings.

    Both strings first have ``-]`` replaced by ``]``. They are then compared
    position by position; a differing character of ``b_smiles`` is stored
    under the number of bond symbols (``- = # :``) of ``detailed_smiles``
    seen before that position.

    Parameters
    ----------
    b_smiles:
        String that carries the edit marks.
    detailed_smiles:
        Reference string with explicit bond symbols.

    Returns
    -------
    dict
        ``{bond_count: character_of_b_smiles}``.
    """
    marked = b_smiles.replace('-]',']')
    reference = detailed_smiles.replace('-]',']')

    bond_dic = {}
    bonds_seen = 0
    for ref_char, marked_char in zip(reference, marked):
        if ref_char != marked_char:
            bond_dic[bonds_seen] = marked_char
        if ref_char in ('-', '=', '#', ':'):
            bonds_seen += 1
    return bond_dic
|
|
|
|
|
def get_t_smiles(e_smiles,o_smiles):
    """Copy the isotope labels of an edit caption onto the original molecule.

    Parameters
    ----------
    e_smiles:
        Caption with edit marks; ``! _ ; ^`` are read as ``-`` and
        ``& { } 。 《 》`` as ``=`` so that it parses as SMILES.
    o_smiles:
        Original SMILES; its atom map numbers are cleared.

    Returns
    -------
    str
        Non-canonical SMILES of the original molecule in which every atom
        whose same-index atom in the caption has a non-zero isotope carries
        that isotope.
    """
    mark_to_bond = str.maketrans({
        '!': '-', '_': '-', ';': '-', '^': '-',
        '&': '=', '{': '=', '}': '=', '。': '=', '《': '=', '》': '=',
    })
    e_smiles_r = e_smiles.translate(mark_to_bond)

    mol_r = Chem.MolFromSmiles(e_smiles_r,sanitize = False)
    a = Chem.MolFromSmiles(o_smiles,sanitize = False)

    for atom in a.GetAtoms():
        atom.SetAtomMapNum(0)

    # Atoms are paired by index between the two molecules.
    for atom in mol_r.GetAtoms():
        isotope = atom.GetIsotope()
        if isotope != 0:
            a.GetAtomWithIdx(atom.GetIdx()).SetIsotope(isotope)

    return Chem.MolToSmiles(a,canonical = False)
|
|
|
|
|
|
|
|
|
def get_b_smiles(p_b):
    """Encode the edits of ``p_b`` as a caption and verify the round trip.

    The encoding and decoding steps are delegated to
    ``get_b_smiles_forward`` and ``get_b_smiles_backward``, whose bodies this
    function previously duplicated line for line.

    Parameters
    ----------
    p_b:
        Sequence whose items 0-5 are ``o_smiles``, ``core_edits``,
        ``chai_edits``, ``stereo_edits``, ``charge_edits`` and
        ``core_edits_add``.

    Returns
    -------
    str
        The caption when decoding it yields the same five edit lists
        (compared after sorting), otherwise ``'error'``.

    Notes
    -----
    The caption is appended to the module-level list ``out_b_smiles_lis``
    before the check; that list is not defined in this function.
    """
    o_smiles = p_b[0]
    core_edits = p_b[1]
    chai_edits = p_b[2]
    stereo_edits = p_b[3]
    charge_edits = p_b[4]
    core_edits_add = p_b[5]

    caption = get_b_smiles_forward(p_b)
    out_b_smiles_lis.append(caption)

    core_edits_,chai_edits_,stereo_edits_,charge_edits_,core_edits_add_ = get_b_smiles_backward(caption,o_smiles)

    if sorted(core_edits_) != sorted(core_edits) or sorted(chai_edits_) != sorted(chai_edits) or sorted(stereo_edits_) != sorted(stereo_edits) or sorted(charge_edits_) != sorted(charge_edits) or sorted(core_edits_add_) != sorted(core_edits_add):
        print(core_edits_,core_edits)
        print(chai_edits_,chai_edits)
        print(core_edits_add_,core_edits_add)
        return 'error'
    return caption
|
|
|
|
|
def get_b_smiles_forward(p_b):
    """Encode a product and its edit lists as a single caption string.

    Atom-level edits are stored as isotope numbers, bond-level edits as mark
    characters that replace (or are inserted for) the bond symbol.

    Isotope code (summed per atom map number):

    * ``9``   - a ``core_edits`` entry with one endpoint ``0`` and new order
      ``'0.0'`` (this one is assigned, not added),
    * ``10`` / ``20`` / ``30`` - chirality edit ``'R'`` / ``'S'`` / ``'?'``,
    * ``200`` / ``400`` / ``600`` - charge edit ``'1'`` / ``'0'`` / ``'-1'``,
    * ``100`` - the atom is an endpoint of a ``core_edits_add`` entry.

    Bond marks:

    * new bond order ``'0.0'`` / ``'1.0'`` / ``'2.0'`` / ``'3.0'`` ->
      ``!`` / ``_`` / ``;`` / ``^``,
    * stereo ``'a'`` / ``'e'`` / ``'z'`` -> ``&`` / ``{`` / ``}``, or
      ``。`` / ``《`` / ``》`` when the bond already carries an order mark.

    Parameters
    ----------
    p_b:
        Sequence whose items 0-5 are ``o_smiles``, ``core_edits``,
        ``chai_edits``, ``stereo_edits``, ``charge_edits`` and
        ``core_edits_add``. Edits are ``':'``-separated strings; atoms are
        referenced by map number.

    Returns
    -------
    str
        The caption.

    Raises
    ------
    ValueError
        If a bond edit has a new bond order, or a stereo edit a label, with
        no mark assigned to it.
    """
    o_smiles = p_b[0]
    core_edits = p_b[1]
    chai_edits = p_b[2]
    stereo_edits = p_b[3]
    charge_edits = p_b[4]
    core_edits_add = p_b[5]

    chiral_codes = {'R': 10, 'S': 20, '?': 30}
    charge_codes = {'1': 200, '0': 400, '-1': 600}
    bond_order_marks = {'0.0': '!', '1.0': '_', '2.0': ';', '3.0': '^'}
    stereo_marks = {'a': '&', 'e': '{', 'z': '}'}
    order_and_stereo_marks = {'a': '。', 'e': '《', 'z': '》'}
    all_marks = ['!','_',';','^','&','{','}','。','《','》']

    atom_idx_mark_dic = {}

    def add_code(atom_map, code):
        # Codes of different edit kinds use different decimal digits, so
        # they are combined by addition.
        atom_idx_mark_dic[atom_map] = atom_idx_mark_dic.get(atom_map, 0) + code

    for edit in core_edits:
        fields = edit.split(':')
        b = int(fields[0])
        e = int(fields[1])
        new_b = fields[3]
        if min([b,e]) == 0 and new_b == '0.0':
            atom_idx_mark_dic[max([b,e])] = 9

    for edit in chai_edits:
        edit_l = edit.split(':')
        if edit_l[3] in chiral_codes:
            add_code(int(edit_l[0]), chiral_codes[edit_l[3]])

    for edit in charge_edits:
        edit_l = edit.split(':')
        if edit_l[3] in charge_codes:
            add_code(int(edit_l[0]), charge_codes[edit_l[3]])

    for edit in core_edits_add:
        edit_l = edit.split(':')
        add_code(int(edit_l[0]), 100)
        add_code(int(edit_l[1]), 100)

    # Move the codes onto the atoms as isotopes and drop the atom maps.
    a = Chem.MolFromSmiles(o_smiles,sanitize = False)
    for atom in a.GetAtoms():
        atom_map = atom.GetAtomMapNum()
        if atom_map in atom_idx_mark_dic:
            atom.SetIsotope(atom_idx_mark_dic[atom_map])
        atom.SetAtomMapNum(0)

    mol = copy.deepcopy(a)

    detailed_smiles = Chem.MolToSmiles(mol,canonical = False,allBondsExplicit = True,kekuleSmiles=True)
    concise_smiles = Chem.MolToSmiles(mol,canonical = False,kekuleSmiles=True)
    concise_smiles_no_chirality = Chem.MolToSmiles(mol,canonical = False,isomericSmiles = False,kekuleSmiles=True)

    atom_pair_bond_idx_dic = get_atom_pair_bond_idx_dic(concise_smiles_no_chirality)
    rm_token_lis = get_rm_token_lis(concise_smiles,detailed_smiles)
    bond_token_lis = get_bond_token_lis(detailed_smiles)
    bond_token_idx_dic = get_bond_token_idx_dic(bond_token_lis)

    bond_idx_mark_dic = {}
    for edit in core_edits:
        fields = edit.split(':')
        b = int(fields[0])
        e = int(fields[1])
        new_b = fields[3]
        if min([b,e]) == 0:
            continue
        bond_idx = atom_pair_bond_idx_dic[min([b,e]),max([b,e])]
        if new_b not in bond_order_marks:
            # Previously the mark of the preceding edit was silently reused.
            raise ValueError('no bond mark for new bond order {!r} in edit {!r}'.format(new_b, edit))
        bond_idx_mark_dic[bond_idx] = bond_order_marks[new_b]

    for edit in stereo_edits:
        fields = edit.split(':')
        b = int(fields[0])
        e = int(fields[1])
        new_b = fields[3]
        if min([b,e]) == 0:
            continue
        bond_idx = atom_pair_bond_idx_dic[min([b,e]),max([b,e])]
        # A bond that already has a mark gets a combined order+stereo mark.
        if bond_idx in bond_idx_mark_dic:
            marks = order_and_stereo_marks
        else:
            marks = stereo_marks
        if new_b not in marks:
            raise ValueError('no stereo mark for label {!r} in edit {!r}'.format(new_b, edit))
        bond_idx_mark_dic[bond_idx] = marks[new_b]

    for bond_idx,mark in bond_idx_mark_dic.items():
        token_idx = bond_token_idx_dic[bond_idx]
        rm_token_lis[token_idx] = mark

    # Keep the characters shared with the concise SMILES, emit the marks,
    # and drop the explicit bond symbols that carry no mark.
    new_smiles_lis = []
    for i in range(len(rm_token_lis)):
        token = rm_token_lis[i]
        if token == ' ':
            new_smiles_lis.append(detailed_smiles[i])
        elif token[-1] in all_marks:
            new_smiles_lis.append(token)

    return ''.join(new_smiles_lis)
|
|
|
|
|
def get_b_smiles_backward(caption_r,o_smiles):
    """Decode an edit caption back into the five edit lists.

    Bond marks are read from the caption aligned against the explicit-bond
    SMILES; atom edits are read from the isotope of each atom (units digit
    ``9``; tens digit 1/2/3 for ``R``/``S``/``?``; hundreds value 2-3, 4-5,
    6-7 for charge 1, 0, -1; odd hundreds value for an added-bond endpoint).
    Atoms are reported as atom index + 1.

    Parameters
    ----------
    caption_r:
        Caption string carrying the edit marks and isotope labels.
    o_smiles:
        Original SMILES of the molecule.

    Returns
    -------
    tuple
        ``(core_edits_, chai_edits_, stereo_edits_, charge_edits_,
        core_edits_add_)``, each a list of ``'a:b:x:y'`` strings.
    """
    order_of_mark = {'!': '0.0', '_': '1.0', ';': '2.0', '^': '3.0',
                     '。': '2.0', '《': '2.0', '》': '2.0'}
    stereo_of_mark = {'&': 'a', '{': 'e', '}': 'z',
                      '。': 'a', '《': 'e', '》': 'z'}
    chirality_of_digit = {1: 'R', 2: 'S', 3: '?'}
    charge_of_pair = {1: 1, 2: 0, 3: -1}
    template = '{}:{}:{}:{}'

    t_smiles = get_t_smiles(caption_r,o_smiles)
    b_smiles,detailed_smiles = get_b_smiles_detailed_smiles(caption_r,t_smiles)
    bond_dic = get_bond_dic(b_smiles,detailed_smiles)

    # Invert the pair -> position map so marks can be looked up by position.
    atom_pair_bond_idx = {}
    for atom_pair,bond_idx in get_atom_pair_bond_idx_dic(o_smiles).items():
        atom_pair_bond_idx[bond_idx] = atom_pair

    mol = Chem.MolFromSmiles(t_smiles)
    Chem.Kekulize(mol)

    core_edits_ = []
    chai_edits_ = []
    stereo_edits_ = []
    charge_edits_ = []
    core_edits_add_ = []

    for bond_idx,mark in bond_dic.items():
        b,e = atom_pair_bond_idx[bond_idx]
        o_bond = mol.GetBondBetweenAtoms(b-1,e-1).GetBondTypeAsDouble()
        if mark in order_of_mark:
            core_edits_.append(template.format(b,e,o_bond,order_of_mark[mark]))
        if mark in stereo_of_mark:
            stereo_edits_.append(template.format(b,e,0,stereo_of_mark[mark]))

    core_edits_add_atom_lis = []
    for atom in mol.GetAtoms():
        atom_no = atom.GetIdx()+1
        hundreds, remainder = divmod(atom.GetIsotope(), 100)
        tens, units = divmod(remainder, 10)

        if units == 9:
            core_edits_.append(template.format(atom_no,0,'1.0','0.0'))

        if tens in chirality_of_digit:
            chai_edits_.append(template.format(atom_no,0,'0',chirality_of_digit[tens]))

        # 2-3 -> pair 1, 4-5 -> pair 2, 6-7 -> pair 3.
        pair = hundreds // 2
        if pair in charge_of_pair:
            charge_edits_.append(template.format(atom_no,0,'0',charge_of_pair[pair]))

        if hundreds % 2 == 1:
            core_edits_add_atom_lis.append(atom_no)

    if core_edits_add_atom_lis != []:
        core_edits_add_.append(template.format(core_edits_add_atom_lis[0],core_edits_add_atom_lis[1],'0.0','1.0'))

    return core_edits_,chai_edits_,stereo_edits_,charge_edits_,core_edits_add_
|
|
|
|
|
|
|
def get_b_smiles_check(p_b):
    """Encode ``p_b`` as a caption and confirm that it decodes to the same edits.

    Parameters
    ----------
    p_b:
        7-item sequence ``(p, core_edits, chai_edits, stereo_edits,
        charge_edits, core_edits_add, lg_map_lis)``.

    Returns
    -------
    str
        The caption from ``get_b_smiles_forward`` when every decoded edit
        list equals the input list after sorting, otherwise ``'error'``
        (after printing three of the mismatching pairs).
    """
    p,core_edits,chai_edits,stereo_edits,charge_edits,core_edits_add,lg_map_lis = p_b

    b_smiles = get_b_smiles_forward(p_b)
    decoded = get_b_smiles_backward(b_smiles,p_b[0])
    core_edits_,chai_edits_,stereo_edits_,charge_edits_,core_edits_add_ = decoded

    expected = (core_edits,chai_edits,stereo_edits,charge_edits,core_edits_add)
    if all(sorted(got) == sorted(want) for got, want in zip(decoded, expected)):
        return b_smiles

    print(core_edits_,core_edits)
    print(chai_edits_,chai_edits)
    print(core_edits_add_,core_edits_add)
    return 'error'
|
|
|
|
|
import re |
|
|
|
def replacenth(string, sub, wanted, n):
    """Replace the ``n``-th occurrence of ``sub`` in ``string`` by ``wanted``.

    Parameters
    ----------
    string:
        Text to edit.
    sub:
        Literal substring to look for.
    wanted:
        Replacement text.
    n:
        1-based occurrence number (non-overlapping, left to right).

    Returns
    -------
    str
        The edited string.

    Raises
    ------
    IndexError
        If ``string`` contains fewer than ``n`` occurrences of ``sub``.
    """
    # Escape `sub` so it is located as the same literal text that
    # `str.replace` substitutes below.
    where = [m.start() for m in re.finditer(re.escape(sub), string)][n-1]
    return string[:where] + string[where:].replace(sub, wanted, 1)
|
|
|
|
|
def cano_smiles_map(smiles):
    """Rewrite a mapped SMILES through an unmapped, kekulized round trip.

    The atom map numbers are recorded and cleared, the molecule is written
    as a non-canonical kekulized SMILES and parsed again (unsanitized), and
    the recorded map numbers are put back by atom index.

    Parameters
    ----------
    smiles:
        Atom-mapped SMILES.

    Returns
    -------
    str
        Non-canonical kekulized SMILES carrying the original map numbers.
    """
    mol = Chem.MolFromSmiles(smiles,sanitize = False)
    atom_map_lis = [atom.GetAtomMapNum() for atom in mol.GetAtoms()]
    for atom in mol.GetAtoms():
        atom.SetAtomMapNum(0)

    unmapped_smiles = Chem.MolToSmiles(mol,canonical = False,kekuleSmiles=True)

    remapped = Chem.MolFromSmiles(unmapped_smiles,sanitize = False)
    for atom in remapped.GetAtoms():
        atom.SetAtomMapNum(atom_map_lis[atom.GetIdx()])

    return Chem.MolToSmiles(remapped,canonical = False,kekuleSmiles=True)
|
|
|
|
|
def get_lg_forward(core_edits,lg_map):
    """Group leaving groups by the attachment atom they belong to.

    Attachment atoms are the non-zero endpoints of every core edit whose
    bond order decreases, in ascending order.

    Parameters
    ----------
    core_edits:
        Edits as ``'a:b:old:new'`` strings.
    lg_map:
        Iterable of ``(lg, map_lis)`` pairs: a leaving-group string and the
        attachment atoms it bonds to.

    Returns
    -------
    list
        One sorted tuple of leaving-group strings per attachment atom. A
        group shared by different atoms is stored under each of them with a
        trailing ``*``; when it has two ``:`` one of its ``:1`` labels is
        rewritten to ``:2`` to record the order of ``map_lis``.
    """
    endpoints = set()
    for core_edit in core_edits:
        fields = core_edit.split(':')
        if float(fields[2])-float(fields[3]) > 0:
            endpoints.update((int(fields[0]), int(fields[1])))
    attach_idx = [i for i in sorted(endpoints) if i != 0]

    lg_lis = [()]*len(attach_idx)

    def attach(map_, label):
        slot = attach_idx.index(map_)
        lg_lis[slot] = lg_lis[slot] + (label,)

    for lg,map_lis in lg_map:
        if len(map_lis) == 1 or len(set(map_lis)) == 1:
            # Every bond of the group goes to the same atom.
            attach(map_lis[0], lg)
        elif lg.count(':') == 1:
            for map_ in map_lis:
                attach(map_, lg + "*")
        elif lg.count(':') == 2:
            # Relabel the second ':1' when the atoms are listed in ascending
            # order, otherwise the first one.
            if map_lis[0]<map_lis[1]:
                lg = replacenth(lg, ':1',':2',2)
            else:
                lg = lg.replace(':1',':2',1)
            for map_ in map_lis:
                attach(map_, lg + "*")
        else:
            print('error')

    return [tuple(sorted(i)) for i in lg_lis]
|
|
|
|
|
|
|
def get_lg_backward(core_edits_, lg_lis):
    """Expand per-attachment leaving-group tuples into (group, indices) pairs.

    Parameters
    ----------
    core_edits_: iterable of str
        Colon-separated edit strings. Fields 0 and 1 are read as ints and
        fields 2 and 3 as floats; an edit contributes its two ints as
        attachment indices when ``field2 - field3 > 0``.
    lg_lis: iterable of iterable of str
        Leaving-group strings, paired positionally with the ascending,
        non-zero attachment indices.

    Returns
    -------
    list of (str, list of int)
        Groups without ``"*"`` come first, each with its index repeated once
        per ``':'`` in the string (at least once). Groups carrying ``"*"``
        follow, merged by identical string: the ``"*"`` is removed, ``':2'``
        labels are rewritten to ``':1'``, and the collected indices are
        reversed when ``':2'`` appears before ``':1'`` in the string.
    """
    attach_idx = set()
    for core_edit in core_edits_:
        fields = core_edit.split(':')
        if float(fields[2]) - float(fields[3]) > 0:
            attach_idx.update((int(fields[0]), int(fields[1])))
    attach_idx.discard(0)
    attach_idx = sorted(attach_idx)

    plain = []    # groups bound to a single attachment index
    starred = {}  # '*'-tagged group -> indices it was listed under
    for id_, lg_ in zip(attach_idx, lg_lis):
        for lg in list(lg_):
            if '*' in lg:
                starred.setdefault(lg, []).append(id_)
                continue
            colons = lg.count(':')
            plain.append((lg, [id_] * colons if colons > 1 else [id_]))

    merged = plain
    for lg, ids in starred.items():
        bare = lg.replace('*', '')
        if ':2' in lg:
            # ':2' written before ':1' means the indices were collected in
            # the opposite order to the labels.
            if lg.index(':1') > lg.index(':2'):
                ids.reverse()
            bare = bare.replace(':2', ':1')
        merged.append((bare, ids))

    return merged
|
|
|
|
|
|
|
|
|
# Symbol -> number lookup table. Each key concatenates an optional '~', an
# optional one of 'r'/'s'/'?', and an optional Greek tag; the value is the
# decimal string of the three matching numeric parts added together.
# Keys are inserted longest first (length 4 down to 1) and the empty key is
# never inserted.
dic_str_to_num = {}
for l in (4, 3, 2, 1):
    for a, i in zip((0, 200, 400, 600, 100, 300, 500, 700),
                    ('', 'α', 'β', 'γ', 'δ', 'αδ', 'βδ', 'γδ')):
        for b, j in zip((0, 10, 20, 30), ('', 'r', 's', '?')):
            for c, k in zip((0, 9), ('', '~')):
                _sym = k + j + i
                if len(_sym) != l:
                    continue
                dic_str_to_num[_sym] = str(a + b + c)
|
|
|
|
|
# Number -> symbol lookup table, built from the same three part lists as the
# symbol -> number table. Keys are decimal strings inserted longest first
# (3 digits down to 1); the combination whose symbol is empty (number 0) is
# skipped.
dic_num_to_str = {}
for l in (3, 2, 1):
    for a, i in zip((0, 200, 400, 600, 100, 300, 500, 700),
                    ('', 'α', 'β', 'γ', 'δ', 'αδ', 'βδ', 'γδ')):
        for b, j in zip((0, 10, 20, 30), ('', 'r', 's', '?')):
            for c, k in zip((0, 9), ('', '~')):
                _num = str(a + b + c)
                _sym = k + j + i
                if len(_num) != l or len(_sym) == 0:
                    continue
                dic_num_to_str[_num] = _sym
|
|
|
|
|
|
|
def iso_to_symbo(txt, dic_num_to_str):
    """Replace bracket-leading numbers in *txt* with their symbols.

    Parameters
    ----------
    txt: str
        Text to convert.
    dic_num_to_str: dict
        Maps number strings to symbol strings. Replacements run in the
        mapping's iteration order, each turning ``'[' + number`` into
        ``'[' + symbol``.

    Returns
    -------
    str
        Converted text, in which ``'。'``, ``'》'`` and ``'《'`` have also
        been rewritten to ``';&'``, ``';}'`` and ``';{'``.
    """
    for number, symbol in dic_num_to_str.items():
        txt = txt.replace('[' + number, '[' + symbol)

    for old, new in (('。', ';&'), ('》', ';}'), ('《', ';{')):
        txt = txt.replace(old, new)

    return txt
|
|
|
def symbo_to_iso(txt, dic_str_to_num):
    """Replace bracket-leading symbols in *txt* with their numbers.

    Parameters
    ----------
    txt: str
        Text to convert.
    dic_str_to_num: dict
        Maps symbol strings to number strings. Replacements run in the
        mapping's iteration order, each turning ``'[' + symbol`` into
        ``'[' + number``.

    Returns
    -------
    str
        Converted text, in which ``';&'``, ``';}'`` and ``';{'`` have also
        been rewritten to ``'。'``, ``'》'`` and ``'《'``.
    """
    for symbol, number in dic_str_to_num.items():
        txt = txt.replace('[' + symbol, '[' + number)

    for old, new in ((';&', '。'), (';}', '》'), (';{', '《')):
        txt = txt.replace(old, new)

    return txt
|
|
|
|
|
|
|
def merge_smiles_only(text):
    """Decode an e-SMILES string and pass its edits to the backward runner.

    Parameters
    ----------
    text: str
        String of the form ``o_smiles>>>b_smiles<lg,...><lg,...>``. Symbols
        are first converted back to numbers with ``symbo_to_iso``.

    Returns
    -------
    The value returned by ``run_get_p_b_l_backward`` (presumably the
    reconstructed precursor SMILES - confirm against that helper).

    Raises
    ------
    IndexError
        If *text* contains no ``'>>>'`` separator.
    """
    text = symbo_to_iso(text, dic_str_to_num)

    halves = text.split('>>>')
    o_smiles = halves[0]
    b_smiles = halves[1].split('<')[0]

    # One tuple per '<...>' section; an empty section means no leaving group.
    lg_lis = [
        tuple(section.split(',')) if section != '' else tuple()
        for section in re.findall(r"[<](.*?)[>]", text)
    ]

    (core_edits, chai_edits, stereo_edits,
     charge_edits, core_edits_add) = get_b_smiles_backward(b_smiles, o_smiles)
    lg_map_lis = get_lg_backward(core_edits, lg_lis)

    # Number the atoms 1..N in index order before applying the edits.
    mol = Chem.MolFromSmiles(o_smiles, sanitize=False)
    for atom in mol.GetAtoms():
        atom.SetAtomMapNum(atom.GetIdx() + 1)
    p = Chem.MolToSmiles(mol)

    return run_get_p_b_l_backward(
        p, core_edits, chai_edits, stereo_edits,
        charge_edits, core_edits_add, lg_map_lis,
    )
|
|
|
|
|
def merge_smiles_with_mapping_only(text):
    """Decode an e-SMILES string and pass its edits to the mapping runner.

    Parameters
    ----------
    text: str
        String of the form ``o_smiles>>>b_smiles<lg,...><lg,...>``. Symbols
        are first converted back to numbers with ``symbo_to_iso``.

    Returns
    -------
    The value returned by ``run_get_p_b_l_backward_with_mapping``
    (presumably an atom-mapped result - confirm against that helper).

    Raises
    ------
    IndexError
        If *text* contains no ``'>>>'`` separator.
    """
    text = symbo_to_iso(text, dic_str_to_num)

    halves = text.split('>>>')
    o_smiles = halves[0]
    b_smiles = halves[1].split('<')[0]

    # One tuple per '<...>' section; an empty section means no leaving group.
    lg_lis = [
        tuple(section.split(',')) if section != '' else tuple()
        for section in re.findall(r"[<](.*?)[>]", text)
    ]

    (core_edits, chai_edits, stereo_edits,
     charge_edits, core_edits_add) = get_b_smiles_backward(b_smiles, o_smiles)
    lg_map_lis = get_lg_backward(core_edits, lg_lis)

    # Number the atoms 1..N in index order before applying the edits.
    mol = Chem.MolFromSmiles(o_smiles, sanitize=False)
    for atom in mol.GetAtoms():
        atom.SetAtomMapNum(atom.GetIdx() + 1)
    p = Chem.MolToSmiles(mol)

    return run_get_p_b_l_backward_with_mapping(
        p, core_edits, chai_edits, stereo_edits,
        charge_edits, core_edits_add, lg_map_lis,
    )
|
|
|
|
|
|
|
def merge_smiles(text):
    """Best-effort wrapper around ``merge_smiles_only``.

    Parameters
    ----------
    text: str
        e-SMILES string to decode.

    Returns
    -------
    The result of ``merge_smiles_only(text)``, or ``""`` if it raises.
    """
    try:
        return merge_smiles_only(text)
    except Exception:
        # Any decoding failure yields an empty result. ``Exception`` is used
        # instead of a bare ``except`` so KeyboardInterrupt and SystemExit
        # still propagate.
        return ""
|
|
|
|
|
def merge_smiles_with_mapping(text):
    """Best-effort wrapper around ``merge_smiles_with_mapping_only``.

    Parameters
    ----------
    text: str
        e-SMILES string to decode.

    Returns
    -------
    The result of ``merge_smiles_with_mapping_only(text)``, or ``""`` if it
    raises.
    """
    try:
        return merge_smiles_with_mapping_only(text)
    except Exception:
        # Any decoding failure yields an empty result. ``Exception`` is used
        # instead of a bare ``except`` so KeyboardInterrupt and SystemExit
        # still propagate.
        return ""
|
|
|
|
|
def get_e_smiles(rxn):
    """Encode a reaction as an e-SMILES string.

    Parameters
    ----------
    rxn:
        Reaction passed unchanged to ``run_get_p_b_l_forward``.

    Returns
    -------
    str
        ``o_smiles>>>b_smiles<lg,...><lg,...>`` after ``iso_to_symbo``
        conversion, where ``o_smiles`` is element 0 of the forward result
        rewritten without atom map numbers.
    """
    p_b = run_get_p_b_l_forward(rxn)
    b_smiles = get_b_smiles_check(p_b)
    lg_lis = get_lg_forward(p_b[1], p_b[6])

    # Drop the atom maps from the first element of the forward result.
    mol = Chem.MolFromSmiles(p_b[0], sanitize=False)
    for atom in mol.GetAtoms():
        atom.SetAtomMapNum(0)
    o_smiles = Chem.MolToSmiles(mol, canonical=False)

    # One '<...>' section per attachment slot, groups separated by commas.
    lg_part = ''.join('<{}>'.format(','.join(group)) for group in lg_lis)

    return iso_to_symbo(o_smiles + '>>>' + b_smiles + lg_part, dic_num_to_str)
|
|
|
def get_e_smiles_with_check(rxn):
    """Encode a reaction as an e-SMILES string via the checking runner.

    Parameters
    ----------
    rxn:
        Reaction passed unchanged to ``run_get_p_b_l_check``.

    Returns
    -------
    str
        ``o_smiles>>>b_smiles<lg,...><lg,...>`` after ``iso_to_symbo``
        conversion, where ``o_smiles`` is element 0 of the runner's result
        rewritten without atom map numbers.
    """
    p_b = run_get_p_b_l_check(rxn)
    b_smiles = get_b_smiles_check(p_b)
    lg_lis = get_lg_forward(p_b[1], p_b[6])

    # Drop the atom maps from the first element of the runner's result.
    mol = Chem.MolFromSmiles(p_b[0], sanitize=False)
    for atom in mol.GetAtoms():
        atom.SetAtomMapNum(0)
    o_smiles = Chem.MolToSmiles(mol, canonical=False)

    # One '<...>' section per attachment slot, groups separated by commas.
    lg_part = ''.join('<{}>'.format(','.join(group)) for group in lg_lis)

    return iso_to_symbo(o_smiles + '>>>' + b_smiles + lg_part, dic_num_to_str)
|
|
|
def get_edit_from_e_smiles(text):
    """Decode an e-SMILES string into its edit lists.

    Parameters
    ----------
    text: str
        String of the form ``o_smiles>>>b_smiles<lg,...><lg,...>``. Symbols
        are first converted back to numbers with ``symbo_to_iso``.

    Returns
    -------
    tuple
        ``(core_edits, chai_edits, stereo_edits, charge_edits,
        core_edits_add, lg_map_lis)``: the five values from
        ``get_b_smiles_backward`` followed by the ``get_lg_backward`` result.

    Raises
    ------
    IndexError
        If *text* contains no ``'>>>'`` separator.
    """
    text = symbo_to_iso(text, dic_str_to_num)

    halves = text.split('>>>')
    o_smiles = halves[0]
    b_smiles = halves[1].split('<')[0]

    # One tuple per '<...>' section; an empty section means no leaving group.
    lg_lis = [
        tuple(section.split(',')) if section != '' else tuple()
        for section in re.findall(r"[<](.*?)[>]", text)
    ]

    (core_edits, chai_edits, stereo_edits,
     charge_edits, core_edits_add) = get_b_smiles_backward(b_smiles, o_smiles)
    lg_map_lis = get_lg_backward(core_edits, lg_lis)

    return (core_edits, chai_edits, stereo_edits,
            charge_edits, core_edits_add, lg_map_lis)