Spaces:
Running
Running
yinuozhang
committed on
Commit
•
6a8393c
1
Parent(s):
a9bf7b9
improve bond recognition and glycine recognition
Browse files
app.py
CHANGED
@@ -17,11 +17,12 @@ from rdkit import Chem
|
|
17 |
class PeptideAnalyzer:
|
18 |
def __init__(self):
|
19 |
self.bond_patterns = [
|
20 |
-
r'OC\(=O\)', #
|
21 |
-
r'N\(C\)C\(=O\)', # N-methylated peptide bond
|
22 |
-
r'N[12]
|
23 |
-
r'
|
24 |
-
r'C\(=O\)N' #
|
|
|
25 |
]
|
26 |
|
27 |
def is_peptide(self, smiles):
|
@@ -39,12 +40,7 @@ class PeptideAnalyzer:
|
|
39 |
n_methyl_pattern = Chem.MolFromSmarts('[N;H0;$(NC)](C)[C](=O)')
|
40 |
if mol.HasSubstructMatch(n_methyl_pattern):
|
41 |
return True
|
42 |
-
|
43 |
-
# Look for ester bonds in cyclic depsipeptides: OC(=O) pattern
|
44 |
-
ester_bond_pattern = Chem.MolFromSmarts('O[C](=O)')
|
45 |
-
if mol.HasSubstructMatch(ester_bond_pattern):
|
46 |
-
return True
|
47 |
-
|
48 |
return False
|
49 |
|
50 |
def is_cyclic(self, smiles):
|
@@ -107,18 +103,8 @@ class PeptideAnalyzer:
|
|
107 |
'pattern': match.group()
|
108 |
})
|
109 |
used.update(range(match.start(), match.end()))
|
110 |
-
|
111 |
-
# Then find all bonds, including N2C(=O)
|
112 |
-
bond_patterns = [
|
113 |
-
(r'OC\(=O\)', 'ester'),
|
114 |
-
(r'N\(C\)C\(=O\)', 'n_methyl'),
|
115 |
-
(r'N[12]C\(=O\)', 'peptide'), # Pro peptide bonds
|
116 |
-
(r'NC\(=O\)', 'peptide'), # Regular peptide bonds
|
117 |
-
(r'C\(=O\)N\(C\)', 'n_methyl'),
|
118 |
-
(r'C\(=O\)N[12]?', 'peptide')
|
119 |
-
]
|
120 |
|
121 |
-
for pattern, bond_type in bond_patterns:
|
122 |
for match in re.finditer(pattern, smiles):
|
123 |
if not any(p in range(match.start(), match.end()) for p in used):
|
124 |
positions.append({
|
@@ -216,8 +202,14 @@ class PeptideAnalyzer:
|
|
216 |
return '4F-Phe', mods
|
217 |
|
218 |
# Regular residue identification
|
219 |
-
if 'NCC(=O)' in content:
|
220 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
221 |
|
222 |
if 'CC(C)C[C@H]' in content or 'CC(C)C[C@@H]' in content:
|
223 |
return 'Leu', mods
|
@@ -694,7 +686,7 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False, show_segm
|
|
694 |
return "No input provided.", None, None
|
695 |
|
696 |
iface = gr.Interface(
|
697 |
-
fn=process_input,
|
698 |
inputs=[
|
699 |
gr.Textbox(
|
700 |
label="Enter SMILES string",
|
|
|
17 |
class PeptideAnalyzer:
|
18 |
def __init__(self):
|
19 |
self.bond_patterns = [
|
20 |
+
(r'OC\(=O\)', 'ester'), # Ester bond
|
21 |
+
(r'N\(C\)C\(=O\)', 'n_methyl'), # N-methylated peptide bond
|
22 |
+
(r'N[12]C\(=O\)', 'proline'), # Proline peptide bond
|
23 |
+
(r'NC\(=O\)', 'peptide'), # Standard peptide bond
|
24 |
+
(r'C\(=O\)N\(C\)', 'n_methyl_reverse'), # Reverse N-methylated
|
25 |
+
(r'C\(=O\)N[12]?', 'peptide_reverse') # Reverse peptide bond
|
26 |
]
|
27 |
|
28 |
def is_peptide(self, smiles):
|
|
|
40 |
n_methyl_pattern = Chem.MolFromSmarts('[N;H0;$(NC)](C)[C](=O)')
|
41 |
if mol.HasSubstructMatch(n_methyl_pattern):
|
42 |
return True
|
43 |
+
|
|
|
|
|
|
|
|
|
|
|
44 |
return False
|
45 |
|
46 |
def is_cyclic(self, smiles):
|
|
|
103 |
'pattern': match.group()
|
104 |
})
|
105 |
used.update(range(match.start(), match.end()))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
|
107 |
+
for pattern, bond_type in self.bond_patterns:
|
108 |
for match in re.finditer(pattern, smiles):
|
109 |
if not any(p in range(match.start(), match.end()) for p in used):
|
110 |
positions.append({
|
|
|
202 |
return '4F-Phe', mods
|
203 |
|
204 |
# Regular residue identification
|
205 |
+
if ('NCC(=O)' in content) or (content == 'C'):
|
206 |
+
# Middle case - between bonds
|
207 |
+
if segment.get('bond_before') and segment.get('bond_after'):
|
208 |
+
if ('C(=O)N' in segment['bond_before'] or 'C(=O)N(C)' in segment['bond_before']):
|
209 |
+
return 'Gly', mods
|
210 |
+
# Terminal case - at the end
|
211 |
+
elif segment.get('bond_before') and segment.get('bond_before').startswith('C(=O)N'):
|
212 |
+
return 'Gly', mods
|
213 |
|
214 |
if 'CC(C)C[C@H]' in content or 'CC(C)C[C@@H]' in content:
|
215 |
return 'Leu', mods
|
|
|
686 |
return "No input provided.", None, None
|
687 |
|
688 |
iface = gr.Interface(
|
689 |
+
fn=process_input,
|
690 |
inputs=[
|
691 |
gr.Textbox(
|
692 |
label="Enter SMILES string",
|