yinuozhang committed on
Commit
6a8393c
1 Parent(s): a9bf7b9

improve bond recognition and glycine recognition

Browse files
Files changed (1) hide show
  1. app.py +17 -25
app.py CHANGED
@@ -17,11 +17,12 @@ from rdkit import Chem
17
  class PeptideAnalyzer:
18
  def __init__(self):
19
  self.bond_patterns = [
20
- r'OC\(=O\)', # ester bond
21
- r'N\(C\)C\(=O\)', # N-methylated peptide bond
22
- r'N[12]?C\(=O\)', # peptide bond (including Pro N1/N2)
23
- r'C\(=O\)N\(C\)', # N-methylated peptide bond reverse
24
- r'C\(=O\)N' # peptide bond reverse
 
25
  ]
26
 
27
  def is_peptide(self, smiles):
@@ -39,12 +40,7 @@ class PeptideAnalyzer:
39
  n_methyl_pattern = Chem.MolFromSmarts('[N;H0;$(NC)](C)[C](=O)')
40
  if mol.HasSubstructMatch(n_methyl_pattern):
41
  return True
42
-
43
- # Look for ester bonds in cyclic depsipeptides: OC(=O) pattern
44
- ester_bond_pattern = Chem.MolFromSmarts('O[C](=O)')
45
- if mol.HasSubstructMatch(ester_bond_pattern):
46
- return True
47
-
48
  return False
49
 
50
  def is_cyclic(self, smiles):
@@ -107,18 +103,8 @@ class PeptideAnalyzer:
107
  'pattern': match.group()
108
  })
109
  used.update(range(match.start(), match.end()))
110
-
111
- # Then find all bonds, including N2C(=O)
112
- bond_patterns = [
113
- (r'OC\(=O\)', 'ester'),
114
- (r'N\(C\)C\(=O\)', 'n_methyl'),
115
- (r'N[12]C\(=O\)', 'peptide'), # Pro peptide bonds
116
- (r'NC\(=O\)', 'peptide'), # Regular peptide bonds
117
- (r'C\(=O\)N\(C\)', 'n_methyl'),
118
- (r'C\(=O\)N[12]?', 'peptide')
119
- ]
120
 
121
- for pattern, bond_type in bond_patterns:
122
  for match in re.finditer(pattern, smiles):
123
  if not any(p in range(match.start(), match.end()) for p in used):
124
  positions.append({
@@ -216,8 +202,14 @@ class PeptideAnalyzer:
216
  return '4F-Phe', mods
217
 
218
  # Regular residue identification
219
- if 'NCC(=O)' in content:
220
- return 'Gly', mods
 
 
 
 
 
 
221
 
222
  if 'CC(C)C[C@H]' in content or 'CC(C)C[C@@H]' in content:
223
  return 'Leu', mods
@@ -694,7 +686,7 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False, show_segm
694
  return "No input provided.", None, None
695
 
696
  iface = gr.Interface(
697
- fn=process_input, # Your processing function
698
  inputs=[
699
  gr.Textbox(
700
  label="Enter SMILES string",
 
17
  class PeptideAnalyzer:
18
  def __init__(self):
19
  self.bond_patterns = [
20
+ (r'OC\(=O\)', 'ester'), # Ester bond
21
+ (r'N\(C\)C\(=O\)', 'n_methyl'), # N-methylated peptide bond
22
+ (r'N[12]C\(=O\)', 'proline'), # Proline peptide bond
23
+ (r'NC\(=O\)', 'peptide'), # Standard peptide bond
24
+ (r'C\(=O\)N\(C\)', 'n_methyl_reverse'), # Reverse N-methylated
25
+ (r'C\(=O\)N[12]?', 'peptide_reverse') # Reverse peptide bond
26
  ]
27
 
28
  def is_peptide(self, smiles):
 
40
  n_methyl_pattern = Chem.MolFromSmarts('[N;H0;$(NC)](C)[C](=O)')
41
  if mol.HasSubstructMatch(n_methyl_pattern):
42
  return True
43
+
 
 
 
 
 
44
  return False
45
 
46
  def is_cyclic(self, smiles):
 
103
  'pattern': match.group()
104
  })
105
  used.update(range(match.start(), match.end()))
 
 
 
 
 
 
 
 
 
 
106
 
107
+ for pattern, bond_type in self.bond_patterns:
108
  for match in re.finditer(pattern, smiles):
109
  if not any(p in range(match.start(), match.end()) for p in used):
110
  positions.append({
 
202
  return '4F-Phe', mods
203
 
204
  # Regular residue identification
205
+ if ('NCC(=O)' in content) or (content == 'C'):
206
+ # Middle case - between bonds
207
+ if segment.get('bond_before') and segment.get('bond_after'):
208
+ if ('C(=O)N' in segment['bond_before'] or 'C(=O)N(C)' in segment['bond_before']):
209
+ return 'Gly', mods
210
+ # Terminal case - at the end
211
+ elif segment.get('bond_before') and segment.get('bond_before').startswith('C(=O)N'):
212
+ return 'Gly', mods
213
 
214
  if 'CC(C)C[C@H]' in content or 'CC(C)C[C@@H]' in content:
215
  return 'Leu', mods
 
686
  return "No input provided.", None, None
687
 
688
  iface = gr.Interface(
689
+ fn=process_input,
690
  inputs=[
691
  gr.Textbox(
692
  label="Enter SMILES string",