yinuozhang committed on
Commit
a9bf7b9
1 Parent(s): a7360b6
Files changed (1) hide show
  1. app.py +38 -48
app.py CHANGED
@@ -49,57 +49,47 @@ class PeptideAnalyzer:
49
 
50
  def is_cyclic(self, smiles):
51
  """
52
- Determine if SMILES represents a cyclic peptide
53
  Returns: (is_cyclic, peptide_cycles, aromatic_cycles)
54
  """
55
- cycle_info = {}
56
-
57
- # Find all cycle numbers and their contexts
58
- for match in re.finditer(r'(\d)', smiles):
59
- number = match.group(1)
60
- position = match.start(1)
61
-
62
- if number not in cycle_info:
63
- cycle_info[number] = []
64
- cycle_info[number].append({
65
- 'position': position,
66
- 'full_context': smiles[max(0, position-3):min(len(smiles), position+4)]
67
- })
68
-
69
- # Check each cycle
70
- peptide_cycles = []
71
  aromatic_cycles = []
 
 
 
 
72
 
73
- for number, occurrences in cycle_info.items():
74
- if len(occurrences) != 2:
75
- continue
76
-
77
- start, end = occurrences[0]['position'], occurrences[1]['position']
78
- segment = smiles[start:end+1]
79
-
80
- # Check for aromatic rings
81
- full_context = smiles[max(0,start-10):min(len(smiles),end+10)]
82
- is_aromatic = ('c2ccccc2' in full_context and len(segment) < 20) or \
83
- ('c1ccccc1' in full_context and len(segment) < 20)
84
-
85
- # Check for peptide bonds
86
- peptide_patterns = [
87
- 'C(=O)N', # Regular peptide bond
88
- 'C(=O)N(C)', # N-methylated peptide bond
89
- 'C(=O)N1', # Cyclic peptide bond
90
- 'C(=O)N2' # Cyclic peptide bond
91
- ]
92
-
93
- has_peptide_bond = any(pattern in segment for pattern in peptide_patterns) and \
94
- len(segment) > 20
95
-
96
- if is_aromatic and len(segment) < 20:
97
- aromatic_cycles.append(number)
98
- elif has_peptide_bond:
99
- peptide_cycles.append(number)
100
 
101
- return len(peptide_cycles) > 0, peptide_cycles, aromatic_cycles
102
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
  def split_on_bonds(self, smiles):
105
  """Split SMILES into segments with simplified Pro handling"""
@@ -629,7 +619,7 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False, show_segm
629
  summary += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
630
  if is_cyclic:
631
  summary += f"Peptide Cycles: {', '.join(peptide_cycles)}\n"
632
- summary += f"Aromatic Cycles: {', '.join(aromatic_cycles)}\n"
633
 
634
  return summary + output_text, img_cyclic, img_linear
635
 
@@ -693,7 +683,7 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False, show_segm
693
  output_text += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
694
  if is_cyclic:
695
  output_text += f"Peptide Cycles: {', '.join(peptide_cycles)}\n"
696
- output_text += f"Aromatic Cycles: {', '.join(aromatic_cycles)}\n"
697
  output_text += "-" * 50 + "\n"
698
 
699
  return output_text, None, None
 
49
 
50
  def is_cyclic(self, smiles):
51
  """
52
+ Determine if SMILES represents a cyclic peptide by checking head-tail connection.
53
  Returns: (is_cyclic, peptide_cycles, aromatic_cycles)
54
  """
55
+ # First find aromatic rings
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  aromatic_cycles = []
57
+ for match in re.finditer(r'c[12]ccccc[12]', smiles):
58
+ number = match.group(0)[1]
59
+ if number not in aromatic_cycles:
60
+ aromatic_cycles.append(str(number))
61
 
62
+ # Find potential cycle numbers and their contexts
63
+ cycle_closures = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
+ # Look for cycle starts and corresponding ends
66
+ cycle_patterns = [
67
+ # Pattern pairs (start, end)
68
+ (r'[^\d](\d)[A-Z@]', r'C\1=O$'), # Classic C=O ending
69
+ (r'[^\d](\d)[A-Z@]', r'N\1C\(=O\)'), # N1C(=O) pattern
70
+ (r'[^\d](\d)[A-Z@]', r'N\1C$'), # Simple N1C ending
71
+ (r'[^\d](\d)C\(=O\)', r'N\1[A-Z]'), # Reverse connection
72
+ (r'H(\d)', r'N\1C'), # H1...N1C pattern
73
+ (r'[^\d](\d)(?:C|N|O)', r'(?:C|N)\1(?:\(|$)'), # Generic cycle closure
74
+ ]
75
+
76
+ for start_pat, end_pat in cycle_patterns:
77
+ start_matches = re.finditer(start_pat, smiles)
78
+ for start_match in start_matches:
79
+ number = start_match.group(1)
80
+ if number not in aromatic_cycles: # Skip aromatic ring numbers
81
+ # Look for corresponding end pattern
82
+ end_match = re.search(end_pat.replace('\\1', number), smiles)
83
+ if end_match and end_match.start() > start_match.start():
84
+ cycle_closures.append(number)
85
+ break
86
+
87
+ # Remove duplicates and aromatic numbers
88
+ peptide_cycles = list(set(cycle_closures) - set(aromatic_cycles))
89
+
90
+ is_cyclic = len(peptide_cycles) > 0
91
+
92
+ return is_cyclic, peptide_cycles, aromatic_cycles
93
 
94
  def split_on_bonds(self, smiles):
95
  """Split SMILES into segments with simplified Pro handling"""
 
619
  summary += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
620
  if is_cyclic:
621
  summary += f"Peptide Cycles: {', '.join(peptide_cycles)}\n"
622
+ #summary += f"Aromatic Cycles: {', '.join(aromatic_cycles)}\n"
623
 
624
  return summary + output_text, img_cyclic, img_linear
625
 
 
683
  output_text += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
684
  if is_cyclic:
685
  output_text += f"Peptide Cycles: {', '.join(peptide_cycles)}\n"
686
+ #output_text += f"Aromatic Cycles: {', '.join(aromatic_cycles)}\n"
687
  output_text += "-" * 50 + "\n"
688
 
689
  return output_text, None, None