Spaces:
Running
Running
yinuozhang
committed on
Commit
•
a9bf7b9
1
Parent(s):
a7360b6
cyclic
Browse files
app.py
CHANGED
@@ -49,57 +49,47 @@ class PeptideAnalyzer:
|
|
49 |
|
50 |
def is_cyclic(self, smiles):
|
51 |
"""
|
52 |
-
Determine if SMILES represents a cyclic peptide
|
53 |
Returns: (is_cyclic, peptide_cycles, aromatic_cycles)
|
54 |
"""
|
55 |
-
|
56 |
-
|
57 |
-
# Find all cycle numbers and their contexts
|
58 |
-
for match in re.finditer(r'(\d)', smiles):
|
59 |
-
number = match.group(1)
|
60 |
-
position = match.start(1)
|
61 |
-
|
62 |
-
if number not in cycle_info:
|
63 |
-
cycle_info[number] = []
|
64 |
-
cycle_info[number].append({
|
65 |
-
'position': position,
|
66 |
-
'full_context': smiles[max(0, position-3):min(len(smiles), position+4)]
|
67 |
-
})
|
68 |
-
|
69 |
-
# Check each cycle
|
70 |
-
peptide_cycles = []
|
71 |
aromatic_cycles = []
|
|
|
|
|
|
|
|
|
72 |
|
73 |
-
|
74 |
-
|
75 |
-
continue
|
76 |
-
|
77 |
-
start, end = occurrences[0]['position'], occurrences[1]['position']
|
78 |
-
segment = smiles[start:end+1]
|
79 |
-
|
80 |
-
# Check for aromatic rings
|
81 |
-
full_context = smiles[max(0,start-10):min(len(smiles),end+10)]
|
82 |
-
is_aromatic = ('c2ccccc2' in full_context and len(segment) < 20) or \
|
83 |
-
('c1ccccc1' in full_context and len(segment) < 20)
|
84 |
-
|
85 |
-
# Check for peptide bonds
|
86 |
-
peptide_patterns = [
|
87 |
-
'C(=O)N', # Regular peptide bond
|
88 |
-
'C(=O)N(C)', # N-methylated peptide bond
|
89 |
-
'C(=O)N1', # Cyclic peptide bond
|
90 |
-
'C(=O)N2' # Cyclic peptide bond
|
91 |
-
]
|
92 |
-
|
93 |
-
has_peptide_bond = any(pattern in segment for pattern in peptide_patterns) and \
|
94 |
-
len(segment) > 20
|
95 |
-
|
96 |
-
if is_aromatic and len(segment) < 20:
|
97 |
-
aromatic_cycles.append(number)
|
98 |
-
elif has_peptide_bond:
|
99 |
-
peptide_cycles.append(number)
|
100 |
|
101 |
-
|
102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
|
104 |
def split_on_bonds(self, smiles):
|
105 |
"""Split SMILES into segments with simplified Pro handling"""
|
@@ -629,7 +619,7 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False, show_segm
|
|
629 |
summary += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
|
630 |
if is_cyclic:
|
631 |
summary += f"Peptide Cycles: {', '.join(peptide_cycles)}\n"
|
632 |
-
summary += f"Aromatic Cycles: {', '.join(aromatic_cycles)}\n"
|
633 |
|
634 |
return summary + output_text, img_cyclic, img_linear
|
635 |
|
@@ -693,7 +683,7 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False, show_segm
|
|
693 |
output_text += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
|
694 |
if is_cyclic:
|
695 |
output_text += f"Peptide Cycles: {', '.join(peptide_cycles)}\n"
|
696 |
-
output_text += f"Aromatic Cycles: {', '.join(aromatic_cycles)}\n"
|
697 |
output_text += "-" * 50 + "\n"
|
698 |
|
699 |
return output_text, None, None
|
|
|
49 |
|
50 |
def is_cyclic(self, smiles):
    """
    Determine if SMILES represents a cyclic peptide by checking head-tail connection.

    This is a purely textual heuristic: ring-closure digits are located with
    regular expressions and paired through a fixed list of (start, end)
    patterns. No chemistry toolkit is involved.

    Args:
        smiles: SMILES string to inspect. ``None`` or an empty string is
            treated as "not cyclic".

    Returns: (is_cyclic, peptide_cycles, aromatic_cycles)
        is_cyclic: True when at least one peptide ring label was found.
        peptide_cycles: sorted list of ring-label digits (as strings) for
            which a start/end pattern pair matched.
        aromatic_cycles: ring-label digits (as strings, in order of first
            appearance) used by ``c?ccccc?`` benzene-like rings. Only the
            labels 1 and 2 are recognised by this pattern.
    """
    # Nothing to scan; also avoids a TypeError from re on None.
    if not smiles:
        return False, [], []

    # First find aromatic rings; their labels are excluded from the peptide
    # cycle search below.
    aromatic_cycles = []
    for match in re.finditer(r'c[12]ccccc[12]', smiles):
        number = match.group(0)[1]  # the ring digit right after the first 'c'
        if number not in aromatic_cycles:
            aromatic_cycles.append(number)

    # Pattern pairs (start, end). ``\1`` in the end pattern is a placeholder
    # that is textually replaced by the digit captured by the start pattern.
    cycle_patterns = [
        (r'[^\d](\d)[A-Z@]', r'C\1=O$'),                # Classic C=O ending
        (r'[^\d](\d)[A-Z@]', r'N\1C\(=O\)'),            # N1C(=O) pattern
        (r'[^\d](\d)[A-Z@]', r'N\1C$'),                 # Simple N1C ending
        (r'[^\d](\d)C\(=O\)', r'N\1[A-Z]'),             # Reverse connection
        (r'H(\d)', r'N\1C'),                            # H1...N1C pattern
        (r'[^\d](\d)(?:C|N|O)', r'(?:C|N)\1(?:\(|$)'),  # Generic cycle closure
    ]

    # A set removes duplicates when several pattern pairs hit the same label.
    cycle_closures = set()
    for start_pat, end_pat in cycle_patterns:
        for start_match in re.finditer(start_pat, smiles):
            number = start_match.group(1)
            if number in aromatic_cycles:
                # Skip aromatic ring numbers
                continue
            # Look for the corresponding end pattern; it only counts when it
            # begins after the start match.
            end_match = re.search(end_pat.replace('\\1', number), smiles)
            if end_match and end_match.start() > start_match.start():
                cycle_closures.add(number)
                # Each pattern pair contributes at most one ring label.
                break

    # Aromatic labels were never added above, so no further filtering is
    # needed. Sorting makes the result (and the text built from it by
    # callers) stable across runs instead of depending on set ordering.
    peptide_cycles = sorted(cycle_closures)

    return len(peptide_cycles) > 0, peptide_cycles, aromatic_cycles
|
93 |
|
94 |
def split_on_bonds(self, smiles):
|
95 |
"""Split SMILES into segments with simplified Pro handling"""
|
|
|
619 |
summary += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
|
620 |
if is_cyclic:
|
621 |
summary += f"Peptide Cycles: {', '.join(peptide_cycles)}\n"
|
622 |
+
#summary += f"Aromatic Cycles: {', '.join(aromatic_cycles)}\n"
|
623 |
|
624 |
return summary + output_text, img_cyclic, img_linear
|
625 |
|
|
|
683 |
output_text += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
|
684 |
if is_cyclic:
|
685 |
output_text += f"Peptide Cycles: {', '.join(peptide_cycles)}\n"
|
686 |
+
#output_text += f"Aromatic Cycles: {', '.join(aromatic_cycles)}\n"
|
687 |
output_text += "-" * 50 + "\n"
|
688 |
|
689 |
return output_text, None, None
|