NiniCat LfOreVEr committed on
Commit
f90af69
1 Parent(s): fca5575

Update cas12.py (#5)

Browse files

- Update cas12.py (a5a7472cbc5fea41f3f151cb51ef3137c6c66ea7)


Co-authored-by: Qingyang Liu <LfOreVEr@users.noreply.huggingface.co>

Files changed (1) hide show
  1. cas12.py +11 -7
cas12.py CHANGED
@@ -87,20 +87,23 @@ def fetch_ensembl_sequence(transcript_id):
87
  print(f"Error fetching sequence data from Ensembl: {response.text}")
88
  return None
89
 
90
def find_crispr_targets(sequence, chr, start, strand, transcript_id, exon_id, pam="TTTN", target_length=34):
    """Scan ``sequence`` for Cas12 target windows and report their coordinates.

    Every ``target_length``-long window whose bases 4..6 (0-based) are
    ``TTT`` is reported; the guide RNA is the window's bases 8..27 with
    T transcribed to U.

    Args:
        sequence: Nucleotide string to scan. It is used as given (no
            complementing): for ``strand == -1`` it is assumed to already be
            written 5'->3' on the minus strand, so that ``sequence[0]`` is the
            base with the highest genomic coordinate -- TODO confirm against
            the sequence fetcher.
        chr: Chromosome / sequence-region name, copied into each record.
        start: Genomic coordinate of the lowest-numbered base covered by
            ``sequence``. The covered region is assumed to be exactly
            ``len(sequence)`` bases long.
        strand: ``-1`` for the minus strand; any other value is treated as
            the plus strand.
        transcript_id: Copied into each record.
        exon_id: Copied into each record.
        pam: Accepted for interface compatibility; the scan itself is
            hard-wired to ``TTT`` at window offsets 4..6.
        target_length: Window length in bases.

    Returns:
        A list of records ``[target_seq, gRNA, chr, tar_start, tar_end,
        strand, transcript_id, exon_id]``. ``tar_start``, ``tar_end`` and
        ``strand`` are strings, and the coordinates are inclusive with
        ``tar_start <= tar_end``. Windows whose guide region contains
        anything other than uppercase A/C/G/T are skipped.
    """
    rna_table = str.maketrans("T", "U")
    unambiguous = frozenset("ACGT")
    # Inclusive coordinate of the last base, needed to map minus-strand
    # offsets (which count down from the high end of the region).
    region_end = start + len(sequence) - 1

    targets = []
    for offset in range(len(sequence) - target_length + 1):
        target_seq = sequence[offset:offset + target_length]
        if target_seq[4:7] != 'TTT':
            continue

        protospacer = target_seq[8:28]
        # An ambiguous (e.g. N) or soft-masked base cannot be turned into a
        # usable guide; skip the window instead of failing the whole scan.
        if not unambiguous.issuperset(protospacer):
            continue

        if strand == -1:
            tar_end = region_end - offset
            tar_start = tar_end - target_length + 1
        else:
            tar_start = start + offset
            tar_end = tar_start + target_length - 1

        gRNA = protospacer.translate(rna_table)
        targets.append([target_seq, gRNA, chr, str(tar_start), str(tar_end), str(strand), transcript_id, exon_id])
    return targets
@@ -142,9 +145,10 @@ def process_gene(gene_symbol, model_path):
142
  gene_sequence = fetch_ensembl_sequence(exon_id)
143
  if gene_sequence:
144
  start = Exons[j]['start']
 
145
  strand = Exons[j]['strand']
146
  chr = Exons[j]['seq_region_name']
147
- targets = find_crispr_targets(gene_sequence, chr, start, strand, transcript_id, exon_id)
148
  if targets:
149
  formatted_data = format_prediction_output(targets, model_path)
150
  results.append(formatted_data)
 
87
  print(f"Error fetching sequence data from Ensembl: {response.text}")
88
  return None
89
 
90
def find_crispr_targets(sequence, chr, start, end, strand, transcript_id, exon_id, pam="TTTN", target_length=34):
    """Scan ``sequence`` for Cas12 target windows and map them to genomic coordinates.

    Every ``target_length``-long window whose bases 4..6 (0-based) are
    ``TTT`` is reported; the guide RNA is the window's bases 8..27 with
    T transcribed to U.

    Args:
        sequence: Nucleotide string to scan. ``sequence[0]`` is mapped to
            ``start`` on the plus strand and to ``end`` on the minus strand,
            i.e. the string is assumed to be written 5'->3' on the feature's
            own strand -- TODO confirm against the sequence fetcher.
        chr: Chromosome / sequence-region name, copied into each record.
        start: Genomic coordinate of the region's lowest-numbered base.
        end: Genomic coordinate of the region's highest-numbered base
            (inclusive).
        strand: ``-1`` for the minus strand; any other value is treated as
            the plus strand.
        transcript_id: Copied into each record.
        exon_id: Copied into each record.
        pam: Accepted for interface compatibility; the scan itself is
            hard-wired to ``TTT`` at window offsets 4..6.
        target_length: Window length in bases.

    Returns:
        A list of records ``[target_seq, gRNA, chr, tar_start, tar_end,
        strand, transcript_id, exon_id]``. ``tar_start``, ``tar_end`` and
        ``strand`` are strings, and the coordinates are inclusive with
        ``tar_start <= tar_end``. Windows whose guide region contains
        anything other than uppercase A/C/G/T are skipped.
    """
    rna_table = str.maketrans("T", "U")
    unambiguous = frozenset("ACGT")

    targets = []
    for offset in range(len(sequence) - target_length + 1):
        target_seq = sequence[offset:offset + target_length]
        if target_seq[4:7] != 'TTT':
            continue

        protospacer = target_seq[8:28]
        # An ambiguous (e.g. N) or soft-masked base cannot be turned into a
        # usable guide; skip the window instead of failing the whole scan.
        if not unambiguous.issuperset(protospacer):
            continue

        if strand == -1:
            # Offsets count down from the high end of the region.
            tar_end = end - offset
            tar_start = tar_end - target_length + 1
        else:
            tar_start = start + offset
            tar_end = tar_start + target_length - 1

        gRNA = protospacer.translate(rna_table)
        targets.append([target_seq, gRNA, chr, str(tar_start), str(tar_end), str(strand), transcript_id, exon_id])
    return targets
 
145
  gene_sequence = fetch_ensembl_sequence(exon_id)
146
  if gene_sequence:
147
  start = Exons[j]['start']
148
+ end = Exons[j]['end']
149
  strand = Exons[j]['strand']
150
  chr = Exons[j]['seq_region_name']
151
+ targets = find_crispr_targets(gene_sequence, chr, start, end, strand, transcript_id, exon_id)
152
  if targets:
153
  formatted_data = format_prediction_output(targets, model_path)
154
  results.append(formatted_data)