supercat666 committed on
Commit
2c449b4
1 Parent(s): 1acd869
Files changed (2) hide show
  1. cas12.py +13 -1
  2. cas9on.py +9 -8
cas12.py CHANGED
@@ -145,10 +145,22 @@ def process_gene(gene_symbol, model_path):
145
 
146
  return all_data, gene_sequence
147
 
 
148
  def create_genbank_features(formatted_data):
149
  features = []
150
  for data in formatted_data:
151
- location = FeatureLocation(start=int(data[1]), end=int(data[2]), strand=(1 if data[3] == '+' else -1))
 
 
 
 
 
 
 
 
 
 
 
152
  feature = SeqFeature(location=location, type="misc_feature", qualifiers={
153
  'label': data[5], # gRNA as label
154
  'note': f"Prediction: {data[6]}" # Prediction score in note
 
145
 
146
  return all_data, gene_sequence
147
 
148
+
149
  def create_genbank_features(formatted_data):
150
  features = []
151
  for data in formatted_data:
152
+ try:
153
+ # Attempt to convert start and end positions to integers
154
+ start = int(data[1])
155
+ end = int(data[2])
156
+ except ValueError as e:
157
+ # Log the error and skip this iteration if conversion fails
158
+ print(f"Error converting start/end to int: {data[1]}, {data[2]} - {e}")
159
+ continue # Skip this iteration
160
+
161
+ # Proceed as normal if conversion is successful
162
+ strand = 1 if data[3] == '+' else -1
163
+ location = FeatureLocation(start=start, end=end, strand=strand)
164
  feature = SeqFeature(location=location, type="misc_feature", qualifiers={
165
  'label': data[5], # gRNA as label
166
  'note': f"Prediction: {data[6]}" # Prediction score in note
cas9on.py CHANGED
@@ -40,23 +40,24 @@ class DCModelOntar:
40
  return yp.ravel()
41
 
42
  # Function to predict on-target efficiency and format output
43
- def format_prediction_output(gRNAs, model_path):
44
  dcModel = DCModelOntar(model_path)
45
  formatted_data = []
46
 
47
- for gRNA in gRNAs:
48
  # Encode the gRNA sequence
49
- encoded_seq = get_seqcode(gRNA[0]).reshape(-1,4,1,23)
50
 
51
  # Predict on-target efficiency using the model
52
  prediction = dcModel.ontar_predict(encoded_seq)
53
 
54
  # Format output
55
- chr = gRNA[1]
56
- start = gRNA[2]
57
- end = gRNA[3]
58
- strand = gRNA[4]
59
- formatted_data.append([chr, start, end, strand, gRNA[0], prediction[0]])
 
60
 
61
  return formatted_data
62
 
 
40
  return yp.ravel()
41
 
42
  # Function to predict on-target efficiency and format output
43
+ def format_prediction_output(targets, model_path):
44
  dcModel = DCModelOntar(model_path)
45
  formatted_data = []
46
 
47
+ for target in targets:
48
  # Encode the gRNA sequence
49
+ encoded_seq = get_seqcode(target[0]).reshape(-1,4,1,23)
50
 
51
  # Predict on-target efficiency using the model
52
  prediction = dcModel.ontar_predict(encoded_seq)
53
 
54
  # Format output
55
+ gRNA = target[1]
56
+ chr = target[2]
57
+ start = target[3]
58
+ end = target[4]
59
+ strand = target[5]
60
+ formatted_data.append([chr, start, end, strand, target[0], gRNA, prediction[0]])
61
 
62
  return formatted_data
63