supercat666 committed on
Commit
445015e
1 Parent(s): 504e78c
Files changed (1) hide show
  1. cas9on.py +7 -6
cas9on.py CHANGED
@@ -167,22 +167,23 @@ def create_genbank_features(formatted_data):
167
  def generate_genbank_file_from_df(df, gene_sequence, gene_symbol, output_path):
168
  features = []
169
  for index, row in df.iterrows():
170
- # Correct column references according to your formatted data
 
 
 
171
  location = FeatureLocation(start=int(row["Start Pos"]),
172
  end=int(row["End Pos"]),
173
- strand=1 if row["Strand"] == '+' else -1) # Adjust strand representation
174
  feature = SeqFeature(location=location, type="gene", qualifiers={
175
- 'locus_tag': row["Transcript ID"], # Use 'Transcript ID' from your formatted data
176
  'note': f"gRNA: {row['gRNA']}, Prediction: {row['Prediction']}"
177
  })
178
  features.append(feature)
179
 
 
180
  record = SeqRecord(Seq(gene_sequence), id=gene_symbol, name=gene_symbol,
181
  description=f'CRISPR Cas9 predicted targets for {gene_symbol}', features=features)
182
-
183
- # Add the missing molecule_type annotation
184
  record.annotations["molecule_type"] = "DNA"
185
-
186
  SeqIO.write(record, output_path, "genbank")
187
 
188
 
 
167
  def generate_genbank_file_from_df(df, gene_sequence, gene_symbol, output_path):
168
  features = []
169
  for index, row in df.iterrows():
170
+ # Use 'Transcript ID' if it exists, otherwise use a default value like 'Unknown'
171
+ transcript_id = row.get("Transcript ID", "Unknown")
172
+
173
+ # Make sure to use the correct column names for Start Pos, End Pos, and Strand
174
  location = FeatureLocation(start=int(row["Start Pos"]),
175
  end=int(row["End Pos"]),
176
+ strand=1 if row["Strand"] == '+' else -1)
177
  feature = SeqFeature(location=location, type="gene", qualifiers={
178
+ 'locus_tag': transcript_id, # Now using the variable that holds the safe value
179
  'note': f"gRNA: {row['gRNA']}, Prediction: {row['Prediction']}"
180
  })
181
  features.append(feature)
182
 
183
+ # The rest of the function remains unchanged
184
  record = SeqRecord(Seq(gene_sequence), id=gene_symbol, name=gene_symbol,
185
  description=f'CRISPR Cas9 predicted targets for {gene_symbol}', features=features)
 
 
186
  record.annotations["molecule_type"] = "DNA"
 
187
  SeqIO.write(record, output_path, "genbank")
188
 
189