supercat666 committed on
Commit
26e7c05
1 Parent(s): bc641c8
Files changed (3) hide show
  1. app.py +83 -2
  2. cas9att.py +0 -5
  3. cas9attvcf.py +9 -18
app.py CHANGED
@@ -145,8 +145,8 @@ gene_symbol_list = list(gene_annotations.keys()) # List of gene symbols for the
145
  if selected_model == 'Cas9':
146
  # Use a radio button to select enzymes, making sure only one can be selected at a time
147
  target_selection = st.radio(
148
- "Select either on-target or off-target:",
149
- ('on-target', 'off-target'),
150
  key='target_selection'
151
  )
152
  if 'current_gene_symbol' not in st.session_state:
@@ -319,6 +319,87 @@ if selected_model == 'Cas9':
319
  file_name=f"{gene_symbol}_files.zip",
320
  mime="application/zip"
321
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322
 
323
  elif target_selection == 'off-target':
324
  ENTRY_METHODS = dict(
 
145
  if selected_model == 'Cas9':
146
  # Use a radio button to select enzymes, making sure only one can be selected at a time
147
  target_selection = st.radio(
148
+ "Select either on-target, on-target with mutation or off-target:",
149
+ ('on-target', 'mutation', 'off-target'),
150
  key='target_selection'
151
  )
152
  if 'current_gene_symbol' not in st.session_state:
 
319
  file_name=f"{gene_symbol}_files.zip",
320
  mime="application/zip"
321
  )
322
+ elif target_selection == 'mutation':
323
+ # Prediction button
324
+ predict_button = st.button('Predict on-target')
325
+ vcf_reader =...
326
+
327
+ if 'exons' not in st.session_state:
328
+ st.session_state['exons'] = []
329
+
330
+ # Process predictions
331
+ if predict_button and gene_symbol:
332
+ with st.spinner('Predicting... Please wait'):
333
+ predictions, gene_sequence, exons = cas9attvcf.process_gene(gene_symbol, cas9att_path)
334
+ full_predictions = sorted(predictions, key=lambda x: x[8], reverse=True)
335
+ sorted_predictions = sorted(predictions, key=lambda x: x[8], reverse=True)[:10]
336
+ st.session_state['full_results'] = full_predictions
337
+ st.session_state['on_target_results'] = sorted_predictions
338
+ st.session_state['gene_sequence'] = gene_sequence # Save gene sequence in session state
339
+ st.session_state['exons'] = exons # Store exon data
340
+
341
+ # Notify the user once the process is completed successfully.
342
+ st.success('Prediction completed!')
343
+ st.session_state['prediction_made'] = True
344
+
345
+ if 'on_target_results' in st.session_state and st.session_state['on_target_results']:
346
+ ensembl_id = gene_annotations.get(gene_symbol, 'Unknown') # Get Ensembl ID or default to 'Unknown'
347
+ col1, col2, col3 = st.columns(3)
348
+ with col1:
349
+ st.markdown("**Genome**")
350
+ st.markdown("Homo sapiens")
351
+ with col2:
352
+ st.markdown("**Gene**")
353
+ st.markdown(f"{gene_symbol} : {ensembl_id} (primary)")
354
+ with col3:
355
+ st.markdown("**Nuclease**")
356
+ st.markdown("SpCas9")
357
+ # Include "Target" in the DataFrame's columns
358
+ try:
359
+ df = pd.DataFrame(st.session_state['on_target_results'],
360
+ columns=["Gene Symbol", "Chr", "Strand", "Target Start", "Transcript", "Exon",
361
+ "Target",
362
+ "gRNA", "Prediction", "Is Mutation"])
363
+ df_full = pd.DataFrame(st.session_state['full_results'],
364
+ columns=["Gene Symbol", "Chr", "Strand", "Target Start", "Transcript",
365
+ "Exon", "Target",
366
+ "gRNA", "Prediction", "Is Mutation"])
367
+ st.dataframe(df)
368
+ except ValueError as e:
369
+ st.error(f"DataFrame creation error: {e}")
370
+ # Optionally print or log the problematic data for debugging:
371
+ print(st.session_state['on_target_results'])
372
+
373
+ if 'gene_sequence' in st.session_state and st.session_state['gene_sequence']:
374
+ gene_symbol = st.session_state['current_gene_symbol']
375
+ gene_sequence = st.session_state['gene_sequence']
376
+
377
+ # Define file paths
378
+ genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
379
+ bed_file_path = f"{gene_symbol}_crispr_targets.bed"
380
+ csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
381
+ plot_image_path = f"{gene_symbol}_gtracks_plot.png"
382
+
383
+ # Generate files
384
+ cas9att.generate_genbank_file_from_df(df_full, gene_sequence, gene_symbol, genbank_file_path)
385
+ cas9att.create_bed_file_from_df(df_full, bed_file_path)
386
+ cas9att.create_csv_from_df(df_full, csv_file_path)
387
+
388
+ # Prepare an in-memory buffer for the ZIP file
389
+ zip_buffer = io.BytesIO()
390
+ with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
391
+ # For each file, add it to the ZIP file
392
+ zip_file.write(genbank_file_path)
393
+ zip_file.write(bed_file_path)
394
+ zip_file.write(csv_file_path)
395
+
396
+ # Display the download button for the ZIP file
397
+ st.download_button(
398
+ label="Download GenBank, BED, CSV files as ZIP",
399
+ data=zip_buffer.getvalue(),
400
+ file_name=f"{gene_symbol}_files.zip",
401
+ mime="application/zip"
402
+ )
403
 
404
  elif target_selection == 'off-target':
405
  ENTRY_METHODS = dict(
cas9att.py CHANGED
@@ -224,11 +224,6 @@ def process_gene(gene_symbol, model_path):
224
  else:
225
  print("Failed to retrieve transcripts.")
226
 
227
- output = []
228
- for result in results:
229
- for item in result:
230
- output.append(item)
231
-
232
  # Return the sorted output, combined gene sequences, and all exons
233
  return results, all_gene_sequences, all_exons
234
 
 
224
  else:
225
  print("Failed to retrieve transcripts.")
226
 
 
 
 
 
 
227
  # Return the sorted output, combined gene sequences, and all exons
228
  return results, all_gene_sequences, all_exons
229
 
cas9attvcf.py CHANGED
@@ -325,16 +325,8 @@ def process_gene(gene_symbol, vcf_reader, model_path):
325
  else:
326
  print("Failed to retrieve transcripts.")
327
 
328
- output = []
329
- for result in results:
330
- for item in result:
331
- output.append(item)
332
-
333
- # Sort results based on prediction score (assuming score is at the 8th index)
334
- sorted_results = sorted(output, key=lambda x: x[8], reverse=True)
335
-
336
  # Return the sorted output, combined gene sequences, and all exons
337
- return sorted_results, all_gene_sequences, all_exons
338
 
339
 
340
  def create_genbank_features(data):
@@ -351,22 +343,22 @@ def create_genbank_features(data):
351
  for row in formatted_data:
352
  try:
353
  start = int(row[1])
354
- end = int(row[2])
355
  except ValueError as e:
356
  print(f"Error converting start/end to int: {row[1]}, {row[2]} - {e}")
357
  continue
358
 
359
- strand = 1 if row[3] == '+' else -1
360
  location = FeatureLocation(start=start, end=end, strand=strand)
 
361
  feature = SeqFeature(location=location, type="misc_feature", qualifiers={
362
  'label': row[7], # Use gRNA as the label
363
- 'note': f"Prediction: {row[8]}" # Include the prediction score
364
  })
365
  features.append(feature)
366
 
367
  return features
368
 
369
-
370
  def generate_genbank_file_from_df(df, gene_sequence, gene_symbol, output_path):
371
  # Ensure gene_sequence is a string before creating Seq object
372
  if not isinstance(gene_sequence, str):
@@ -381,22 +373,21 @@ def generate_genbank_file_from_df(df, gene_sequence, gene_symbol, output_path):
381
  record.annotations["molecule_type"] = "DNA"
382
  SeqIO.write(record, output_path, "genbank")
383
 
384
-
385
  def create_bed_file_from_df(df, output_path):
386
  with open(output_path, 'w') as bed_file:
387
  for index, row in df.iterrows():
388
  chrom = row["Chr"]
389
- start = int(row["Start Pos"])
390
- end = int(row["End Pos"])
391
  strand = '+' if row["Strand"] == '1' else '-'
392
  gRNA = row["gRNA"]
393
  score = str(row["Prediction"])
 
394
  # transcript_id is not typically part of the standard BED columns but added here for completeness
395
  transcript_id = row["Transcript"]
396
 
397
  # Writing only standard BED columns; additional columns can be appended as needed
398
- bed_file.write(f"{chrom}\t{start}\t{end}\t{gRNA}\t{score}\t{strand}\n")
399
-
400
 
401
  def create_csv_from_df(df, output_path):
402
  df.to_csv(output_path, index=False)
 
325
  else:
326
  print("Failed to retrieve transcripts.")
327
 
 
 
 
 
 
 
 
 
328
  # Return the sorted output, combined gene sequences, and all exons
329
+ return results, all_gene_sequences, all_exons
330
 
331
 
332
  def create_genbank_features(data):
 
343
  for row in formatted_data:
344
  try:
345
  start = int(row[1])
346
+ end = start + len(row[6]) # Calculate the end position based on the target sequence length
347
  except ValueError as e:
348
  print(f"Error converting start/end to int: {row[1]}, {row[2]} - {e}")
349
  continue
350
 
351
+ strand = 1 if row[3] == '1' else -1
352
  location = FeatureLocation(start=start, end=end, strand=strand)
353
+ is_mutation = 'Yes' if row[9] else 'No'
354
  feature = SeqFeature(location=location, type="misc_feature", qualifiers={
355
  'label': row[7], # Use gRNA as the label
356
+ 'note': f"Prediction: {row[8]}, Mutation: {is_mutation}" # Include the prediction score and mutation status
357
  })
358
  features.append(feature)
359
 
360
  return features
361
 
 
362
  def generate_genbank_file_from_df(df, gene_sequence, gene_symbol, output_path):
363
  # Ensure gene_sequence is a string before creating Seq object
364
  if not isinstance(gene_sequence, str):
 
373
  record.annotations["molecule_type"] = "DNA"
374
  SeqIO.write(record, output_path, "genbank")
375
 
 
376
  def create_bed_file_from_df(df, output_path):
377
  with open(output_path, 'w') as bed_file:
378
  for index, row in df.iterrows():
379
  chrom = row["Chr"]
380
+ start = int(row["Target Start"])
381
+ end = start + len(row["Target"]) # Calculate the end position based on the target sequence length
382
  strand = '+' if row["Strand"] == '1' else '-'
383
  gRNA = row["gRNA"]
384
  score = str(row["Prediction"])
385
+ is_mutation = 'Yes' if row["Is Mutation"] else 'No'
386
  # transcript_id is not typically part of the standard BED columns but added here for completeness
387
  transcript_id = row["Transcript"]
388
 
389
  # Writing only standard BED columns; additional columns can be appended as needed
390
+ bed_file.write(f"{chrom}\t{start}\t{end}\t{gRNA}\t{score}\t{strand}\t{is_mutation}\n")
 
391
 
392
  def create_csv_from_df(df, output_path):
393
  df.to_csv(output_path, index=False)