Spaces:

NiniCat
/

CRISPRTool

Running

App Files Community

supercat666 committed on Feb 11

Commit

99d52d8

•

1 Parent(s): 3c85094

add Genbank

Browse files

Files changed (2) hide show

app.py +18 -4
cas9on.py +39 -10

app.py CHANGED Viewed

@@ -102,13 +102,17 @@ if selected_model == 'Cas9':
         # Process predictions
         if predict_button and gene_symbol:
-            predictions = cas9on.process_gene(gene_symbol, cas9on_path)
             sorted_predictions = sorted(predictions, key=lambda x: x[-1], reverse=True)[:10]
             st.session_state['on_target_results'] = sorted_predictions
         if 'on_target_results' in st.session_state and st.session_state['on_target_results']:
             df = pd.DataFrame(st.session_state['on_target_results'],
                               columns=["Gene ID", "Start Pos", "End Pos", "Strand", "gRNA", "Prediction"])
             st.write('Top on-target predictions:')
             st.dataframe(df)
@@ -124,9 +128,19 @@ if selected_model == 'Cas9':
                 track.add_feature(start, end, strand, label=label)
             # Save and display the visualization
-            gv_fig_path = "crispr_targets.png"
-            gv.savefig(gv_fig_path)
-            st.image(gv_fig_path, caption="CRISPR Targets Visualization")
     elif target_selection == 'off-target':
         ENTRY_METHODS = dict(

         # Process predictions
         if predict_button and gene_symbol:
+            predictions, gene_sequence = cas9on.process_gene(gene_symbol, cas9on_path)
             sorted_predictions = sorted(predictions, key=lambda x: x[-1], reverse=True)[:10]
             st.session_state['on_target_results'] = sorted_predictions
         if 'on_target_results' in st.session_state and st.session_state['on_target_results']:
             df = pd.DataFrame(st.session_state['on_target_results'],
                               columns=["Gene ID", "Start Pos", "End Pos", "Strand", "gRNA", "Prediction"])
+            # Pass the gene_sequence to the function
+            genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
+            cas9on.generate_genbank_file_from_df(df, gene_sequence, gene_symbol, genbank_file_path)
             st.write('Top on-target predictions:')
             st.dataframe(df)
                 track.add_feature(start, end, strand, label=label)
             # Save and display the visualization
+            fig = gv.plotfig()
+            st.pyplot(fig)
+            # After the GenomeViz plot, include the download button
+            with open(genbank_file_path, "rb") as file:
+                btn = st.download_button(
+                    label="Download GenBank file",
+                    data=file,
+                    file_name=genbank_file_path,
+                    mime="application/octet-stream"
+                )
+            os.remove(genbank_file_path)
     elif target_selection == 'off-target':
         ENTRY_METHODS = dict(

cas9on.py CHANGED Viewed

@@ -4,6 +4,10 @@ import pandas as pd
 import numpy as np
 from operator import add
 from functools import reduce
 from keras.models import load_model
 import random
@@ -35,7 +39,6 @@ class DCModelOntar:
         yp = self.model.predict(x)
         return yp.ravel()
 # Function to predict on-target efficiency and format output
 def format_prediction_output(gRNAs, model_path):
     dcModel = DCModelOntar(model_path)
@@ -102,6 +105,7 @@ def find_crispr_targets(sequence, chr, start, strand, pam="NGG", target_length=2
 def process_gene(gene_symbol, model_path):
     transcripts = fetch_ensembl_transcripts(gene_symbol)
     all_data = []
     if transcripts:
         for transcript in transcripts:
@@ -109,7 +113,8 @@ def process_gene(gene_symbol, model_path):
             chr = transcript.get('seq_region_name', 'unknown')
             start = transcript.get('start', 0)
             strand = transcript.get('strand', 'unknown')
-            gene_sequence = fetch_ensembl_sequence(transcript_id)
             if gene_sequence:
                 gRNA_sites = find_crispr_targets(gene_sequence, chr, start, strand)
@@ -117,11 +122,35 @@ def process_gene(gene_symbol, model_path):
                     formatted_data = format_prediction_output(gRNA_sites, model_path)
                     all_data.extend(formatted_data)
-    return all_data
-# Function to save results as CSV
-def save_to_csv(data, filename="crispr_results.csv"):
-    df = pd.DataFrame(data,
-                      columns=["Gene ID", "Start Pos", "End Pos", "Strand", "gRNA", "Prediction"])
-    df.to_csv(filename, index=False)

 import numpy as np
 from operator import add
 from functools import reduce
+from Bio import SeqIO
+from Bio.SeqRecord import SeqRecord
+from Bio.SeqFeature import SeqFeature, FeatureLocation
+from Bio.Seq import Seq
 from keras.models import load_model
 import random
         yp = self.model.predict(x)
         return yp.ravel()
 # Function to predict on-target efficiency and format output
 def format_prediction_output(gRNAs, model_path):
     dcModel = DCModelOntar(model_path)
 def process_gene(gene_symbol, model_path):
     transcripts = fetch_ensembl_transcripts(gene_symbol)
     all_data = []
+    gene_sequence = ''  # Initialize an empty string for the gene sequence
     if transcripts:
         for transcript in transcripts:
             chr = transcript.get('seq_region_name', 'unknown')
             start = transcript.get('start', 0)
             strand = transcript.get('strand', 'unknown')
+            # Fetch the sequence here and concatenate if multiple transcripts
+            gene_sequence += fetch_ensembl_sequence(transcript_id) or ''
             if gene_sequence:
                 gRNA_sites = find_crispr_targets(gene_sequence, chr, start, strand)
                     formatted_data = format_prediction_output(gRNA_sites, model_path)
                     all_data.extend(formatted_data)
+    # Return both the data and the fetched sequence
+    return all_data, gene_sequence
+def create_genbank_features(gRNAs, predictions):
+    features = []
+    for gRNA, prediction in zip(gRNAs, predictions):
+        # Assuming gRNA structure: [Target Seq, Chrom, Start Pos, End Pos, Strand]
+        # And prediction is a single floating point value
+        location = FeatureLocation(start=gRNA[2], end=gRNA[3], strand=gRNA[4])
+        # Creating a feature with type 'CDS' just as an example, change as appropriate
+        feature = SeqFeature(location=location, type="CDS", qualifiers={
+            'label': gRNA[0],  # Target sequence as label
+            'note': f"Prediction: {prediction}"  # Prediction score in note
+        })
+        features.append(feature)
+    return features
+def generate_genbank_file_from_df(df, gene_sequence, gene_symbol, output_path):
+    features = []
+    for index, row in df.iterrows():
+        location = FeatureLocation(start=int(row["Start Pos"]),
+                                   end=int(row["End Pos"]),
+                                   strand=int(row["Strand"]))
+        feature = SeqFeature(location=location, type="gene", qualifiers={
+            'locus_tag': row["Gene ID"],  # Assuming Gene ID is equivalent to Chromosome here
+            'note': f"gRNA: {row['gRNA']}, Prediction: {row['Prediction']}"
+        })
+        features.append(feature)
+    record = SeqRecord(Seq(gene_sequence), id=gene_symbol, name=gene_symbol,
+                       description='CRISPR Cas9 predicted targets', features=features)
+    SeqIO.write(record, output_path, "genbank")