Spaces:

NiniCat
/

CRISPRTool

Running

App Files Community

supercat666 committed on May 18

Commit

26e7c05

•

1 Parent(s): bc641c8

add vcf

Browse files

Files changed (3) hide show

app.py +83 -2
cas9att.py +0 -5
cas9attvcf.py +9 -18

app.py CHANGED Viewed

@@ -145,8 +145,8 @@ gene_symbol_list = list(gene_annotations.keys())  # List of gene symbols for the
 if selected_model == 'Cas9':
     # Use a radio button to select enzymes, making sure only one can be selected at a time
     target_selection = st.radio(
-        "Select either on-target or off-target:",
-        ('on-target', 'off-target'),
         key='target_selection'
     )
     if 'current_gene_symbol' not in st.session_state:
@@ -319,6 +319,87 @@ if selected_model == 'Cas9':
                         file_name=f"{gene_symbol}_files.zip",
                         mime="application/zip"
                     )
     elif target_selection == 'off-target':
         ENTRY_METHODS = dict(

 if selected_model == 'Cas9':
     # Use a radio button to select enzymes, making sure only one can be selected at a time
     target_selection = st.radio(
+        "Select either on-target, on-target with mutation or off-target:",
+        ('on-target', 'mutation', 'off-target'),
         key='target_selection'
     )
     if 'current_gene_symbol' not in st.session_state:
                         file_name=f"{gene_symbol}_files.zip",
                         mime="application/zip"
                     )
+    elif target_selection == 'mutation':
+        # Prediction button
+        predict_button = st.button('Predict on-target')
+        vcf_reader =...
+        if 'exons' not in st.session_state:
+            st.session_state['exons'] = []
+        # Process predictions
+        if predict_button and gene_symbol:
+            with st.spinner('Predicting... Please wait'):
+                predictions, gene_sequence, exons = cas9attvcf.process_gene(gene_symbol, cas9att_path)
+                full_predictions = sorted(predictions, key=lambda x: x[8], reverse=True)
+                sorted_predictions = sorted(predictions, key=lambda x: x[8], reverse=True)[:10]
+                st.session_state['full_results'] = full_predictions
+                st.session_state['on_target_results'] = sorted_predictions
+                st.session_state['gene_sequence'] = gene_sequence  # Save gene sequence in session state
+                st.session_state['exons'] = exons  # Store exon data
+            # Notify the user once the process is completed successfully.
+            st.success('Prediction completed!')
+            st.session_state['prediction_made'] = True
+            if 'on_target_results' in st.session_state and st.session_state['on_target_results']:
+                ensembl_id = gene_annotations.get(gene_symbol, 'Unknown')  # Get Ensembl ID or default to 'Unknown'
+                col1, col2, col3 = st.columns(3)
+                with col1:
+                    st.markdown("**Genome**")
+                    st.markdown("Homo sapiens")
+                with col2:
+                    st.markdown("**Gene**")
+                    st.markdown(f"{gene_symbol} : {ensembl_id} (primary)")
+                with col3:
+                    st.markdown("**Nuclease**")
+                    st.markdown("SpCas9")
+                # Include "Target" in the DataFrame's columns
+                try:
+                    df = pd.DataFrame(st.session_state['on_target_results'],
+                                      columns=["Gene Symbol", "Chr", "Strand", "Target Start", "Transcript", "Exon",
+                                               "Target",
+                                               "gRNA", "Prediction", "Is Mutation"])
+                    df_full = pd.DataFrame(st.session_state['full_results'],
+                                           columns=["Gene Symbol", "Chr", "Strand", "Target Start", "Transcript",
+                                                    "Exon", "Target",
+                                                    "gRNA", "Prediction", "Is Mutation"])
+                    st.dataframe(df)
+                except ValueError as e:
+                    st.error(f"DataFrame creation error: {e}")
+                    # Optionally print or log the problematic data for debugging:
+                    print(st.session_state['on_target_results'])
+                if 'gene_sequence' in st.session_state and st.session_state['gene_sequence']:
+                    gene_symbol = st.session_state['current_gene_symbol']
+                    gene_sequence = st.session_state['gene_sequence']
+                    # Define file paths
+                    genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
+                    bed_file_path = f"{gene_symbol}_crispr_targets.bed"
+                    csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
+                    plot_image_path = f"{gene_symbol}_gtracks_plot.png"
+                    # Generate files
+                    cas9att.generate_genbank_file_from_df(df_full, gene_sequence, gene_symbol, genbank_file_path)
+                    cas9att.create_bed_file_from_df(df_full, bed_file_path)
+                    cas9att.create_csv_from_df(df_full, csv_file_path)
+                    # Prepare an in-memory buffer for the ZIP file
+                    zip_buffer = io.BytesIO()
+                    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
+                        # For each file, add it to the ZIP file
+                        zip_file.write(genbank_file_path)
+                        zip_file.write(bed_file_path)
+                        zip_file.write(csv_file_path)
+                    # Display the download button for the ZIP file
+                    st.download_button(
+                        label="Download GenBank, BED, CSV files as ZIP",
+                        data=zip_buffer.getvalue(),
+                        file_name=f"{gene_symbol}_files.zip",
+                        mime="application/zip"
+                    )
     elif target_selection == 'off-target':
         ENTRY_METHODS = dict(

cas9att.py CHANGED Viewed

@@ -224,11 +224,6 @@ def process_gene(gene_symbol, model_path):
     else:
         print("Failed to retrieve transcripts.")
-    output = []
-    for result in results:
-        for item in result:
-            output.append(item)
     # Return the sorted output, combined gene sequences, and all exons
     return results, all_gene_sequences, all_exons

     else:
         print("Failed to retrieve transcripts.")
     # Return the sorted output, combined gene sequences, and all exons
     return results, all_gene_sequences, all_exons

cas9attvcf.py CHANGED Viewed

@@ -325,16 +325,8 @@ def process_gene(gene_symbol, vcf_reader, model_path):
     else:
         print("Failed to retrieve transcripts.")
-    output = []
-    for result in results:
-        for item in result:
-            output.append(item)
-    # Sort results based on prediction score (assuming score is at the 8th index)
-    sorted_results = sorted(output, key=lambda x: x[8], reverse=True)
     # Return the sorted output, combined gene sequences, and all exons
-    return sorted_results, all_gene_sequences, all_exons
 def create_genbank_features(data):
@@ -351,22 +343,22 @@ def create_genbank_features(data):
     for row in formatted_data:
         try:
             start = int(row[1])
-            end = int(row[2])
         except ValueError as e:
             print(f"Error converting start/end to int: {row[1]}, {row[2]} - {e}")
             continue
-        strand = 1 if row[3] == '+' else -1
         location = FeatureLocation(start=start, end=end, strand=strand)
         feature = SeqFeature(location=location, type="misc_feature", qualifiers={
             'label': row[7],  # Use gRNA as the label
-            'note': f"Prediction: {row[8]}"  # Include the prediction score
         })
         features.append(feature)
     return features
 def generate_genbank_file_from_df(df, gene_sequence, gene_symbol, output_path):
     # Ensure gene_sequence is a string before creating Seq object
     if not isinstance(gene_sequence, str):
@@ -381,22 +373,21 @@ def generate_genbank_file_from_df(df, gene_sequence, gene_symbol, output_path):
     record.annotations["molecule_type"] = "DNA"
     SeqIO.write(record, output_path, "genbank")
 def create_bed_file_from_df(df, output_path):
     with open(output_path, 'w') as bed_file:
         for index, row in df.iterrows():
             chrom = row["Chr"]
-            start = int(row["Start Pos"])
-            end = int(row["End Pos"])
             strand = '+' if row["Strand"] == '1' else '-'
             gRNA = row["gRNA"]
             score = str(row["Prediction"])
             # transcript_id is not typically part of the standard BED columns but added here for completeness
             transcript_id = row["Transcript"]
             # Writing only standard BED columns; additional columns can be appended as needed
-            bed_file.write(f"{chrom}\t{start}\t{end}\t{gRNA}\t{score}\t{strand}\n")
 def create_csv_from_df(df, output_path):
     df.to_csv(output_path, index=False)

     else:
         print("Failed to retrieve transcripts.")
     # Return the sorted output, combined gene sequences, and all exons
+    return results, all_gene_sequences, all_exons
 def create_genbank_features(data):
     for row in formatted_data:
         try:
             start = int(row[1])
+            end = start + len(row[6])  # Calculate the end position based on the target sequence length
         except ValueError as e:
             print(f"Error converting start/end to int: {row[1]}, {row[2]} - {e}")
             continue
+        strand = 1 if row[3] == '1' else -1
         location = FeatureLocation(start=start, end=end, strand=strand)
+        is_mutation = 'Yes' if row[9] else 'No'
         feature = SeqFeature(location=location, type="misc_feature", qualifiers={
             'label': row[7],  # Use gRNA as the label
+            'note': f"Prediction: {row[8]}, Mutation: {is_mutation}"  # Include the prediction score and mutation status
         })
         features.append(feature)
     return features
 def generate_genbank_file_from_df(df, gene_sequence, gene_symbol, output_path):
     # Ensure gene_sequence is a string before creating Seq object
     if not isinstance(gene_sequence, str):
     record.annotations["molecule_type"] = "DNA"
     SeqIO.write(record, output_path, "genbank")
 def create_bed_file_from_df(df, output_path):
     """Write one BED-style line per row of ``df`` to ``output_path``.

     Each line holds seven tab-separated fields: chromosome, start, end,
     gRNA, prediction score, strand and a Yes/No mutation flag.  ``end`` is
     derived as ``start + len(target)``.

     Args:
         df: DataFrame providing the columns "Chr", "Target Start", "Target",
             "Strand", "gRNA", "Prediction" and "Is Mutation".
         output_path: Destination file; overwritten if it already exists.

     Raises:
         KeyError: If one of the columns listed above is missing.
         ValueError: If a "Target Start" value cannot be converted to int.
     """
     # Strand may arrive as the string '1' or, once pandas has inferred a
     # numeric dtype, as the integer 1.  Comparing the raw value with '1'
     # would label every integer-typed forward strand as '-', so the value
     # is normalised to text before the membership test.
     forward_strand_values = {"1", "+1", "+"}

     def bed_lines():
         # Yield one formatted BED line per DataFrame row.
         for _, row in df.iterrows():
             chrom = row["Chr"]
             start = int(row["Target Start"])
             # End position follows from the target sequence length.
             end = start + len(row["Target"])
             strand_text = str(row["Strand"]).strip()
             strand = '+' if strand_text in forward_strand_values else '-'
             gRNA = row["gRNA"]
             score = str(row["Prediction"])
             is_mutation = 'Yes' if row["Is Mutation"] else 'No'
             # Six standard BED columns followed by the mutation flag.
             yield f"{chrom}\t{start}\t{end}\t{gRNA}\t{score}\t{strand}\t{is_mutation}\n"

     with open(output_path, 'w') as bed_file:
         bed_file.writelines(bed_lines())
 def create_csv_from_df(df, output_path):
     """Save ``df`` as a CSV file at ``output_path`` without the index column."""
     include_index = False
     df.to_csv(output_path, index=include_index)