Spaces:

NiniCat
/

CRISPRTool

Running

App Files Files Community

supercat666 commited on May 29

Commit

e8b587f

•

1 Parent(s): c2e36d2

fix button

Browse files

Files changed (1) hide show

app.py +79 -247

app.py CHANGED Viewed

@@ -490,266 +490,98 @@ if selected_model == 'Cas9':
                 st.experimental_rerun()
 elif selected_model == 'Cas12':
     cas12target_selection = st.radio(
-        "Select either mutation or not:",
         ('regular', 'mutation'),
         key='cas12target_selection'
     )
     if 'current_gene_symbol' not in st.session_state:
         st.session_state['current_gene_symbol'] = ""
-        # Define a function to clean up old files
     def clean_up_old_files(gene_symbol):
-        genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
-        bed_file_path = f"{gene_symbol}_crispr_targets.bed"
-        csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
-        for path in [genbank_file_path, bed_file_path, csv_file_path]:
-            if os.path.exists(path):
-                os.remove(path)
-    # Gene symbol entry with autocomplete-like feature
-    gene_symbol = st.selectbox('Enter a Gene Symbol:', [''] + gene_symbol_list, key='gene_symbol',
-                               format_func=lambda x: x if x else "")
-    # Handle gene symbol change and file cleanup
-    if gene_symbol != st.session_state['current_gene_symbol'] and gene_symbol:
         if st.session_state['current_gene_symbol']:
-            # Clean up files only if a different gene symbol is entered and a previous symbol exists
             clean_up_old_files(st.session_state['current_gene_symbol'])
-        # Update the session state with the new gene symbol
         st.session_state['current_gene_symbol'] = gene_symbol
-        if cas12target_selection == 'regular':
-            predict_button = st.button('Predict cas12')
-            if 'exons' not in st.session_state:
-                st.session_state['exons'] = []
-            # Process predictions
-            if predict_button and gene_symbol:
-                with st.spinner('Predicting... Please wait'):
-                    predictions, gene_sequence, exons = cas12lstm.process_gene(gene_symbol, cas12lstm_path)
-                    sorted_predictions = sorted(predictions, key=lambda x: x[8], reverse=True)[:10]
-                    st.session_state['on_target_results'] = sorted_predictions
-                    st.session_state['gene_sequence'] = gene_sequence  # Save gene sequence in session state
-                    st.session_state['exons'] = exons  # Store exon data
-                # Notify the user once the process is completed successfully.
-                st.success('Prediction completed!')
-                st.session_state['prediction_made'] = True
-                if 'on_target_results' in st.session_state and st.session_state['on_target_results']:
-                    ensembl_id = gene_annotations.get(gene_symbol, 'Unknown')  # Get Ensembl ID or default to 'Unknown'
-                    col1, col2, col3 = st.columns(3)
-                    with col1:
-                        st.markdown("**Genome**")
-                        st.markdown("Homo sapiens")
-                    with col2:
-                        st.markdown("**Gene**")
-                        st.markdown(f"{gene_symbol} : {ensembl_id} (primary)")
-                    with col3:
-                        st.markdown("**Nuclease**")
-                        st.markdown("SpCas9")
-                        # Include "Target" in the DataFrame's columns
-                    try:
-                        df = pd.DataFrame(st.session_state['on_target_results'],
-                                            columns=["Chr", "Start Pos", "End Pos", "Strand", "Transcript", "Exon",
-                                                       "Target",
-                                                       "gRNA", "Prediction"])
-                        st.dataframe(df)
-                    except ValueError as e:
-                        st.error(f"DataFrame creation error: {e}")
-                        # Optionally print or log the problematic data for debugging:
-                        print(st.session_state['on_target_results'])
-                    # Initialize Plotly figure
-                    fig = go.Figure()
-                    EXON_BASE = 0  # Base position for exons and CDS on the Y axis
-                    EXON_HEIGHT = 0.02  # How 'tall' the exon markers should appear
-                        # Plot Exons as small markers on the X-axis
-                    for exon in st.session_state['exons']:
-                        exon_start, exon_end = exon['start'], exon['end']
-                        fig.add_trace(go.Bar(
-                            x=[(exon_start + exon_end) / 2],
-                            y=[EXON_HEIGHT],
-                            width=[exon_end - exon_start],
-                            base=EXON_BASE,
-                            marker_color='rgba(128, 0, 128, 0.5)',
-                            name='Exon'
-                        ))
-                    VERTICAL_GAP = 0.2  # Gap between different ranks
-                    # Define max and min Y values based on strand and rank
-                    MAX_STRAND_Y = 0.1  # Maximum Y value for positive strand results
-                    MIN_STRAND_Y = -0.1  # Minimum Y value for negative strand results
-                    # Iterate over top 5 sorted predictions to create the plot
-                    for i, prediction in enumerate(st.session_state['on_target_results'][:5], start=1):  # Only top 5
-                        chrom, start, end, strand, transcript, exon, target, gRNA, prediction_score = prediction
-                        midpoint = (int(start) + int(end)) / 2
-                        # Vertical position based on rank, modified by strand
-                        y_value = (MAX_STRAND_Y - (i - 1) * VERTICAL_GAP) if strand == '1' or strand == '+' else (
-                                MIN_STRAND_Y + (i - 1) * VERTICAL_GAP)
-                        fig.add_trace(go.Scatter(
-                            x=[midpoint],
-                            y=[y_value],
-                            mode='markers+text',
-                            marker=dict(symbol='triangle-up' if strand == '1' or strand == '+' else 'triangle-down',
-                                            size=12),
-                            text=f"Rank: {i}",  # Text label
-                            hoverinfo='text',
-                            hovertext=f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == '1' or strand == '+' else '-'}<br>Transcript: {transcript}<br>Prediction: {prediction_score:.4f}",
-                        ))
-                    # Update layout for clarity and interaction
-                    fig.update_layout(
-                        title='Top 5 gRNA Sequences by Prediction Score',
-                        xaxis_title='Genomic Position',
-                        yaxis_title='Strand',
-                        yaxis=dict(tickvals=[MAX_STRAND_Y, MIN_STRAND_Y], ticktext=['+', '-']),
-                        showlegend=False,
-                        hovermode='x unified',
-                    )
-                    # Display the plot
-                    st.plotly_chart(fig)
-                    if 'gene_sequence' in st.session_state and st.session_state['gene_sequence']:
-                        gene_symbol = st.session_state['current_gene_symbol']
-                        gene_sequence = st.session_state['gene_sequence']
-                        # Define file paths
-                        genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
-                        bed_file_path = f"{gene_symbol}_crispr_targets.bed"
-                        csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
-                        plot_image_path = f"{gene_symbol}_gtracks_plot.png"
-                        # Generate files
-                        cas12lstm.generate_genbank_file_from_df(df, gene_sequence, gene_symbol, genbank_file_path)
-                        cas12lstm.create_bed_file_from_df(df, bed_file_path)
-                        cas12lstm.create_csv_from_df(df, csv_file_path)
-                        # Prepare an in-memory buffer for the ZIP file
-                        zip_buffer = io.BytesIO()
-                        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
-                            # For each file, add it to the ZIP file
-                            zip_file.write(genbank_file_path)
-                            zip_file.write(bed_file_path)
-                            zip_file.write(csv_file_path)
-                        # Important: move the cursor to the beginning of the BytesIO buffer before reading it
-                        zip_buffer.seek(0)
-                        # Specify the region you want to visualize
-                        min_start = df['Start Pos'].min()
-                        max_end = df['End Pos'].max()
-                        chromosome = df['Chr'].mode()[0]  # Assumes most common chromosome is the target
-                        region = f"{chromosome}:{min_start}-{max_end}"
-                        # Generate the pyGenomeTracks plot
-                        gtracks_command = f"gtracks {region} {bed_file_path} {plot_image_path}"
-                        subprocess.run(gtracks_command, shell=True)
-                        st.image(plot_image_path)
-                        # Display the download button for the ZIP file
-                        st.download_button(
-                            label="Download GenBank, BED, CSV files as ZIP",
-                            data=zip_buffer.getvalue(),
-                            file_name=f"{gene_symbol}_files.zip",
-                            mime="application/zip"
-                        )
-        elif cas12target_selection == 'mutation':
-            # Prediction button
-            predict_button = st.button('Predict cas12')
-            vcf_reader = cyvcf2.VCF('SRR25934512.filter.snps.indels.vcf.gz')
-            if 'exons' not in st.session_state:
-                st.session_state['exons'] = []
-            # Process predictions
-            if predict_button and gene_symbol:
-                with st.spinner('Predicting... Please wait'):
-                    predictions, gene_sequence, exons = cas12lstmvcf.process_gene(gene_symbol, vcf_reader,
-                                                                                    cas12lstm_path)
-                    full_predictions = sorted(predictions, key=lambda x: x[8], reverse=True)
-                    sorted_predictions = sorted(predictions, key=lambda x: x[8], reverse=True)[:10]
-                    st.session_state['full_results'] = full_predictions
-                    st.session_state['on_target_results'] = sorted_predictions
-                    st.session_state['gene_sequence'] = gene_sequence  # Save gene sequence in session state
-                    st.session_state['exons'] = exons  # Store exon data
-                # Notify the user once the process is completed successfully.
-                st.success('Prediction completed!')
-                st.session_state['prediction_made'] = True
-                if 'on_target_results' in st.session_state and st.session_state['on_target_results']:
-                    ensembl_id = gene_annotations.get(gene_symbol,
-                                                          'Unknown')  # Get Ensembl ID or default to 'Unknown'
-                    col1, col2, col3 = st.columns(3)
-                    with col1:
-                        st.markdown("**Genome**")
-                        st.markdown("Homo sapiens")
-                    with col2:
-                        st.markdown("**Gene**")
-                        st.markdown(f"{gene_symbol} : {ensembl_id} (primary)")
-                    with col3:
-                        st.markdown("**Nuclease**")
-                        st.markdown("SpCas9")
-                    # Include "Target" in the DataFrame's columns
-                    try:
-                        df = pd.DataFrame(st.session_state['on_target_results'],
-                                            columns=["Gene Symbol", "Chr", "Strand", "Target Start", "Transcript",
-                                                    "Exon",
-                                                    "Target",
-                                                    "gRNA", "Prediction", "Is Mutation"])
-                        df_full = pd.DataFrame(st.session_state['full_results'],
-                                                columns=["Gene Symbol", "Chr", "Strand", "Target Start",
-                                                        "Transcript",
-                                                        "Exon", "Target",
-                                                        "gRNA", "Prediction", "Is Mutation"])
-                        st.dataframe(df)
-                    except ValueError as e:
-                        st.error(f"DataFrame creation error: {e}")
-                        # Optionally print or log the problematic data for debugging:
-                        print(st.session_state['on_target_results'])
-                    if 'gene_sequence' in st.session_state and st.session_state['gene_sequence']:
-                        gene_symbol = st.session_state['current_gene_symbol']
-                        gene_sequence = st.session_state['gene_sequence']
-                        # Define file paths
-                        genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
-                        bed_file_path = f"{gene_symbol}_crispr_targets.bed"
-                        csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
-                        plot_image_path = f"{gene_symbol}_gtracks_plot.png"
-                        # Generate files
-                        cas12lstmvcf.generate_genbank_file_from_df(df_full, gene_sequence, gene_symbol,
-                                                                  genbank_file_path)
-                        cas12lstmvcf.create_bed_file_from_df(df_full, bed_file_path)
-                        cas12lstmvcf.create_csv_from_df(df_full, csv_file_path)
-                        # Prepare an in-memory buffer for the ZIP file
-                        zip_buffer = io.BytesIO()
-                        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
-                            # For each file, add it to the ZIP file
-                            zip_file.write(genbank_file_path)
-                            zip_file.write(bed_file_path)
-                            zip_file.write(csv_file_path)
-                        # Display the download button for the ZIP file
-                        st.download_button(
-                            label="Download GenBank, BED, CSV files as ZIP",
-                            data=zip_buffer.getvalue(),
-                            file_name=f"{gene_symbol}_files.zip",
-                            mime="application/zip"
-                        )
 elif selected_model == 'Cas13d':
         ENTRY_METHODS = dict(

                 st.experimental_rerun()
 elif selected_model == 'Cas12':
+    def visualize_and_generate_files(df, gene_sequence, exons, gene_symbol):
+        fig = go.Figure()
+        # Exon visualization
+        for exon in exons:
+            exon_start, exon_end = exon['start'], exon['end']
+            fig.add_trace(go.Bar(x=[(exon_start + exon_end) / 2], y=[0.5], width=[exon_end - exon_start], base=0,
+                                 marker_color='purple', name='Exon'))
+        # Prediction visualization
+        for i, prediction in enumerate(df.itertuples(), start=1):
+            fig.add_trace(go.Scatter(x=[(prediction.Start_Pos + prediction.End_Pos) / 2], y=[1], mode='markers',
+                                     marker=dict(size=10, color='blue'), name=f'Prediction {i}'))
+        fig.update_layout(title='Cas12 Prediction Visualization', xaxis_title='Position',
+                          yaxis=dict(tickvals=[0.5, 1], ticktext=['Exons', 'Predictions']), showlegend=True)
+        st.plotly_chart(fig)
+        # File generation and download
+        generate_and_download_files(df, gene_symbol)
+    def generate_and_download_files(df, gene_symbol):
+        genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
+        bed_file_path = f"{gene_symbol}_crispr_targets.bed"
+        csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
+        df.to_csv(csv_file_path, index=False)
+        # Assume functions to generate GenBank and BED are defined in cas12lstm or cas12lstmvcf
+        cas12lstm.generate_genbank_file_from_df(df, gene_symbol, genbank_file_path)
+        cas12lstm.create_bed_file_from_df(df, bed_file_path)
+        zip_buffer = io.BytesIO()
+        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
+            zip_file.write(genbank_file_path)
+            zip_file.write(bed_file_path)
+            zip_file.write(csv_file_path)
+        zip_buffer.seek(0)
+        st.download_button("Download GenBank, BED, CSV files as ZIP", data=zip_buffer.getvalue(),
+                           file_name=f"{gene_symbol}_files.zip", mime="application/zip")
+    def display_results(predictions, gene_sequence, exons, gene_symbol):
+        st.success('Prediction completed!')
+        ensembl_id = gene_annotations.get(gene_symbol, 'Unknown')
+        st.write(f"**Genome:** Homo sapiens")
+        st.write(f"**Gene:** {gene_symbol} : {ensembl_id} (primary)")
+        st.write("**Nuclease:** Cas12")
+        df = pd.DataFrame(predictions,
+                          columns=["Chr", "Start Pos", "End Pos", "Strand", "Transcript", "Exon", "Target", "gRNA",
+                                   "Prediction"])
+        st.dataframe(df)
+        # Visualization and file generation as demonstrated in the Cas9 example
+        visualize_and_generate_files(df, gene_sequence, exons, gene_symbol)
     cas12target_selection = st.radio(
+        "Select either regular or mutation:",
         ('regular', 'mutation'),
         key='cas12target_selection'
     )
     if 'current_gene_symbol' not in st.session_state:
         st.session_state['current_gene_symbol'] = ""
     def clean_up_old_files(gene_symbol):
+        for suffix in ['_crispr_targets.gb', '_crispr_targets.bed', '_crispr_predictions.csv']:
+            file_path = f"{gene_symbol}{suffix}"
+            if os.path.exists(file_path):
+                os.remove(file_path)
+    gene_symbol = st.selectbox(
+        'Enter a Gene Symbol:',
+        [''] + gene_symbol_list,
+        key='gene_symbol',
+        format_func=lambda x: x if x else ""
+    )
+    if gene_symbol != st.session_state['current_gene_symbol']:
         if st.session_state['current_gene_symbol']:
             clean_up_old_files(st.session_state['current_gene_symbol'])
         st.session_state['current_gene_symbol'] = gene_symbol
+    if cas12target_selection == 'regular':
+        if st.button('Predict cas12 Regular'):
+            with st.spinner('Predicting... Please wait'):
+                predictions, gene_sequence, exons = cas12lstm.process_gene(gene_symbol, cas12lstm_path)
+                sorted_predictions = sorted(predictions, key=lambda x: x[8], reverse=True)[:10]
+                display_results(sorted_predictions, gene_sequence, exons, gene_symbol)
+    elif cas12target_selection == 'mutation':
+        vcf_reader = cyvcf2.VCF('SRR25934512.filter.snps.indels.vcf.gz')
+        if st.button('Predict cas12 Mutation'):
+            with st.spinner('Predicting... Please wait'):
+                predictions, gene_sequence, exons = cas12lstmvcf.process_gene(gene_symbol, vcf_reader, cas12lstm_path)
+                sorted_predictions = sorted(predictions, key=lambda x: x[8], reverse=True)[:10]
+                display_results(sorted_predictions, gene_sequence, exons, gene_symbol)
 elif selected_model == 'Cas13d':
         ENTRY_METHODS = dict(