Spaces:

NiniCat
/

CRISPRTool

Sleeping

App Files Files Community

supercat666 committed on Mar 10

Commit

3023ae4

•

1 Parent(s): 379f333

fix

Browse files

Files changed (3) hide show

app.py +19 -14
cas9on.py +31 -32
crisprTool.md +2 -1

app.py CHANGED Viewed

@@ -181,12 +181,17 @@ if selected_model == 'Cas9':
                 # Include "Target" in the DataFrame's columns
                 try:
                     df = pd.DataFrame(st.session_state['on_target_results'],
-                                      columns=["Gene ID", "Start Pos", "End Pos", "Strand", "Target", "gRNA", "Prediction"])
                     st.dataframe(df)
                 except ValueError as e:
                     st.error(f"DataFrame creation error: {e}")
                     # Optionally print or log the problematic data for debugging:
                     print(st.session_state['on_target_results'])
                 # Initialize Plotly figure
                 fig = go.Figure()
@@ -219,17 +224,16 @@ if selected_model == 'Cas9':
                         name='CDS'
                     ))
-                # Adjust hover interaction and strand plotting
-                MAX_STRAND_Y = 0.5  # Maximum Y value for positive strand
-                MIN_STRAND_Y = -0.5  # Minimum Y value for negative strand
-                # Iterate over sorted predictions to create the plot
-                for i, prediction in enumerate(st.session_state['on_target_results'], start=1):
-                    chrom, start, end, strand, target, gRNA, pred_score = prediction
                     midpoint = (int(start) + int(end)) / 2
-                    # Position based on strand, but within a fixed range
-                    y_value = MAX_STRAND_Y if strand == '1' else MIN_STRAND_Y
                     fig.add_trace(go.Scatter(
                         x=[midpoint],
@@ -238,18 +242,19 @@ if selected_model == 'Cas9':
                         marker=dict(symbol='triangle-up' if strand == '1' else 'triangle-down', size=12),
                         text=f"Rank: {i}",  # Text label
                         hoverinfo='text',
-                        hovertext=f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == '1' else '-'}<br>Prediction Score: {pred_score:.4f}",
                     ))
                 # Update layout for clarity and interaction
                 fig.update_layout(
-                    title='Top 10 gRNA Sequences by Prediction Score',
                     xaxis_title='Genomic Position',
-                    yaxis_title='Strand',
-                    yaxis=dict(range=[MIN_STRAND_Y - 0.1, MAX_STRAND_Y + 0.1]),  # Fix y-axis range
                     showlegend=False,
                     hovermode='closest',  # Adjust hover mode
-                    hoverdistance=20,  # Reduce hover distance to improve accuracy
                 )
                 # Display the plot

                 # Include "Target" in the DataFrame's columns
                 try:
                     df = pd.DataFrame(st.session_state['on_target_results'],
+                                      columns=["Chr", "Start Pos", "End Pos", "Strand", "Transcript", "Target", "gRNA", "pred_Score"])
                     st.dataframe(df)
                 except ValueError as e:
                     st.error(f"DataFrame creation error: {e}")
                     # Optionally print or log the problematic data for debugging:
                     print(st.session_state['on_target_results'])
+                # Initialize Plotly figure
+                # Adjust hover interaction and strand plotting
+                MAX_STRAND_Y = 0.5  # Maximum Y value for positive strand
+                MIN_STRAND_Y = -0.5  # Minimum Y value for negative strand
                 # Initialize Plotly figure
                 fig = go.Figure()
                         name='CDS'
                     ))
+                # Define the vertical separation for each rank
+                VERTICAL_GAP = 0.2  # Gap between different ranks
+                # Iterate over top 5 sorted predictions to create the plot
+                for i, prediction in enumerate(st.session_state['on_target_results'][:5], start=1):  # Only top 5
+                    chrom, start, end, strand, transcript, target, gRNA, pred_score = prediction
                     midpoint = (int(start) + int(end)) / 2
+                    # Vertical position based on rank, spaced by VERTICAL_GAP
+                    y_value = MAX_STRAND_Y - (i - 1) * VERTICAL_GAP
                     fig.add_trace(go.Scatter(
                         x=[midpoint],
                         marker=dict(symbol='triangle-up' if strand == '1' else 'triangle-down', size=12),
                         text=f"Rank: {i}",  # Text label
                         hoverinfo='text',
+                        hovertext=f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == '1' else '-'}<br>Transcript:{transcript}<br>Prediction Score: {pred_score:.4f}",
                     ))
                 # Update layout for clarity and interaction
                 fig.update_layout(
+                    title='Top 5 gRNA Sequences by Prediction Score',
                     xaxis_title='Genomic Position',
+                    yaxis_title='Rank / Strand',
+                    yaxis=dict(range=[MAX_STRAND_Y - 5 * VERTICAL_GAP, MAX_STRAND_Y + 0.1]),
+                    # Adjust y-axis range to fit 5 ranks
                     showlegend=False,
                     hovermode='closest',  # Adjust hover mode
+                    hoverdistance=10,  # Reduce hover distance to improve accuracy
                 )
                 # Display the plot

cas9on.py CHANGED Viewed

@@ -52,12 +52,13 @@ def format_prediction_output(targets, model_path):
         prediction = dcModel.ontar_predict(encoded_seq)
         # Format output
-        gRNA = target[1]
         chr = target[2]
         start = target[3]
         end = target[4]
         strand = target[5]
-        formatted_data.append([chr, start, end, strand, target[0], gRNA, prediction[0]])
     return formatted_data
@@ -89,18 +90,41 @@ def fetch_ensembl_sequence(transcript_id):
         print(f"Error fetching sequence data from Ensembl: {response.text}")
         return None
-def find_crispr_targets(sequence, chr, start, strand, pam="NGG", target_length=20):
     targets = []
     len_sequence = len(sequence)
     for i in range(len_sequence - len(pam) + 1):
         if sequence[i + 1:i + 3] == pam[1:]:
             if i >= target_length:
                 target_seq = sequence[i - target_length:i + 3]
                 tar_start = start + i - target_length
                 tar_end = start + i + 3
-                gRNA = sequence[i - target_length:i]
-                targets.append([target_seq, gRNA, chr, str(tar_start), str(tar_end), str(strand)])
     return targets
@@ -111,21 +135,16 @@ def process_gene(gene_symbol, model_path):
     gene_sequence = ''  # Initialize an empty string for the gene sequence
     if transcripts:
         for transcript in transcripts:
-            transcript_id = transcript['id']
             chr = transcript.get('seq_region_name', 'unknown')
             start = transcript.get('start', 0)
             strand = transcript.get('strand', 'unknown')
             # Fetch the sequence here and concatenate if multiple transcripts
             gene_sequence += fetch_ensembl_sequence(transcript_id) or ''
             # Fetch exon and CDS information
             exons = fetch_ensembl_exons(transcript_id)
-            cds_list = fetch_ensembl_cds(transcript_id)
-            # You might want to do something specific with exons and CDS information here
-            # For example, store them, print them, or include them in your analysis
             if gene_sequence:
                 gRNA_sites = find_crispr_targets(gene_sequence, chr, start, strand)
                 if gRNA_sites:
@@ -135,26 +154,6 @@ def process_gene(gene_symbol, model_path):
     # Return the data, fetched sequence, and possibly exon/CDS data
     return all_data, gene_sequence, exons, cds_list
-def fetch_ensembl_exons(transcript_id):
-    """Fetch exon information for a given transcript from Ensembl."""
-    url = f"https://rest.ensembl.org/overlap/id/{transcript_id}?feature=exon;content-type=application/json"
-    response = requests.get(url)
-    if response.status_code == 200:
-        return response.json()  # Returns a list of exons for the transcript
-    else:
-        print(f"Error fetching exon data from Ensembl for transcript {transcript_id}: {response.text}")
-        return None
-def fetch_ensembl_cds(transcript_id):
-    """Fetch coding sequence (CDS) information for a given transcript from Ensembl."""
-    url = f"https://rest.ensembl.org/overlap/id/{transcript_id}?feature=cds;content-type=application/json"
-    response = requests.get(url)
-    if response.status_code == 200:
-        return response.json()  # Returns a list of CDS regions for the transcript
-    else:
-        print(f"Error fetching CDS data from Ensembl for transcript {transcript_id}: {response.text}")
-        return None
 def create_genbank_features(formatted_data):
     features = []
     for data in formatted_data:

         prediction = dcModel.ontar_predict(encoded_seq)
         # Format output
+        sgRNA = target[1]
         chr = target[2]
         start = target[3]
         end = target[4]
         strand = target[5]
+        transcript_id = target[6]
+        formatted_data.append([chr, start, end, strand, transcript_id, target[0], sgRNA, prediction[0]])
     return formatted_data
         print(f"Error fetching sequence data from Ensembl: {response.text}")
         return None
def fetch_ensembl_exons(transcript_id):
    """Fetch exon information for a given transcript from Ensembl.

    Sends a GET request to the Ensembl REST ``overlap/id`` endpoint with
    ``feature=exon`` and JSON content type.

    Args:
        transcript_id: Identifier interpolated into the request URL.

    Returns:
        The decoded JSON body when the server answers with HTTP 200,
        otherwise ``None``. ``None`` is also returned when the request itself
        fails (connection error, timeout, ...), matching the existing
        "print and return None" error contract.
    """
    url = f"https://rest.ensembl.org/overlap/id/{transcript_id}?feature=exon;content-type=application/json"
    try:
        # A timeout keeps a stalled connection from hanging the caller
        # indefinitely; 30 s is a generous bound, not a tuned value.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        print(f"Error fetching exon data from Ensembl for transcript {transcript_id}: {exc}")
        return None
    if response.status_code == 200:
        return response.json()  # Parsed JSON payload describing the exons
    print(f"Error fetching exon data from Ensembl for transcript {transcript_id}: {response.text}")
    return None
def fetch_ensembl_cds(transcript_id):
    """Fetch coding sequence (CDS) information for a given transcript from Ensembl.

    Sends a GET request to the Ensembl REST ``overlap/id`` endpoint with
    ``feature=cds`` and JSON content type.

    Args:
        transcript_id: Identifier interpolated into the request URL.

    Returns:
        The decoded JSON body when the server answers with HTTP 200,
        otherwise ``None``. ``None`` is also returned when the request itself
        fails (connection error, timeout, ...), matching the existing
        "print and return None" error contract.
    """
    url = f"https://rest.ensembl.org/overlap/id/{transcript_id}?feature=cds;content-type=application/json"
    try:
        # A timeout keeps a stalled connection from hanging the caller
        # indefinitely; 30 s is a generous bound, not a tuned value.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        print(f"Error fetching CDS data from Ensembl for transcript {transcript_id}: {exc}")
        return None
    if response.status_code == 200:
        return response.json()  # Parsed JSON payload describing the CDS regions
    print(f"Error fetching CDS data from Ensembl for transcript {transcript_id}: {response.text}")
    return None
def find_crispr_targets(sequence, chr, start, strand, transcript_id, pam="NGG", target_length=20):
    """Scan ``sequence`` for PAM sites and collect the protospacer before each.

    When ``strand == -1`` the sequence is reverse-complemented before
    scanning. Every position that has at least ``target_length`` bases
    upstream and matches ``pam`` yields one entry.

    Args:
        sequence: Nucleotide string to scan. Matching is case-sensitive.
        chr: Chromosome / region label copied verbatim into each result.
        start: Integer offset added to sequence indices to build coordinates.
        strand: Strand indicator; only the integer ``-1`` triggers
            reverse-complementing. It is stringified in the output.
        transcript_id: Identifier copied verbatim into each result.
        pam: PAM pattern located immediately 3' of the protospacer. ``N``
            matches any base; every other character must match exactly.
        target_length: Number of protospacer bases required before the PAM.

    Returns:
        A list of ``[target_seq, sgRNA, chr, tar_start, tar_end, strand,
        transcript_id]`` lists, where ``target_seq`` is protospacer + PAM,
        ``sgRNA`` is the protospacer alone, and the two coordinates and the
        strand are strings.
    """
    if strand == -1:
        # str.translate leaves characters missing from the table untouched,
        # so ambiguous bases (N) or unexpected symbols no longer raise
        # KeyError as the former dict lookup did. Case is preserved.
        revcomp_table = str.maketrans("ACGTNacgtn", "TGCANtgcan")
        sequence = sequence.translate(revcomp_table)[::-1]

    pam_len = len(pam)
    # Only non-wildcard PAM positions need to be compared.
    fixed_positions = [(offset, base) for offset, base in enumerate(pam) if base != "N"]

    targets = []
    # Starting at target_length guarantees a full protospacer upstream.
    for i in range(target_length, len(sequence) - pam_len + 1):
        if not all(sequence[i + offset] == base for offset, base in fixed_positions):
            continue
        sgRNA = sequence[i - target_length:i]
        target_seq = sequence[i - target_length:i + pam_len]
        # NOTE(review): coordinates are computed the same way for both
        # strands, i.e. as offsets into the scanned string added to `start`.
        # Whether that maps to the right genomic position for strand -1
        # depends on how the caller obtained `sequence` - confirm.
        tar_start = start + i - target_length
        tar_end = start + i + pam_len
        targets.append([target_seq, sgRNA, chr, str(tar_start), str(tar_end), str(strand), transcript_id])
    return targets
     gene_sequence = ''  # Initialize an empty string for the gene sequence
     if transcripts:
+        cds_list = fetch_ensembl_cds(transcripts)
         for transcript in transcripts:
+            transcript_id = transcript['display_name']
             chr = transcript.get('seq_region_name', 'unknown')
             start = transcript.get('start', 0)
             strand = transcript.get('strand', 'unknown')
             # Fetch the sequence here and concatenate if multiple transcripts
             gene_sequence += fetch_ensembl_sequence(transcript_id) or ''
             # Fetch exon and CDS information
             exons = fetch_ensembl_exons(transcript_id)
             if gene_sequence:
                 gRNA_sites = find_crispr_targets(gene_sequence, chr, start, strand)
                 if gRNA_sites:
     # Return the data, fetched sequence, and possibly exon/CDS data
     return all_data, gene_sequence, exons, cds_list
 def create_genbank_features(formatted_data):
     features = []
     for data in formatted_data:

crisprTool.md CHANGED Viewed

@@ -1,4 +1,5 @@
 CRISPR Online Tool for Cas9/Cas12/Cas13d Efficacy Prediction
-You are using version 0.2b of this tool.

 CRISPR Online Tool for Cas9/Cas12/Cas13d Efficacy Prediction
+You are using version 1.0b of this tool.
+Note: Once you click the download button, the page will refresh automatically.