Spaces:

NiniCat
/

CRISPRTool

Sleeping

App Files Community

supercat666 committed on Mar 25

Commit

9999544

•

1 Parent(s): 22fbe15

fix

Browse files

Files changed (2) hide show

app.py +9 -10
cas9on.py +54 -45

app.py CHANGED Viewed

@@ -275,18 +275,17 @@ if selected_model == 'Cas9':
                 if 'gene_sequence' in st.session_state and st.session_state['gene_sequence']:
                     gene_symbol = st.session_state['current_gene_symbol']
                     gene_sequence = st.session_state['gene_sequence']
-                    df = st.session_state['on_target_results']  # Make sure df is the DataFrame you want to use
                     # Define file paths
-                    genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
-                    bed_file_path = f"{gene_symbol}_crispr_targets.bed"
-                    csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
                     bigwig_file_path = f"{gene_symbol}_crispr_predictions.bw"
                     # Generate files
-                    cas9on.generate_genbank_file_from_df(df, gene_sequence, gene_symbol, genbank_file_path)
-                    cas9on.create_bed_file_from_df(df, bed_file_path)
-                    cas9on.create_csv_from_df(df, csv_file_path)
                     # Assuming create_bigwig_from_df is a function that generates a BigWig file from the DataFrame
                     cas9on.create_bigwig_from_df(df, bigwig_file_path)
@@ -295,9 +294,9 @@ if selected_model == 'Cas9':
                     zip_buffer = io.BytesIO()
                     with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
                         # For each file, add it to the ZIP file
-                        zip_file.write(genbank_file_path)
-                        zip_file.write(bed_file_path)
-                        zip_file.write(csv_file_path)
                         zip_file.write(bigwig_file_path)
                     # Important: move the cursor to the beginning of the BytesIO buffer before reading it

                 if 'gene_sequence' in st.session_state and st.session_state['gene_sequence']:
                     gene_symbol = st.session_state['current_gene_symbol']
                     gene_sequence = st.session_state['gene_sequence']
                     # Define file paths
+                    # genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
+                    # bed_file_path = f"{gene_symbol}_crispr_targets.bed"
+                    # csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
                     bigwig_file_path = f"{gene_symbol}_crispr_predictions.bw"
                     # Generate files
+                    # cas9on.generate_genbank_file_from_df(df, gene_sequence, gene_symbol, genbank_file_path)
+                    # cas9on.create_bed_file_from_df(df, bed_file_path)
+                    # cas9on.create_csv_from_df(df, csv_file_path)
                     # Assuming create_bigwig_from_df is a function that generates a BigWig file from the DataFrame
                     cas9on.create_bigwig_from_df(df, bigwig_file_path)
                     zip_buffer = io.BytesIO()
                     with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
                         # For each file, add it to the ZIP file
+                        # zip_file.write(genbank_file_path)
+                        # zip_file.write(bed_file_path)
+                        # zip_file.write(csv_file_path)
                         zip_file.write(bigwig_file_path)
                     # Important: move the cursor to the beginning of the BytesIO buffer before reading it

cas9on.py CHANGED Viewed

@@ -147,51 +147,60 @@ def process_gene(gene_symbol, model_path):
     return results, all_gene_sequences, all_exons
-def create_genbank_features(df):
-    features = []
-    for index, row in df.iterrows():
-        # Convert strand from '+/-' to 1/-1 for Biopython
-        strand = 1 if row['Strand'] == '+' else -1 if row['Strand'] == '-' else 0
-        # Create feature location using the 'Start Pos' and 'End Pos'
-        location = FeatureLocation(start=int(row['Start Pos']), end=int(row['End Pos']), strand=strand)
-        # Create a SeqFeature
-        feature = SeqFeature(location=location, type="misc_feature", qualifiers={
-            'label': row['gRNA'],  # Use gRNA as the label
-            'target': row['Target'],  # Include the target sequence
-            'note': f"Prediction: {row['Prediction']:.4f}",  # Include the prediction score
-            'transcript_id': row['Transcript'],
-            'exon_id': row['Exon']
-        })
-        features.append(feature)
-    return features
-def generate_genbank_file_from_df(df, gene_sequence, gene_symbol, output_path):
-    features = create_genbank_features(df)
-    record = SeqRecord(Seq(gene_sequence), id=gene_symbol, name=gene_symbol,
-                       description=f'CRISPR Cas9 predicted targets for {gene_symbol}', features=features)
-    record.annotations["molecule_type"] = "DNA"
-    SeqIO.write(record, output_path, "genbank")
-def create_bed_file_from_df(df, output_path):
-    with open(output_path, 'w') as bed_file:
-        for index, row in df.iterrows():
-            chrom = row["Chr"]
-            start = int(row["Start Pos"])  # Assuming 'Start Pos' is the column name in the df
-            end = int(row["End Pos"])  # Assuming 'End Pos' is the column name in the df
-            strand = '+' if row["Strand"] == '1' else '-'  # Assuming 'Strand' is the column name in the df
-            gRNA = row["gRNA"]
-            score = str(row["Prediction"])
-            transcript_id = row["Transcript"]  # Assuming 'Transcript' is the column name in the df
-            bed_file.write(f"{chrom}\t{start}\t{end}\t{gRNA}\t{score}\t{strand}\t{transcript_id}\n")
-def create_csv_from_df(df, output_path):
-    df.to_csv(output_path, index=False)
 def create_bigwig(df, bigwig_path, chrom_sizes_path):

     return results, all_gene_sequences, all_exons
+# def create_genbank_features(data):
+#     features = []
+#
+#     # If the input data is a DataFrame, convert it to a list of lists
+#     if isinstance(data, pd.DataFrame):
+#         formatted_data = data.values.tolist()
+#     elif isinstance(data, list):
+#         formatted_data = data
+#     else:
+#         raise TypeError("Data should be either a list or a pandas DataFrame.")
+#
+#     for row in formatted_data:
+#         try:
+#             start = int(row[1])
+#             end = int(row[2])
+#         except ValueError as e:
+#             print(f"Error converting start/end to int: {row[1]}, {row[2]} - {e}")
+#             continue
+#
+#         strand = 1 if row[3] == '+' else -1
+#         location = FeatureLocation(start=start, end=end, strand=strand)
+#         feature = SeqFeature(location=location, type="misc_feature", qualifiers={
+#             'label': row[7],  # Use gRNA as the label
+#             'note': f"Prediction: {row[8]}"  # Include the prediction score
+#         })
+#         features.append(feature)
+#
+#     return features
+#
+#
+# def generate_genbank_file_from_df(df, gene_sequence, gene_symbol, output_path):
+#     features = create_genbank_features(df)
+#     record = SeqRecord(Seq(gene_sequence), id=gene_symbol, name=gene_symbol,
+#                        description=f'CRISPR Cas9 predicted targets for {gene_symbol}', features=features)
+#     record.annotations["molecule_type"] = "DNA"
+#     SeqIO.write(record, output_path, "genbank")
+#
+#
+# def create_bed_file_from_df(df, output_path):
+#     with open(output_path, 'w') as bed_file:
+#         for index, row in df.iterrows():
+#             chrom = row["Chr"]
+#             start = int(row["Start Pos"])  # Assuming 'Start Pos' is the column name in the df
+#             end = int(row["End Pos"])  # Assuming 'End Pos' is the column name in the df
+#             strand = '+' if row["Strand"] == '1' else '-'  # Assuming 'Strand' is the column name in the df
+#             gRNA = row["gRNA"]
+#             score = str(row["Prediction"])
+#             transcript_id = row["Transcript"]  # Assuming 'Transcript' is the column name in the df
+#
+#             bed_file.write(f"{chrom}\t{start}\t{end}\t{gRNA}\t{score}\t{strand}\t{transcript_id}\n")
+#
+#
+# def create_csv_from_df(df, output_path):
+#     df.to_csv(output_path, index=False)
 def create_bigwig(df, bigwig_path, chrom_sizes_path):