supercat666 committed on
Commit
9999544
1 Parent(s): 22fbe15
Files changed (2) hide show
  1. app.py +9 -10
  2. cas9on.py +54 -45
app.py CHANGED
@@ -275,18 +275,17 @@ if selected_model == 'Cas9':
275
  if 'gene_sequence' in st.session_state and st.session_state['gene_sequence']:
276
  gene_symbol = st.session_state['current_gene_symbol']
277
  gene_sequence = st.session_state['gene_sequence']
278
- df = st.session_state['on_target_results'] # Make sure df is the DataFrame you want to use
279
 
280
  # Define file paths
281
- genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
282
- bed_file_path = f"{gene_symbol}_crispr_targets.bed"
283
- csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
284
  bigwig_file_path = f"{gene_symbol}_crispr_predictions.bw"
285
 
286
  # Generate files
287
- cas9on.generate_genbank_file_from_df(df, gene_sequence, gene_symbol, genbank_file_path)
288
- cas9on.create_bed_file_from_df(df, bed_file_path)
289
- cas9on.create_csv_from_df(df, csv_file_path)
290
 
291
  # Assuming create_bigwig_from_df is a function that generates a BigWig file from the DataFrame
292
  cas9on.create_bigwig_from_df(df, bigwig_file_path)
@@ -295,9 +294,9 @@ if selected_model == 'Cas9':
295
  zip_buffer = io.BytesIO()
296
  with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
297
  # For each file, add it to the ZIP file
298
- zip_file.write(genbank_file_path)
299
- zip_file.write(bed_file_path)
300
- zip_file.write(csv_file_path)
301
  zip_file.write(bigwig_file_path)
302
 
303
  # Important: move the cursor to the beginning of the BytesIO buffer before reading it
 
275
  if 'gene_sequence' in st.session_state and st.session_state['gene_sequence']:
276
  gene_symbol = st.session_state['current_gene_symbol']
277
  gene_sequence = st.session_state['gene_sequence']
 
278
 
279
  # Define file paths
280
+ # genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
281
+ # bed_file_path = f"{gene_symbol}_crispr_targets.bed"
282
+ # csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
283
  bigwig_file_path = f"{gene_symbol}_crispr_predictions.bw"
284
 
285
  # Generate files
286
+ # cas9on.generate_genbank_file_from_df(df, gene_sequence, gene_symbol, genbank_file_path)
287
+ # cas9on.create_bed_file_from_df(df, bed_file_path)
288
+ # cas9on.create_csv_from_df(df, csv_file_path)
289
 
290
  # Assuming create_bigwig_from_df is a function that generates a BigWig file from the DataFrame
291
  cas9on.create_bigwig_from_df(df, bigwig_file_path)
 
294
  zip_buffer = io.BytesIO()
295
  with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
296
  # For each file, add it to the ZIP file
297
+ # zip_file.write(genbank_file_path)
298
+ # zip_file.write(bed_file_path)
299
+ # zip_file.write(csv_file_path)
300
  zip_file.write(bigwig_file_path)
301
 
302
  # Important: move the cursor to the beginning of the BytesIO buffer before reading it
cas9on.py CHANGED
@@ -147,51 +147,60 @@ def process_gene(gene_symbol, model_path):
147
  return results, all_gene_sequences, all_exons
148
 
149
 
150
- def create_genbank_features(df):
151
- features = []
152
- for index, row in df.iterrows():
153
- # Convert strand from '+/-' to 1/-1 for Biopython
154
- strand = 1 if row['Strand'] == '+' else -1 if row['Strand'] == '-' else 0
155
-
156
- # Create feature location using the 'Start Pos' and 'End Pos'
157
- location = FeatureLocation(start=int(row['Start Pos']), end=int(row['End Pos']), strand=strand)
158
-
159
- # Create a SeqFeature
160
- feature = SeqFeature(location=location, type="misc_feature", qualifiers={
161
- 'label': row['gRNA'], # Use gRNA as the label
162
- 'target': row['Target'], # Include the target sequence
163
- 'note': f"Prediction: {row['Prediction']:.4f}", # Include the prediction score
164
- 'transcript_id': row['Transcript'],
165
- 'exon_id': row['Exon']
166
- })
167
- features.append(feature)
168
- return features
169
-
170
-
171
- def generate_genbank_file_from_df(df, gene_sequence, gene_symbol, output_path):
172
- features = create_genbank_features(df)
173
- record = SeqRecord(Seq(gene_sequence), id=gene_symbol, name=gene_symbol,
174
- description=f'CRISPR Cas9 predicted targets for {gene_symbol}', features=features)
175
- record.annotations["molecule_type"] = "DNA"
176
- SeqIO.write(record, output_path, "genbank")
177
-
178
-
179
- def create_bed_file_from_df(df, output_path):
180
- with open(output_path, 'w') as bed_file:
181
- for index, row in df.iterrows():
182
- chrom = row["Chr"]
183
- start = int(row["Start Pos"]) # Assuming 'Start Pos' is the column name in the df
184
- end = int(row["End Pos"]) # Assuming 'End Pos' is the column name in the df
185
- strand = '+' if row["Strand"] == '1' else '-' # Assuming 'Strand' is the column name in the df
186
- gRNA = row["gRNA"]
187
- score = str(row["Prediction"])
188
- transcript_id = row["Transcript"] # Assuming 'Transcript' is the column name in the df
189
-
190
- bed_file.write(f"{chrom}\t{start}\t{end}\t{gRNA}\t{score}\t{strand}\t{transcript_id}\n")
191
-
192
-
193
- def create_csv_from_df(df, output_path):
194
- df.to_csv(output_path, index=False)
 
 
 
 
 
 
 
 
 
195
 
196
 
197
  def create_bigwig(df, bigwig_path, chrom_sizes_path):
 
147
  return results, all_gene_sequences, all_exons
148
 
149
 
150
+ # def create_genbank_features(data):
151
+ # features = []
152
+ #
153
+ # # If the input data is a DataFrame, convert it to a list of lists
154
+ # if isinstance(data, pd.DataFrame):
155
+ # formatted_data = data.values.tolist()
156
+ # elif isinstance(data, list):
157
+ # formatted_data = data
158
+ # else:
159
+ # raise TypeError("Data should be either a list or a pandas DataFrame.")
160
+ #
161
+ # for row in formatted_data:
162
+ # try:
163
+ # start = int(row[1])
164
+ # end = int(row[2])
165
+ # except ValueError as e:
166
+ # print(f"Error converting start/end to int: {row[1]}, {row[2]} - {e}")
167
+ # continue
168
+ #
169
+ # strand = 1 if row[3] == '+' else -1
170
+ # location = FeatureLocation(start=start, end=end, strand=strand)
171
+ # feature = SeqFeature(location=location, type="misc_feature", qualifiers={
172
+ # 'label': row[7], # Use gRNA as the label
173
+ # 'note': f"Prediction: {row[8]}" # Include the prediction score
174
+ # })
175
+ # features.append(feature)
176
+ #
177
+ # return features
178
+ #
179
+ #
180
+ # def generate_genbank_file_from_df(df, gene_sequence, gene_symbol, output_path):
181
+ # features = create_genbank_features(df)
182
+ # record = SeqRecord(Seq(gene_sequence), id=gene_symbol, name=gene_symbol,
183
+ # description=f'CRISPR Cas9 predicted targets for {gene_symbol}', features=features)
184
+ # record.annotations["molecule_type"] = "DNA"
185
+ # SeqIO.write(record, output_path, "genbank")
186
+ #
187
+ #
188
+ # def create_bed_file_from_df(df, output_path):
189
+ # with open(output_path, 'w') as bed_file:
190
+ # for index, row in df.iterrows():
191
+ # chrom = row["Chr"]
192
+ # start = int(row["Start Pos"]) # Assuming 'Start Pos' is the column name in the df
193
+ # end = int(row["End Pos"]) # Assuming 'End Pos' is the column name in the df
194
+ # strand = '+' if row["Strand"] == '1' else '-' # Assuming 'Strand' is the column name in the df
195
+ # gRNA = row["gRNA"]
196
+ # score = str(row["Prediction"])
197
+ # transcript_id = row["Transcript"] # Assuming 'Transcript' is the column name in the df
198
+ #
199
+ # bed_file.write(f"{chrom}\t{start}\t{end}\t{gRNA}\t{score}\t{strand}\t{transcript_id}\n")
200
+ #
201
+ #
202
+ # def create_csv_from_df(df, output_path):
203
+ # df.to_csv(output_path, index=False)
204
 
205
 
206
  def create_bigwig(df, bigwig_path, chrom_sizes_path):