supercat666 committed on
Commit
a6e4e6b
1 Parent(s): 3ccc65d
Files changed (1) hide show
  1. cas9on.py +19 -11
cas9on.py CHANGED
@@ -203,37 +203,45 @@ def process_gene(gene_symbol, model_path):
203
  # df.to_csv(output_path, index=False)
204
 
205
 
 
 
 
206
  def create_bigwig(df, bigwig_path):
207
- # Ensure the dataframe has the required columns
208
  required_columns = ["Chr", "Start Pos", "End Pos", "Prediction"]
209
  if not all(column in df.columns for column in required_columns):
210
  raise ValueError(f"DataFrame must contain {required_columns} columns.")
211
 
212
- # Convert columns to appropriate types and sort
213
  df['Start Pos'] = df['Start Pos'].astype(int)
214
  df['End Pos'] = df['End Pos'].astype(int)
215
  df['Prediction'] = df['Prediction'].astype(float)
 
 
216
  df = df.sort_values(by=['Chr', 'Start Pos'])
217
 
218
- # Prepare the BigWig header with chromosome sizes
219
  chr_sizes = df.groupby('Chr')['End Pos'].max().to_dict()
220
  header = [(chr, size) for chr, size in chr_sizes.items()]
221
 
222
- # Initialize the BigWig file
223
  bw = pyBigWig.open(bigwig_path, "w")
224
  bw.addHeader(header)
225
 
226
- # Iterate over each chromosome and add entries in sorted order
227
- for chr in sorted(df['Chr'].unique()):
228
- chrom_df = df[df['Chr'] == chr]
229
- starts = chrom_df['Start Pos'].tolist()
230
- ends = chrom_df['End Pos'].tolist()
231
- values = chrom_df['Prediction'].tolist()
232
- bw.addEntries([chr] * len(starts), starts, ends=ends, values=values)
233
 
 
234
  bw.close()
235
 
236
 
237
 
238
 
239
 
 
 
 
 
203
  # df.to_csv(output_path, index=False)
204
 
205
 
206
+ import pyBigWig
207
+
208
+
209
  def create_bigwig(df, bigwig_path):
210
+ # Check for required columns in the DataFrame
211
  required_columns = ["Chr", "Start Pos", "End Pos", "Prediction"]
212
  if not all(column in df.columns for column in required_columns):
213
  raise ValueError(f"DataFrame must contain {required_columns} columns.")
214
 
215
+ # Convert columns to the correct types
216
  df['Start Pos'] = df['Start Pos'].astype(int)
217
  df['End Pos'] = df['End Pos'].astype(int)
218
  df['Prediction'] = df['Prediction'].astype(float)
219
+
220
+ # Sort the DataFrame by chromosome and start position to ensure order
221
  df = df.sort_values(by=['Chr', 'Start Pos'])
222
 
223
+ # Calculate chromosome sizes for the BigWig header
224
  chr_sizes = df.groupby('Chr')['End Pos'].max().to_dict()
225
  header = [(chr, size) for chr, size in chr_sizes.items()]
226
 
227
+ # Create the BigWig file and add the header
228
  bw = pyBigWig.open(bigwig_path, "w")
229
  bw.addHeader(header)
230
 
231
+ # Add entries for each chromosome
232
+ for chr, group in df.groupby('Chr'):
233
+ starts = group['Start Pos'].tolist()
234
+ ends = group['End Pos'].tolist()
235
+ values = group['Prediction'].tolist()
236
+ bw.addEntries(chr, starts, ends=ends, values=values)
 
237
 
238
+ # Close the BigWig file
239
  bw.close()
240
 
241
 
242
 
243
 
244
 
245
+
246
+
247
+