Spaces:
Running
Running
supercat666
committed on
Commit
•
a6e4e6b
1
Parent(s):
3ccc65d
fix
Browse files
cas9on.py
CHANGED
@@ -203,37 +203,45 @@ def process_gene(gene_symbol, model_path):
|
|
203 |
# df.to_csv(output_path, index=False)
|
204 |
|
205 |
|
|
|
|
|
|
|
206 |
def create_bigwig(df, bigwig_path):
|
207 |
-
#
|
208 |
required_columns = ["Chr", "Start Pos", "End Pos", "Prediction"]
|
209 |
if not all(column in df.columns for column in required_columns):
|
210 |
raise ValueError(f"DataFrame must contain {required_columns} columns.")
|
211 |
|
212 |
-
# Convert columns to
|
213 |
df['Start Pos'] = df['Start Pos'].astype(int)
|
214 |
df['End Pos'] = df['End Pos'].astype(int)
|
215 |
df['Prediction'] = df['Prediction'].astype(float)
|
|
|
|
|
216 |
df = df.sort_values(by=['Chr', 'Start Pos'])
|
217 |
|
218 |
-
#
|
219 |
chr_sizes = df.groupby('Chr')['End Pos'].max().to_dict()
|
220 |
header = [(chr, size) for chr, size in chr_sizes.items()]
|
221 |
|
222 |
-
#
|
223 |
bw = pyBigWig.open(bigwig_path, "w")
|
224 |
bw.addHeader(header)
|
225 |
|
226 |
-
#
|
227 |
-
for chr in
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
bw.addEntries([chr] * len(starts), starts, ends=ends, values=values)
|
233 |
|
|
|
234 |
bw.close()
|
235 |
|
236 |
|
237 |
|
238 |
|
239 |
|
|
|
|
|
|
|
|
203 |
# df.to_csv(output_path, index=False)
|
204 |
|
205 |
|
206 |
+
import pyBigWig
|
207 |
+
|
208 |
+
|
209 |
def create_bigwig(df, bigwig_path):
    """Write the per-interval predictions in ``df`` to a BigWig file.

    Args:
        df: pandas DataFrame with the columns "Chr", "Start Pos", "End Pos"
            and "Prediction". The caller's frame is not modified.
        bigwig_path: Path of the BigWig file to create.

    Raises:
        ValueError: If any of the required columns is missing from ``df``.
    """
    # Check for required columns in the DataFrame
    required_columns = ["Chr", "Start Pos", "End Pos", "Prediction"]
    if not all(column in df.columns for column in required_columns):
        raise ValueError(f"DataFrame must contain {required_columns} columns.")

    # Work on a copy so the type conversions below do not leak into the
    # caller's DataFrame.
    intervals = df[required_columns].copy()

    # Convert columns to the types handed to pyBigWig: string chromosome
    # names, integer coordinates and float values.
    intervals["Chr"] = intervals["Chr"].astype(str)
    intervals["Start Pos"] = intervals["Start Pos"].astype(int)
    intervals["End Pos"] = intervals["End Pos"].astype(int)
    intervals["Prediction"] = intervals["Prediction"].astype(float)

    # Sort by chromosome and start position so entries are added in order.
    intervals = intervals.sort_values(by=["Chr", "Start Pos"])

    # The header size of each chromosome is the largest end position seen in
    # the data, not the real chromosome length. Sizes are cast to plain int.
    chr_sizes = intervals.groupby("Chr")["End Pos"].max()
    header = [(chrom, int(size)) for chrom, size in chr_sizes.items()]

    bw = pyBigWig.open(bigwig_path, "w")
    try:
        bw.addHeader(header)

        # groupby("Chr") yields chromosomes in the same sorted order that was
        # used to build the header above.
        for chrom, group in intervals.groupby("Chr"):
            starts = group["Start Pos"].tolist()
            ends = group["End Pos"].tolist()
            values = group["Prediction"].tolist()
            # With explicit ``ends`` the chromosome argument must be a list
            # with one name per interval (see the pyBigWig addEntries docs);
            # a single string is only accepted together with ``span``.
            bw.addEntries([chrom] * len(starts), starts, ends=ends, values=values)
    finally:
        # Always release the file handle, even if writing failed part-way.
        bw.close()
|
240 |
|
241 |
|
242 |
|
243 |
|
244 |
|
245 |
+
|
246 |
+
|
247 |
+
|