File size: 32,667 Bytes
e93c659
 
4a303ce
 
0d0c645
dc94424
16e89c0
e93c659
 
73dcc35
105bca0
e93c659
49ebae9
 
544275a
d51aeae
544275a
3c85094
e93c659
aaf258e
0d0c645
aaf258e
6a7c0e6
aaf258e
2f35b39
306ab4d
c4a9d91
 
dfc8b26
45e4726
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0d0c645
 
83a2e73
 
e93c659
 
0d0c645
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83a2e73
0d0c645
 
 
83a2e73
0d0c645
 
 
83a2e73
0d0c645
 
 
83a2e73
a1203ca
 
 
743b6b7
9260769
 
a1203ca
 
9260769
 
 
a1203ca
 
 
2b3514d
a1203ca
0d0c645
 
 
 
 
 
 
 
c94ba08
 
 
 
 
 
 
 
 
 
 
 
a1203ca
 
 
9f2ad27
37deaa1
c94ba08
 
 
 
 
 
 
 
37deaa1
fcd198b
dd69d15
3488e95
242350b
 
 
fcd198b
dd69d15
37deaa1
16e89c0
56b7c8f
4a303ce
37deaa1
 
242350b
37deaa1
 
 
 
 
cf2d407
9260769
 
 
 
 
 
 
 
 
 
 
ba43ebe
7708ddd
 
4fa4501
7708ddd
 
 
 
 
3023ae4
5befd90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c94ba08
45e4726
 
 
 
 
d51aeae
 
 
 
45e4726
 
d51aeae
4a303ce
 
 
45e4726
 
 
 
 
d51aeae
 
 
 
45e4726
 
 
 
544275a
 
 
 
 
45e4726
 
d51aeae
 
544275a
45e4726
 
 
d51aeae
45e4726
 
 
 
3d0dd11
a5afc1a
0d0c645
 
 
 
 
 
 
 
 
 
 
 
83a2e73
0d0c645
 
 
 
 
 
 
83a2e73
0d0c645
 
 
 
 
 
 
 
 
 
 
 
83a2e73
0d0c645
adf804d
66c57f6
c0e089e
adf804d
 
66c57f6
c0e089e
 
 
 
 
 
 
66c57f6
 
0d0c645
83a2e73
0d0c645
 
 
 
 
 
83a2e73
0d0c645
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83a2e73
0d0c645
 
 
 
 
 
 
 
83a2e73
0d0c645
a1203ca
 
7708ddd
dc94424
c94ba08
 
 
 
dc94424
 
 
c94ba08
 
 
 
 
 
 
 
 
 
 
 
 
dc94424
 
1acd869
16e89c0
 
 
1acd869
107852c
 
16e89c0
 
1acd869
16e89c0
dc94424
 
16e89c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc94424
 
16e89c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3d0dd11
dc94424
aa8a757
16e89c0
e272fdd
16e89c0
 
 
dc94424
16e89c0
dc94424
7274bd3
16e89c0
dc94424
16e89c0
dc94424
16e89c0
 
 
 
dc94424
7274bd3
dc94424
 
 
49ebae9
 
 
 
7274bd3
dc94424
 
7274bd3
16e89c0
dc94424
7274bd3
16e89c0
 
 
49ebae9
 
 
 
 
16e89c0
 
 
49ebae9
 
 
 
16e89c0
 
 
 
 
 
 
 
 
 
 
49ebae9
 
16e89c0
49ebae9
 
 
3d0dd11
 
0d0c645
 
 
 
 
83a2e73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e93c659
83a2e73
 
 
 
 
 
e93c659
83a2e73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e93c659
83a2e73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0d0c645
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
import os
import tiger
import cas9att
import cas9attvcf
import cas9off
import cas12
import cas12lstm
import pandas as pd
import streamlit as st
import plotly.graph_objs as go
import numpy as np
from pathlib import Path
import zipfile
import io
import gtracks
import subprocess



# Page header: the documentation lives in crisprTool.md and is rendered verbatim (it may embed HTML)
_documentation_markdown = Path('crisprTool.md').read_text()
st.markdown(_documentation_markdown, unsafe_allow_html=True)
st.divider()

# Nucleases the app can score; the chosen entry drives the if/elif dispatch further down the script
CRISPR_MODELS = ['Cas9', 'Cas12', 'Cas13d']

selected_model = st.selectbox('Select CRISPR model:', CRISPR_MODELS, key='selected_model')

# Locations of the pretrained weight files handed to the prediction modules
cas9att_path = 'cas9_model/Cas9_MultiHeadAttention_weights.h5'
cas12_path = 'cas12_model/BiLSTM_Cpf1_weights.h5'

#plot functions
def generate_coolbox_plot(bigwig_path, region, output_image_path):
    """Plot one BigWig track over ``region`` and save the figure to ``output_image_path``.

    NOTE(review): ``CoolBox`` and ``BigWig`` are not imported in the visible part of this
    file -- confirm they are brought into scope before this function is called.
    """
    browser = CoolBox()
    signal_track = BigWig(bigwig_path)
    browser += signal_track
    browser.plot(region, savefig=output_image_path)

def generate_pygenometracks_plot(bigwig_file_path, region, output_image_path):
    """Write a one-track pyGenomeTracks config for a BigWig file and plot ``region`` from it.

    The configuration is written to ``pygenometracks.ini`` in the current working directory.
    ``region`` is parsed as ``<chrom>:<start>-<end>`` and handed to ``plot_tracks`` as a dict
    with keys ``chrom``, ``start`` and ``end``.

    NOTE(review): ``plot_tracks`` is not imported in the visible part of this file -- confirm
    where it is expected to come from.
    """
    # Track definition, assembled line by line; the leading indentation and trailing
    # whitespace are kept exactly as they appear in the generated file.
    ini_lines = [
        "",
        "    [bigwig]",
        f"    file = {bigwig_file_path}",
        "    height = 4",
        "    color = blue",
        "    min_value = 0",
        "    max_value = 10",
        "    ",
    ]
    ini_text = "\n".join(ini_lines)

    # Persist the configuration so the plotting call can read it from disk
    config_file_path = "pygenometracks.ini"
    with open(config_file_path, 'w') as ini_handle:
        ini_handle.write(ini_text)

    # Split "<chrom>:<start>-<end>" into its three components
    region_fields = region.split(':')
    chrom_name = region_fields[0]
    span_fields = region_fields[1].split('-')
    region_dict = {
        'chrom': chrom_name,
        'start': int(span_fields[0]),
        'end': int(span_fields[1]),
    }

    # Generate the plot
    plot_tracks(tracks_file=config_file_path,
                region=region_dict,
                out_file_name=output_image_path)

@st.cache_data
def convert_df(df):
    """Return ``df`` serialized as CSV and encoded to UTF-8 bytes.

    Wrapped in ``st.cache_data`` so the conversion is not recomputed on every script rerun.
    """
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')


def mode_change_callback():
    """Keep the off-target checkbox state in sync with the selected run mode.

    For the 'all' and 'titration' run modes the off-target option is cleared and its
    checkbox disabled; for every other mode the checkbox is enabled again.
    """
    current_mode = st.session_state.mode
    modes_without_off_target = {tiger.RUN_MODES['all'], tiger.RUN_MODES['titration']}  # TODO: support titration
    off_target_unavailable = current_mode in modes_without_off_target

    if off_target_unavailable:
        st.session_state.check_off_targets = False
    st.session_state.disable_off_target_checkbox = off_target_unavailable


def progress_update(update_text, percent_complete):
    """Show ``update_text`` and a progress bar inside the module-level ``progress`` placeholder.

    ``percent_complete`` is divided by 100 before being passed to ``st.progress``.
    """
    status_area = progress.container()
    with status_area:
        st.write(update_text)
        fraction_done = percent_complete / 100
        st.progress(fraction_done)


def initiate_run():
    """Reset the run state, load the user's transcript(s) and validate them.

    Reads ``entry_method`` plus ``manual_entry`` or ``fasta_entry`` from
    ``st.session_state``. On success the validated transcripts are stored in
    ``st.session_state.transcripts``; on a validation failure a message is stored
    in ``st.session_state.input_error`` instead. Nothing is returned.
    """
    # initialize state variables
    st.session_state.transcripts = None
    st.session_state.input_error = None
    st.session_state.on_target = None
    st.session_state.titration = None
    st.session_state.off_target = None

    # initialize transcript DataFrame
    transcripts = pd.DataFrame(columns=[tiger.ID_COL, tiger.SEQ_COL])

    # manual entry
    if st.session_state.entry_method == ENTRY_METHODS['manual']:
        transcripts = pd.DataFrame({
            tiger.ID_COL: ['ManualEntry'],
            tiger.SEQ_COL: [st.session_state.manual_entry]
        }).set_index(tiger.ID_COL)

    # fasta file upload
    elif st.session_state.entry_method == ENTRY_METHODS['fasta']:
        if st.session_state.fasta_entry is not None:
            # The upload is staged on disk because tiger.load_transcripts takes file paths.
            # Only the base name is used so a client-supplied name cannot point outside
            # the working directory.
            fasta_path = os.path.basename(st.session_state.fasta_entry.name)
            try:
                with open(fasta_path, 'w', encoding='utf-8') as f:
                    f.write(st.session_state.fasta_entry.getvalue().decode('utf-8'))
                transcripts = tiger.load_transcripts([fasta_path], enforce_unique_ids=False)
            finally:
                # Always remove the staged copy, even if decoding or parsing failed
                if os.path.exists(fasta_path):
                    os.remove(fasta_path)

    # convert to upper case as used by tokenizer
    transcripts[tiger.SEQ_COL] = transcripts[tiger.SEQ_COL].apply(lambda s: s.upper().replace('U', 'T'))

    # ensure all transcripts have unique identifiers
    if transcripts.index.has_duplicates:
        st.session_state.input_error = "Duplicate transcript ID's detected in fasta file"

    # ensure all transcripts only contain nucleotides A, C, G, T, and wildcard N
    elif not all(transcripts[tiger.SEQ_COL].apply(lambda s: set(s).issubset(tiger.NUCLEOTIDE_TOKENS.keys()))):
        st.session_state.input_error = 'Transcript(s) must only contain upper or lower case A, C, G, and Ts or Us'

    # ensure all transcripts satisfy length requirements
    elif any(transcripts[tiger.SEQ_COL].apply(lambda s: len(s) < tiger.TARGET_LEN)):
        st.session_state.input_error = 'Transcript(s) must be at least {:d} bases.'.format(tiger.TARGET_LEN)

    # run model if we have any transcripts
    elif len(transcripts) > 0:
        st.session_state.transcripts = transcripts

def parse_gene_annotations(file_path):
    """Build a mapping from approved gene symbol to Ensembl gene ID.

    The file is expected to be tab-delimited with a header row containing the
    columns 'Approved symbol' and 'Ensembl gene ID'.

    Args:
        file_path: Path of the tab-delimited annotation file.

    Returns:
        dict mapping each gene symbol to its Ensembl gene ID. Rows that are too
        short, or whose symbol or Ensembl ID is empty, are skipped.

    Raises:
        ValueError: if either required column is missing from the header.
    """
    gene_dict = {}
    with open(file_path, 'r') as file:
        # Remove only the line terminator before splitting: a plain strip() would also
        # swallow leading/trailing tabs, shifting every column when an outer field is empty.
        headers = [name.strip() for name in file.readline().rstrip('\r\n').split('\t')]
        symbol_idx = headers.index('Approved symbol')  # Find index of 'Approved symbol'
        ensembl_idx = headers.index('Ensembl gene ID')  # Find index of 'Ensembl gene ID'
        last_needed_idx = max(symbol_idx, ensembl_idx)
        for line in file:
            values = line.rstrip('\r\n').split('\t')
            # Skip rows that do not reach both required columns
            if len(values) <= last_needed_idx:
                continue
            symbol = values[symbol_idx].strip()
            ensembl_id = values[ensembl_idx].strip()
            # Only record complete symbol -> ID pairs
            if symbol and ensembl_id:
                gene_dict[symbol] = ensembl_id
    return gene_dict

# Load the symbol -> Ensembl ID table on every script run; point this path at the annotation file to use
gene_annotations = parse_gene_annotations('Human_genes_HUGO_02242024_annotation.txt')
# Gene symbols offered by the gene selectboxes (autocomplete-like entry)
gene_symbol_list = list(gene_annotations)
# Check if the selected model is Cas9
if selected_model == 'Cas9':
    # Cas9 offers two workflows; a radio button guarantees exactly one is active per run
    target_selection = st.radio(
        "Select either on-target or off-target:",
        ('on-target', 'off-target'),
        key='target_selection'
    )
    if 'current_gene_symbol' not in st.session_state:
        st.session_state['current_gene_symbol'] = ""

    def clean_up_old_files(gene_symbol):
        """Remove the files generated for ``gene_symbol`` by an earlier prediction, if present."""
        stale_paths = [
            f"{gene_symbol}_crispr_targets.gb",
            f"{gene_symbol}_crispr_targets.bed",
            f"{gene_symbol}_crispr_predictions.csv",
            f"{gene_symbol}_gtracks_plot.png",  # the track image is generated alongside the exports
        ]
        for path in stale_paths:
            if os.path.exists(path):
                os.remove(path)


    # Gene symbol entry with autocomplete-like feature
    gene_symbol = st.selectbox('Enter a Gene Symbol:', [''] + gene_symbol_list, key='gene_symbol',
                               format_func=lambda x: x if x else "")

    # Handle gene symbol change and file cleanup
    if gene_symbol != st.session_state['current_gene_symbol'] and gene_symbol:
        if st.session_state['current_gene_symbol']:
            # Clean up files only if a different gene symbol is entered and a previous symbol exists
            clean_up_old_files(st.session_state['current_gene_symbol'])
        # Update the session state with the new gene symbol
        st.session_state['current_gene_symbol'] = gene_symbol

    if target_selection == 'on-target':
        # Prediction button
        predict_button = st.button('Predict on-target')

        if 'exons' not in st.session_state:
            st.session_state['exons'] = []

        # Process predictions
        if predict_button and gene_symbol:
            with st.spinner('Predicting... Please wait'):
                predictions, gene_sequence, exons = cas9att.process_gene(gene_symbol, cas9att_path)

                # Keep the ten best records. Each record ends with its prediction score, so rank
                # on that field (highest first); a bare sorted() would order by chromosome/position.
                sorted_predictions = sorted(predictions, key=lambda record: record[-1], reverse=True)[:10]
                st.session_state['on_target_results'] = sorted_predictions
                st.session_state['gene_sequence'] = gene_sequence  # Save gene sequence in session state
                st.session_state['exons'] = exons  # Store exon data

            # Notify the user once the process is completed successfully.
            st.success('Prediction completed!')
            st.session_state['prediction_made'] = True

            if 'on_target_results' in st.session_state and st.session_state['on_target_results']:
                ensembl_id = gene_annotations.get(gene_symbol, 'Unknown')  # Get Ensembl ID or default to 'Unknown'
                col1, col2, col3 = st.columns(3)
                with col1:
                    st.markdown("**Genome**")
                    st.markdown("Homo sapiens")
                with col2:
                    st.markdown("**Gene**")
                    st.markdown(f"{gene_symbol} : {ensembl_id} (primary)")
                with col3:
                    st.markdown("**Nuclease**")
                    st.markdown("SpCas9")
                # Include "Target" in the DataFrame's columns
                try:
                    df = pd.DataFrame(st.session_state['on_target_results'],
                                      columns=["Chr", "Start Pos", "End Pos", "Strand", "Transcript", "Exon", "Target", "gRNA", "Prediction"])
                    st.dataframe(df)
                except ValueError as e:
                    # Leave df defined so the export step below can be skipped instead of crashing
                    df = None
                    st.error(f"DataFrame creation error: {e}")
                    # Optionally print or log the problematic data for debugging:
                    print(st.session_state['on_target_results'])

                # Initialize Plotly figure
                fig = go.Figure()

                EXON_BASE = 0  # Base position for exons and CDS on the Y axis
                EXON_HEIGHT = 0.02  # How 'tall' the exon markers should appear

                # Plot Exons as small markers on the X-axis
                for exon in st.session_state['exons']:
                    exon_start, exon_end = exon['start'], exon['end']
                    fig.add_trace(go.Bar(
                        x=[(exon_start + exon_end) / 2],
                        y=[EXON_HEIGHT],
                        width=[exon_end - exon_start],
                        base=EXON_BASE,
                        marker_color='rgba(128, 0, 128, 0.5)',
                        name='Exon'
                    ))

                VERTICAL_GAP = 0.2  # Gap between different ranks

                # Define max and min Y values based on strand and rank
                MAX_STRAND_Y = 0.1  # Maximum Y value for positive strand results
                MIN_STRAND_Y = -0.1  # Minimum Y value for negative strand results

                # Iterate over top 5 sorted predictions to create the plot
                for i, prediction in enumerate(st.session_state['on_target_results'][:5], start=1):  # Only top 5
                    chrom, start, end, strand, transcript, exon, target, gRNA, prediction_score = prediction
                    midpoint = (int(start) + int(end)) / 2

                    # The strand may be encoded as '1' or '+'; anything else is drawn as the minus strand
                    on_plus_strand = strand == '1' or strand == '+'
                    strand_label = '+' if on_plus_strand else '-'

                    # Vertical position based on rank, modified by strand
                    if on_plus_strand:
                        y_value = MAX_STRAND_Y - (i - 1) * VERTICAL_GAP
                    else:
                        y_value = MIN_STRAND_Y + (i - 1) * VERTICAL_GAP

                    fig.add_trace(go.Scatter(
                        x=[midpoint],
                        y=[y_value],
                        mode='markers+text',
                        marker=dict(symbol='triangle-up' if on_plus_strand else 'triangle-down',
                                    size=12),
                        text=f"Rank: {i}",  # Text label
                        hoverinfo='text',
                        hovertext=f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {strand_label}<br>Transcript: {transcript}<br>Prediction: {prediction_score:.4f}",
                    ))

                # Update layout for clarity and interaction
                fig.update_layout(
                    title='Top 5 gRNA Sequences by Prediction Score',
                    xaxis_title='Genomic Position',
                    yaxis_title='Strand',
                    yaxis=dict(tickvals=[MAX_STRAND_Y, MIN_STRAND_Y], ticktext=['+', '-']),
                    showlegend=False,
                    hovermode='x unified',
                )

                # Display the plot
                st.plotly_chart(fig)

                # The export needs both the results table and the gene sequence
                if df is not None and 'gene_sequence' in st.session_state and st.session_state['gene_sequence']:
                    gene_symbol = st.session_state['current_gene_symbol']
                    gene_sequence = st.session_state['gene_sequence']

                    # Define file paths
                    genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
                    bed_file_path = f"{gene_symbol}_crispr_targets.bed"
                    csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
                    plot_image_path = f"{gene_symbol}_gtracks_plot.png"

                    # Generate files
                    cas9att.generate_genbank_file_from_df(df, gene_sequence, gene_symbol, genbank_file_path)
                    cas9att.create_bed_file_from_df(df, bed_file_path)
                    cas9att.create_csv_from_df(df, csv_file_path)

                    # Prepare an in-memory buffer for the ZIP file
                    zip_buffer = io.BytesIO()
                    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
                        # For each file, add it to the ZIP file
                        zip_file.write(genbank_file_path)
                        zip_file.write(bed_file_path)
                        zip_file.write(csv_file_path)

                    # Important: move the cursor to the beginning of the BytesIO buffer before reading it
                    zip_buffer.seek(0)

                    # Specify the region you want to visualize
                    min_start = df['Start Pos'].min()
                    max_end = df['End Pos'].max()
                    chromosome = df['Chr'].mode()[0]  # Assumes most common chromosome is the target
                    region = f"{chromosome}:{min_start}-{max_end}"

                    # Render the region with the gtracks command-line tool. The arguments are passed
                    # as a list (no shell), so nothing in the symbol or region is shell-interpreted.
                    try:
                        gtracks_result = subprocess.run(['gtracks', region, bed_file_path, plot_image_path])
                        plot_ready = gtracks_result.returncode == 0 and os.path.exists(plot_image_path)
                    except OSError:
                        # The gtracks executable is missing or could not be started
                        plot_ready = False
                    if plot_ready:
                        st.image(plot_image_path)
                    else:
                        st.warning('Genome track plot could not be generated.')

                    # Display the download button for the ZIP file
                    st.download_button(
                        label="Download GenBank, BED, CSV files as ZIP",
                        data=zip_buffer.getvalue(),
                        file_name=f"{gene_symbol}_files.zip",
                        mime="application/zip"
                    )

    elif target_selection == 'off-target':
        ENTRY_METHODS = dict(
            manual='Manual entry of target sequence',
            txt="txt file upload"
        )
        if __name__ == '__main__':
            # app initialization for Cas9 off-target
            if 'target_sequence' not in st.session_state:
                st.session_state.target_sequence = None
            if 'input_error' not in st.session_state:
                st.session_state.input_error = None
            if 'off_target_results' not in st.session_state:
                st.session_state.off_target_results = None

            # target sequence entry
            st.selectbox(
                label='How would you like to provide target sequences?',
                options=ENTRY_METHODS.values(),
                key='entry_method',
                disabled=st.session_state.target_sequence is not None
            )
            if st.session_state.entry_method == ENTRY_METHODS['manual']:
                st.text_input(
                    label='Enter on/off sequences:',
                    key='manual_entry',
                    placeholder='Enter on/off sequences like:GGGTGGGGGGAGTTTGCTCCAGG,AGGTGGGGTGA_TTTGCTCCAGG',
                    disabled=st.session_state.target_sequence is not None
                )
            elif st.session_state.entry_method == ENTRY_METHODS['txt']:
                st.file_uploader(
                    label='Upload a txt file:',
                    key='txt_entry',
                    disabled=st.session_state.target_sequence is not None
                )

            # prediction button
            # Start from None so an empty input cannot leave `predictions` undefined below
            predictions = None
            if st.button('Predict off-target'):
                if st.session_state.entry_method == ENTRY_METHODS['manual']:
                    user_input = st.session_state.manual_entry
                    if user_input:  # Check if user_input is not empty
                        predictions = cas9off.process_input_and_predict(user_input, input_type='manual')
                elif st.session_state.entry_method == ENTRY_METHODS['txt']:
                    uploaded_file = st.session_state.txt_entry
                    if uploaded_file is not None:
                        # Read the uploaded file content
                        file_content = uploaded_file.getvalue().decode("utf-8")
                        # NOTE(review): file content is passed with input_type='manual' -- confirm
                        # against cas9off whether a dedicated file input type is expected here.
                        predictions = cas9off.process_input_and_predict(file_content, input_type='manual')

                if predictions is None:
                    st.warning('Please enter on/off sequences or upload a txt file before predicting.')
                st.session_state.off_target_results = predictions
            progress = st.empty()

            # input error display
            error = st.empty()
            if st.session_state.input_error is not None:
                error.error(st.session_state.input_error, icon="🚨")
            else:
                error.empty()

            # off-target results display
            off_target_results = st.empty()
            if st.session_state.off_target_results is not None:
                with off_target_results.container():
                    if len(st.session_state.off_target_results) > 0:
                        st.write('Off-target predictions:', st.session_state.off_target_results)
                        st.download_button(
                            label='Download off-target predictions',
                            data=convert_df(st.session_state.off_target_results),
                            file_name='off_target_results.csv',
                            mime='text/csv'
                        )
                    else:
                        st.write('No significant off-target effects detected!')
            else:
                off_target_results.empty()

            # running the CRISPR-Net model for off-target predictions
            if st.session_state.target_sequence is not None:
                st.session_state.off_target_results = cas9off.predict_off_targets(
                    target_sequence=st.session_state.target_sequence,
                    status_update_fn=progress_update
                )
                st.session_state.target_sequence = None
                st.experimental_rerun()

elif selected_model == 'Cas12':
    # Gene symbol entry with autocomplete-like feature
    gene_symbol = st.selectbox('Enter a Gene Symbol:', [''] + gene_symbol_list, key='gene_symbol',
                               format_func=lambda x: x if x else "")

    # Initialize the current_gene_symbol in the session state if it doesn't exist
    if 'current_gene_symbol' not in st.session_state:
        st.session_state['current_gene_symbol'] = ""

    # Prediction button
    predict_button = st.button('Predict on-target')

    # Function to clean up old files
    def clean_up_old_files(gene_symbol):
        genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
        bed_file_path = f"{gene_symbol}_crispr_targets.bed"
        csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
        for path in [genbank_file_path, bed_file_path, csv_file_path]:
            if os.path.exists(path):
                os.remove(path)

    # Clean up files if a new gene symbol is entered
    if st.session_state['current_gene_symbol'] and gene_symbol != st.session_state['current_gene_symbol']:
        clean_up_old_files(st.session_state['current_gene_symbol'])

    # Process predictions
    if predict_button and gene_symbol:
        with st.spinner('Predicting... Please wait'):
            predictions, gene_sequence, exons = cas12lstm.process_gene(gene_symbol, cas9att_path)

            sorted_predictions = sorted(predictions)[:10]
            st.session_state['on_target_results'] = sorted_predictions
            st.session_state['gene_sequence'] = gene_sequence  # Save gene sequence in session state
            st.session_state['exons'] = exons  # Store exon data

        # Notify the user once the process is completed successfully.
        st.success('Prediction completed!')
        st.session_state['prediction_made'] = True

        if 'on_target_results' in st.session_state and st.session_state['on_target_results']:
            ensembl_id = gene_annotations.get(gene_symbol, 'Unknown')  # Get Ensembl ID or default to 'Unknown'
            col1, col2, col3 = st.columns(3)
            with col1:
                st.markdown("**Genome**")
                st.markdown("Homo sapiens")
            with col2:
                st.markdown("**Gene**")
                st.markdown(f"{gene_symbol} : {ensembl_id} (primary)")
            with col3:
                st.markdown("**Nuclease**")
                st.markdown("SpCas9")
            # Include "Target" in the DataFrame's columns
            try:
                df = pd.DataFrame(st.session_state['on_target_results'],
                                  columns=["Chr", "Start Pos", "End Pos", "Strand", "Transcript", "Exon", "Target",
                                           "gRNA", "Prediction"])
                st.dataframe(df)
            except ValueError as e:
                st.error(f"DataFrame creation error: {e}")
                # Optionally print or log the problematic data for debugging:
                print(st.session_state['on_target_results'])

            # Initialize Plotly figure
            fig = go.Figure()

            EXON_BASE = 0  # Base position for exons and CDS on the Y axis
            EXON_HEIGHT = 0.02  # How 'tall' the exon markers should appear

            # Plot Exons as small markers on the X-axis
            for exon in st.session_state['exons']:
                exon_start, exon_end = exon['start'], exon['end']
                fig.add_trace(go.Bar(
                    x=[(exon_start + exon_end) / 2],
                    y=[EXON_HEIGHT],
                    width=[exon_end - exon_start],
                    base=EXON_BASE,
                    marker_color='rgba(128, 0, 128, 0.5)',
                    name='Exon'
                ))

            VERTICAL_GAP = 0.2  # Gap between different ranks

            # Define max and min Y values based on strand and rank
            MAX_STRAND_Y = 0.1  # Maximum Y value for positive strand results
            MIN_STRAND_Y = -0.1  # Minimum Y value for negative strand results

            # Iterate over top 5 sorted predictions to create the plot
            for i, prediction in enumerate(st.session_state['on_target_results'][:5], start=1):  # Only top 5
                chrom, start, end, strand, transcript, exon, target, gRNA, prediction_score = prediction
                midpoint = (int(start) + int(end)) / 2

                # Vertical position based on rank, modified by strand
                y_value = (MAX_STRAND_Y - (i - 1) * VERTICAL_GAP) if strand == '1' or strand == '+' else (
                        MIN_STRAND_Y + (i - 1) * VERTICAL_GAP)

                fig.add_trace(go.Scatter(
                    x=[midpoint],
                    y=[y_value],
                    mode='markers+text',
                    marker=dict(symbol='triangle-up' if strand == '1' or strand == '+' else 'triangle-down',
                                size=12),
                    text=f"Rank: {i}",  # Text label
                    hoverinfo='text',
                    hovertext=f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == '1' or strand == '+' else '-'}<br>Transcript: {transcript}<br>Prediction: {prediction_score:.4f}",
                ))

            # Update layout for clarity and interaction
            fig.update_layout(
                title='Top 5 gRNA Sequences by Prediction Score',
                xaxis_title='Genomic Position',
                yaxis_title='Strand',
                yaxis=dict(tickvals=[MAX_STRAND_Y, MIN_STRAND_Y], ticktext=['+', '-']),
                showlegend=False,
                hovermode='x unified',
            )

            # Display the plot
            st.plotly_chart(fig)

            if 'gene_sequence' in st.session_state and st.session_state['gene_sequence']:
                gene_symbol = st.session_state['current_gene_symbol']
                gene_sequence = st.session_state['gene_sequence']

                # Define file paths
                genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
                bed_file_path = f"{gene_symbol}_crispr_targets.bed"
                csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
                plot_image_path = f"{gene_symbol}_gtracks_plot.png"

                # Generate files
                cas12lstm.generate_genbank_file_from_df(df, gene_sequence, gene_symbol, genbank_file_path)
                cas12lstm.create_bed_file_from_df(df, bed_file_path)
                cas12lstm.create_csv_from_df(df, csv_file_path)

                # Prepare an in-memory buffer for the ZIP file
                zip_buffer = io.BytesIO()
                with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
                    # For each file, add it to the ZIP file
                    zip_file.write(genbank_file_path)
                    zip_file.write(bed_file_path)
                    zip_file.write(csv_file_path)

                # Important: move the cursor to the beginning of the BytesIO buffer before reading it
                zip_buffer.seek(0)

                # Specify the region you want to visualize
                min_start = df['Start Pos'].min()
                max_end = df['End Pos'].max()
                chromosome = df['Chr'].mode()[0]  # Assumes most common chromosome is the target
                region = f"{chromosome}:{min_start}-{max_end}"

                # Generate the pyGenomeTracks plot
                gtracks_command = f"gtracks {region} {bed_file_path} {plot_image_path}"
                subprocess.run(gtracks_command, shell=True)
                st.image(plot_image_path)

                # Display the download button for the ZIP file
                st.download_button(
                    label="Download GenBank, BED, CSV files as ZIP",
                    data=zip_buffer.getvalue(),
                    file_name=f"{gene_symbol}_files.zip",
                    mime="application/zip"
                )

elif selected_model == 'Cas13d':
        ENTRY_METHODS = dict(
        manual='Manual entry of single transcript',
        fasta="Fasta file upload (supports multiple transcripts if they have unique ID's)"
        )

        if __name__ == '__main__':
            # app initialization
            if 'mode' not in st.session_state:
                st.session_state.mode = tiger.RUN_MODES['all']
                st.session_state.disable_off_target_checkbox = True
            if 'entry_method' not in st.session_state:
                st.session_state.entry_method = ENTRY_METHODS['manual']
            if 'transcripts' not in st.session_state:
                st.session_state.transcripts = None
            if 'input_error' not in st.session_state:
                st.session_state.input_error = None
            if 'on_target' not in st.session_state:
                st.session_state.on_target = None
            if 'titration' not in st.session_state:
                st.session_state.titration = None
            if 'off_target' not in st.session_state:
                st.session_state.off_target = None

            # mode selection
            col1, col2 = st.columns([0.65, 0.35])
            with col1:
                st.radio(
                    label='What do you want to predict?',
                    options=tuple(tiger.RUN_MODES.values()),
                    key='mode',
                    on_change=mode_change_callback,
                    disabled=st.session_state.transcripts is not None,
                )
            with col2:
                st.checkbox(
                    label='Find off-target effects (slow)',
                    key='check_off_targets',
                    disabled=st.session_state.disable_off_target_checkbox or st.session_state.transcripts is not None
                )

            # transcript entry
            st.selectbox(
                label='How would you like to provide transcript(s) of interest?',
                options=ENTRY_METHODS.values(),
                key='entry_method',
                disabled=st.session_state.transcripts is not None
            )
            if st.session_state.entry_method == ENTRY_METHODS['manual']:
                st.text_input(
                    label='Enter a target transcript:',
                    key='manual_entry',
                    placeholder='Upper or lower case',
                    disabled=st.session_state.transcripts is not None
                )
            elif st.session_state.entry_method == ENTRY_METHODS['fasta']:
                st.file_uploader(
                    label='Upload a fasta file:',
                    key='fasta_entry',
                    disabled=st.session_state.transcripts is not None
                )

            # let's go!
            st.button(label='Get predictions!', on_click=initiate_run, disabled=st.session_state.transcripts is not None)
            progress = st.empty()

            # input error
            error = st.empty()
            if st.session_state.input_error is not None:
                error.error(st.session_state.input_error, icon="🚨")
            else:
                error.empty()

            # on-target results
            on_target_results = st.empty()
            if st.session_state.on_target is not None:
                with on_target_results.container():
                    st.write('On-target predictions:', st.session_state.on_target)
                    st.download_button(
                        label='Download on-target predictions',
                        data=convert_df(st.session_state.on_target),
                        file_name='on_target.csv',
                        mime='text/csv'
                    )
            else:
                on_target_results.empty()

            # titration results
            titration_results = st.empty()
            if st.session_state.titration is not None:
                with titration_results.container():
                    st.write('Titration predictions:', st.session_state.titration)
                    st.download_button(
                        label='Download titration predictions',
                        data=convert_df(st.session_state.titration),
                        file_name='titration.csv',
                        mime='text/csv'
                    )
            else:
                titration_results.empty()

            # off-target results
            off_target_results = st.empty()
            if st.session_state.off_target is not None:
                with off_target_results.container():
                    if len(st.session_state.off_target) > 0:
                        st.write('Off-target predictions:', st.session_state.off_target)
                        st.download_button(
                            label='Download off-target predictions',
                            data=convert_df(st.session_state.off_target),
                            file_name='off_target.csv',
                            mime='text/csv'
                        )
                    else:
                        st.write('We did not find any off-target effects!')
            else:
                off_target_results.empty()

            # keep trying to run model until we clear inputs (streamlit UI changes can induce race-condition reruns)
            if st.session_state.transcripts is not None:
                st.session_state.on_target, st.session_state.titration, st.session_state.off_target = tiger.tiger_exhibit(
                    transcripts=st.session_state.transcripts,
                    mode={v: k for k, v in tiger.RUN_MODES.items()}[st.session_state.mode],
                    check_off_targets=st.session_state.check_off_targets,
                    status_update_fn=progress_update
                )
                st.session_state.transcripts = None
                st.experimental_rerun()