|
|
|
|
|
import argparse |
|
import os |
|
import logging |
|
|
|
from utils import make_output_dir,write_2_log,log_time |
|
import map_2_HBDxBase as map_2_HBDxBase |
|
import annotate_from_mapping as annotate_from_mapping |
|
|
|
|
|
# Module-level logger; it is handed to the ``log_time`` decorator on ``main``.
log = logging.getLogger(__name__)

# Command-line interface: the 5' adapter has a built-in default, while the
# FASTA file has none and must be supplied by the caller.
parser = argparse.ArgumentParser()
parser.add_argument('--five_prime_adapter', type=str, default='GTTCAGAGTTCTACAGTCCGACGATC')
parser.add_argument('--fasta_file', type=str, help="Required to provide: --fasta_file sequences_to_be_annotated.fa")
args = parser.parse_args()

# Without --fasta_file (or with an empty value) there is nothing to annotate:
# show the usage text and stop.
if not args.fasta_file:
    parser.print_help()
    # ``parser.exit()`` terminates with status 0, exactly like the bare
    # ``exit()`` it replaces, but does not rely on the interactive helper
    # that the ``site`` module injects (missing under ``python -S`` and in
    # frozen/embedded interpreters).
    parser.exit()

# Plain module-level names consumed by the ``main(...)`` call at the bottom
# of the script.
five_prime_adapter = args.five_prime_adapter
sequence_file = args.fasta_file
|
|
|
|
|
@log_time(log)
def main(five_prime_adapter, sequence_file):
    """Executes 'make_anno'.

    1. Maps input sequences to HBDxBase, the human genome, and a collection
       of viral and bacterial genomes.
    2. Extracts information from mapping files.
    3. Generates annotation columns and final annotation dataframe.

    Args:
        five_prime_adapter: 5' adapter sequence, passed unchanged to
            ``annotate_from_mapping.main``.
        sequence_file: Name of the FASTA file to annotate. It is passed to
            ``make_output_dir`` and later appended to ``"../../data/"``.

    Note:
        The function changes the process working directory to the directory
        returned by ``make_output_dir``; every relative path used afterwards
        (the ``log`` folder and ``../../data/``) is resolved from there.
    """
    output_dir = make_output_dir(sequence_file)
    os.chdir(output_dir)

    log_folder = "log"
    log_path = f"{log_folder}/make_anno.log"

    # exist_ok=True replaces the exists()/makedirs() pair, which could fail
    # if the folder appeared between the check and the creation.
    os.makedirs(log_folder, exist_ok=True)

    # NOTE(review): presumably sets up logging into this file -- confirm
    # against utils.write_2_log.
    write_2_log(log_path)

    # Record which input this run belongs to; append mode keeps whatever is
    # already in the log file.
    with open(log_path, "a") as ofile:
        ofile.write(f"Sequence file: {sequence_file}\n")

    # Relative to the output directory entered above.
    map_2_HBDxBase.main("../../data/" + sequence_file)
    annotate_from_mapping.main(five_prime_adapter)


# Script entry: run the pipeline with the values parsed from the command line.
main(five_prime_adapter, sequence_file)
|
|
|
|