import os
import sys
from collections import OrderedDict
import argparse

# library for easier reading of penncnv entries
from penncnv import PenncnvEntry

def _build_parser():
    """Build the command-line parser for ``filter_penncnv``.

    Kept separate from ``main`` so the option definitions can be read
    without wading through the filtering loop.
    """
    parser = argparse.ArgumentParser(
        prog='filter_penncnv',
        description='using callset file in output format produced by PennCNV, '
                    'produce an output that meets user specified requirements '
                    'regarding size and number of spanning snps. User has option '
                    'to write output in BED format.')
    # The "max" options default to float("inf") even though type=int:
    # argparse only runs the type converter on string values, so a
    # non-string default is passed through unchanged.
    parser.add_argument('--length-min', action='store', type=int,
                        default=0,
                        help="minimum allowed size for CNV calls, default %(default)s.")
    parser.add_argument('--length-max', action='store', type=int,
                        default=float("inf"),
                        help="maximum allowed size for CNV calls, default %(default)s.")
    parser.add_argument("--numsnp-min", action='store', type=int,
                        default=0,
                        help="minimum number of snps required for CNV calls, default %(default)s.")
    parser.add_argument("--numsnp-max", action='store', type=int,
                        default=float("inf"),
                        help="maximum number of snps required for CNV calls, default %(default)s.")
    parser.add_argument("--chr-include", action='store', type=str,
                        default="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT",
                        help="chromosomes to subset on, default %(default)s.")
    parser.add_argument("--output-as-bed", action='store_true', default=False,
                        help="write output in BED format, default %(default)s")
    parser.add_argument('--iid-prefix-rm', action='store', type=str, default=None,
                        help='prefix to remove from presumed sample filename, '
                             'to represent sample iid')
    parser.add_argument('--iid-postfix-rm', action='store', type=str, default=None,
                        help='postfix to remove from presumed sample filename, '
                             'to represent sample iid')
    parser.add_argument("in_penncnv_txt", type=str,
                        help="input penncnv callset text file")
    return parser


def _strip_affixes(iid, prefix=None, postfix=None):
    """Return ``iid`` without a leading ``prefix`` and a trailing ``postfix``.

    Only a match at the very start (prefix) or very end (postfix) is removed;
    occurrences elsewhere in the string are left alone. ``None`` or empty
    affixes are ignored.
    """
    if prefix and iid.startswith(prefix):
        iid = iid[len(prefix):]
    # The truthiness check matters here: with an empty postfix,
    # iid[:-0] would wipe out the whole string.
    if postfix and iid.endswith(postfix):
        iid = iid[:-len(postfix)]
    return iid


def main(userargs):
    """Filter a PennCNV callset and print the surviving calls to stdout.

    Each non-blank input line is parsed with ``PenncnvEntry`` and kept only
    if its chromosome is in ``--chr-include``, its length lies within
    ``[--length-min, --length-max]`` and its SNP count lies within
    ``[--numsnp-min, --numsnp-max]``. Kept entries are printed either as
    the entry's ``line`` attribute or, with ``--output-as-bed``, as the
    result of ``entry.form_bed_line()``.

    Args:
        userargs: list of command-line argument strings, without the
            program name (e.g. ``sys.argv[1:]``). The positional argument
            is the input path; ``"stdin"`` or ``"-"`` reads standard input.

    Returns:
        None.

    Raises:
        SystemExit: raised by argparse on invalid arguments or ``--help``.
        OSError: if the input file cannot be opened.
    """
    args = _build_parser().parse_args(userargs)

    # define set with chromosomes to subset on
    chr_include = set(args.chr_include.split(","))

    # only touch entry.iid when the user actually asked for it
    strip_iid = (args.iid_prefix_rm is not None
                 or args.iid_postfix_rm is not None)

    # init filehandle to penncnv file
    read_stdin = args.in_penncnv_txt in ("stdin", "-")
    in_fh = sys.stdin if read_stdin else open(args.in_penncnv_txt, "r")

    try:
        # for each line in penncnv file ..
        for penncnv_line in in_fh:

            # blank lines (e.g. a trailing newline) carry no CNV call
            if not penncnv_line.strip():
                continue

            # init as object
            entry = PenncnvEntry(penncnv_line)

            # if defined by user, remove sample id prefix and postfix
            if strip_iid:
                entry.iid = _strip_affixes(entry.iid,
                                           args.iid_prefix_rm,
                                           args.iid_postfix_rm)

            # if entry doesn't map to chromosomes of interest, then skip
            if entry.chrom not in chr_include:
                continue

            # if entry doesn't meet size criteria, then skip
            if entry.length < args.length_min or entry.length > args.length_max:
                continue

            # if entry doesn't meet numsnp criteria, then skip
            if entry.numsnp < args.numsnp_min or entry.numsnp > args.numsnp_max:
                continue

            # if entry survives filter, then print
            if args.output_as_bed:
                out_line = entry.form_bed_line()
            else:
                out_line = entry.line
            print(out_line)
    finally:
        # Close only handles opened here; sys.stdin belongs to the
        # interpreter and may still be needed by the caller.
        if not read_stdin:
            in_fh.close()

    return


# Script entry point: forward the command-line arguments (minus the
# program name) to main() when this file is executed directly.
if __name__ == "__main__":
    main(sys.argv[1:])
