#!/usr/bin/env python3

from collections import OrderedDict
import argparse

def main(argv=None):
    """Print telomere and centromere regions of a reference genome as BED.

    For every chromosome in the centromere table of the chosen genome,
    three tab-separated lines (chrom, start, end) are written to stdout:
    the telomere at the beginning of the chromosome, the centromere, and
    the telomere at the end of the chromosome.

    Args:
        argv: optional list of command-line arguments; when None,
            argparse reads them from sys.argv.
    """
    args = _parse_args(argv)

    # get centromere loci, chrom sizes
    centromere_loci = get_centromere_loci(ref_genome=args.ref_genome)
    chrom_sizes = get_chrom_sizes(ref_genome=args.ref_genome)

    # for each defined chromosome in centromere loci..
    for chrom, centromere in centromere_loci.items():
        regions = _chrom_bed_regions(centromere, chrom_sizes[chrom],
                                     args.telomere_size,
                                     args.telomere_extend,
                                     args.centromere_extend)
        # remove chr if desired by user
        out_chrom = chrom.replace("chr", "") if args.no_chr else chrom
        # print coordinates to output in BED format in presumed order
        for start, end in regions:
            print(out_chrom, start, end, sep="\t")

def _parse_args(argv=None):
    """Build the command-line parser and parse argv with it."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--no-chr", action="store_true", default=False,
                        help="Remove 'chr' from output chromosome name "
                             "[default %(default)s]")
    parser.add_argument("--telomere-size", action="store", type=int,
                        default=500000,
                        help="size of each telomere region in bases "
                             "[default %(default)s]")
    parser.add_argument("--telomere-extend", action="store", type=int,
                        default=0,
                        help="extend telomere edges by this many bases "
                             "[default %(default)s]")
    parser.add_argument("--centromere-extend", action="store", type=int,
                        default=500000,
                        help="extend centromere edges by this many bases "
                             "[default %(default)s]")
    parser.add_argument("ref_genome", action="store", type=str,
                        choices=["hg18", "hg19", "hg38"],
                        help="reference genome to extract coordinates for")
    return parser.parse_args(argv)

def _chrom_bed_regions(centromere, chrom_size, telomere_size,
                       telomere_extend, centromere_extend):
    """Return [(start, end), ...] for telomere 1, centromere, telomere 2.

    Args:
        centromere: [start, end] of the centromere on this chromosome.
        chrom_size: length of the chromosome in bases.
        telomere_size: bases covered by each telomere region.
        telomere_extend: extra bases added to the inner telomere edges.
        centromere_extend: extra bases added to both centromere edges.
    """
    def clamp(pos):
        # keep every coordinate inside the chromosome so that large
        # size/extend values cannot produce an invalid BED interval
        return max(0, min(pos, chrom_size))

    # the start is shifted down by one; presumably this converts a
    # 1-based start to the 0-based start BED uses -- TODO confirm
    centromere_start = centromere[0] - 1 - centromere_extend
    centromere_end = centromere[1] + centromere_extend
    # each telomere covers telomere_size + 1 bases before extension; only
    # the edge facing the chromosome interior moves when extended
    telomere1_end = telomere_size + 1 + telomere_extend
    telomere2_start = chrom_size - telomere_size - 1 - telomere_extend

    return [
        (0, clamp(telomere1_end)),
        (clamp(centromere_start), clamp(centromere_end)),
        (clamp(telomere2_start), chrom_size),
    ]

def get_centromere_loci(ref_genome="hg19"):
    """Return the centromere coordinates for a reference genome.

    Args:
        ref_genome: one of "hg18", "hg19" or "hg38".

    Returns:
        OrderedDict mapping chromosome name (e.g. "chr1") to a
        [start, end] list. Key order is the order in which the tables
        below are written.

    Raises:
        ValueError: if ref_genome is not one of the supported genomes.
    """
    if ref_genome == "hg19":
        # from  :
        # http://penncnv.openbioinformatics.org/en/latest/misc/faq/
        # 12. How to remove CNV calls in centromeric and telomeric regions?
        # NOTE(review): unlike the hg18/hg38 tables, these start values
        # are round numbers (no trailing 1) -- confirm the coordinate
        # base against the source.
        centromere_loci = OrderedDict({
            "chr1": [121500000, 128900000],
            "chr2": [90500000, 96800000],
            "chr3": [87900000, 93900000],
            "chr4": [48200000, 52700000],
            "chr5": [46100000, 50700000],
            "chr6": [58700000, 63300000],
            "chr7": [58000000, 61700000],
            "chr8": [43100000, 48100000],
            "chr9": [47300000, 50700000],
            "chr10": [38000000, 42300000],
            "chr11": [51600000, 55700000],
            "chr12": [33300000, 38200000],
            "chr13": [16300000, 19500000],
            "chr14": [16100000, 19100000],
            "chr15": [15800000, 20700000],
            "chr16": [34600000, 38600000],
            "chr17": [22200000, 25800000],
            "chr18": [15400000, 19000000],
            "chr19": [24400000, 28600000],
            "chr20": [25600000, 29400000],
            "chr21": [10900000, 14300000],
            "chr22": [12200000, 17900000],
            "chrX": [58100000, 63000000],
            "chrY": [11600000, 13400000],
        })
    elif ref_genome == "hg18":
        # from  :
        # http://penncnv.openbioinformatics.org/en/latest/misc/faq/
        # 12. How to remove CNV calls in centromeric and telomeric regions?
        centromere_loci = OrderedDict({
            "chr1": [121100001, 128000000],
            "chr2": [91000001, 95700000],
            "chr3": [89400001, 93200000],
            "chr4": [48700001, 52400000],
            "chr5": [45800001, 50500000],
            "chr6": [58400001, 63400000],
            "chr7": [57400001, 61100000],
            "chr8": [43200001, 48100000],
            "chr9": [46700001, 60300000],
            "chr10": [38800001, 42100000],
            "chr11": [51400001, 56400000],
            "chr12": [33200001, 36500000],
            "chr13": [13500001, 18400000],
            "chr14": [13600001, 19100000],
            "chr15": [14100001, 18400000],
            "chr16": [34400001, 40700000],
            "chr17": [22100001, 23200000],
            "chr18": [15400001, 17300000],
            "chr19": [26700001, 30200000],
            "chr20": [25700001, 28400000],
            "chr21": [10000001, 13200000],
            "chr22": [9600001, 16300000],
            "chrX": [56600001, 65000000],
            "chrY": [11200001, 12500000],
        })
    elif ref_genome == "hg38":
        # base coordinates obtained from the UCSC genome tables browser :
        # https://genome.ucsc.edu/cgi-bin/hgTables
        # clade:Mammal, genome:Human, assembly:hg38,
        # group:Mapping and Sequencing, track:Chromosome Band (Ideogram),
        # table:cytoBandIdeo .
        # Centromere loci were defined as rows in the table with
        # column 5 (gieStain) set as 'acen' .
        # coordinates for all rows with gieStain set as 'acen' were
        # merged using command 'bedtools merge -i coords.acen.bed'
        centromere_loci = OrderedDict({
            "chrY": [10300001, 10600000],
            "chrX": [58100001, 63800000],
            "chr1": [121700001, 125100000],
            "chr2": [91800001, 96000000],
            "chr3": [87800001, 94000000],
            "chr4": [48200001, 51800000],
            "chr5": [46100001, 51400000],
            "chr6": [58500001, 62600000],
            "chr7": [58100001, 62100000],
            "chr8": [43200001, 47200000],
            "chr9": [42200001, 45500000],
            "chr10": [38000001, 41600000],
            "chr11": [51000001, 55800000],
            "chr12": [33200001, 37800000],
            "chr13": [16500001, 18900000],
            "chr14": [16100001, 18200000],
            "chr15": [17500001, 20500000],
            "chr16": [35300001, 38400000],
            "chr17": [22700001, 27400000],
            "chr18": [15400001, 21500000],
            "chr19": [24200001, 28100000],
            "chr20": [25700001, 30400000],
            "chr21": [10900001, 13000000],
            "chr22": [13700001, 17400000],
        })
    else:
        # fail with a clear message instead of an UnboundLocalError at
        # the return statement
        raise ValueError(
            "unsupported ref_genome {!r}; expected one of "
            "'hg18', 'hg19', 'hg38'".format(ref_genome))
    return centromere_loci

def get_chrom_sizes(ref_genome="hg19"):
    """Return the chromosome lengths for a reference genome.

    Args:
        ref_genome: one of "hg18", "hg19" or "hg38".

    Returns:
        OrderedDict mapping chromosome name (e.g. "chr1") to its size
        in bases.

    Raises:
        ValueError: if ref_genome is not one of the supported genomes.
    """
    if ref_genome == "hg19":
        # from :
        # https://hgdownload.cse.ucsc.edu/goldenpath/hg19/bigZips/hg19.chrom.sizes
        chrom_sizes = OrderedDict({
            "chrX": 155270560,
            "chrY": 59373566,
            "chr1": 249250621,
            "chr2": 243199373,
            "chr3": 198022430,
            "chr4": 191154276,
            "chr5": 180915260,
            "chr6": 171115067,
            "chr7": 159138663,
            "chr8": 146364022,
            "chr9": 141213431,
            "chr10": 135534747,
            "chr11": 135006516,
            "chr12": 133851895,
            "chr13": 115169878,
            "chr14": 107349540,
            "chr15": 102531392,
            "chr16": 90354753,
            "chr17": 81195210,
            "chr18": 78077248,
            "chr19": 59128983,
            "chr20": 63025520,
            "chr21": 48129895,
            "chr22": 51304566,
        })
    elif ref_genome == "hg18":
        # from :
        # http://hgdownload.cse.ucsc.edu/goldenpath/hg18/bigZips/hg18.chrom.sizes
        chrom_sizes = OrderedDict({
            "chrX": 154913754,
            "chrY": 57772954,
            "chr1": 247249719,
            "chr2": 242951149,
            "chr3": 199501827,
            "chr4": 191273063,
            "chr5": 180857866,
            "chr6": 170899992,
            "chr7": 158821424,
            "chr8": 146274826,
            "chr9": 140273252,
            "chr10": 135374737,
            "chr11": 134452384,
            "chr12": 132349534,
            "chr13": 114142980,
            "chr14": 106368585,
            "chr15": 100338915,
            "chr16": 88827254,
            "chr17": 78774742,
            "chr18": 76117153,
            "chr19": 63811651,
            "chr20": 62435964,
            "chr21": 46944323,
            "chr22": 49691432,
        })
    elif ref_genome == "hg38":
        # from :
        # http://hgdownload.cse.ucsc.edu/goldenpath/hg38/bigZips/hg38.chrom.sizes
        chrom_sizes = OrderedDict({
            "chrX": 156040895,
            "chrY": 57227415,
            "chr1": 248956422,
            "chr2": 242193529,
            "chr3": 198295559,
            "chr4": 190214555,
            "chr5": 181538259,
            "chr6": 170805979,
            "chr7": 159345973,
            "chr8": 145138636,
            "chr9": 138394717,
            "chr10": 133797422,
            "chr11": 135086622,
            "chr12": 133275309,
            "chr13": 114364328,
            "chr14": 107043718,
            "chr15": 101991189,
            "chr16": 90338345,
            "chr17": 83257441,
            "chr18": 80373285,
            "chr19": 58617616,
            "chr20": 64444167,
            "chr21": 46709983,
            "chr22": 50818468,
        })
    else:
        # fail with a clear message instead of an UnboundLocalError at
        # the return statement
        raise ValueError(
            "unsupported ref_genome {!r}; expected one of "
            "'hg18', 'hg19', 'hg38'".format(ref_genome))
    return chrom_sizes

# run the command-line entry point only when executed as a script
if __name__ == "__main__":
    main()
