
import sys
import argparse
from collections import OrderedDict

"""
From the output of 'bedtools cluster' run on a BED file, form a BED file with
one line per cluster, where start0 is min(start0) over the cluster's members
and end is max(end) over the cluster's members. Each output interval therefore
represents the merger of all features that were mapped to that cluster.
"""

# Fallback text for --help. The descriptive string near the top of this file
# comes after the imports, so Python does not treat it as the module docstring
# and __doc__ is None at runtime.
_DESCRIPTION = """\
From the output of 'bedtools cluster' run on a BED file, form a BED file with
one line per cluster, where start0 is min(start0) of cluster members, and end
is max(end) of cluster members. Each interval represents the merger of all
features that were mapped to the cluster in question.
"""


def _parse_chrom_filter(chr_arg):
    """Turn the comma-separated --chr value into a set of chromosome names.

    Returns None (meaning "no restriction") when the option was not given or
    contains no non-empty names.
    """
    if not chr_arg:
        return None
    names = set(name.strip() for name in chr_arg.split(","))
    names.discard("")
    return names or None


def _collect_cluster_spans(lines, name_idx, chroms=None):
    """Reduce cluster-annotated BED lines to one span per cluster.

    Args:
        lines: iterable of text lines (whitespace-separated columns).
        name_idx: 0-based index of the column holding the cluster name.
        chroms: optional set of chromosome names to keep; None keeps all.

    Returns:
        OrderedDict mapping cluster name -> [chrom, min start0, max end], in
        order of first appearance. The chrom is taken from the first line seen
        for that cluster.

    Raises:
        IndexError: a data line has fewer columns than required.
        ValueError: the start or end column is not an integer.
    """
    spans = OrderedDict()
    for line in lines:
        fields = line.split()
        # Blank lines, '#' comments and BED 'track'/'browser' header lines
        # carry no interval, so skip them instead of crashing on them.
        if not fields:
            continue
        if fields[0].startswith("#") or fields[0] in ("track", "browser"):
            continue
        chrom = fields[0]
        if chroms is not None and chrom not in chroms:
            continue
        start0 = int(fields[1])
        end = int(fields[2])
        name = fields[name_idx]
        span = spans.get(name)
        if span is None:
            spans[name] = [chrom, start0, end]
        else:
            span[1] = min(span[1], start0)
            span[2] = max(span[2], end)
    return spans


def main():
    """Command-line entry point.

    Reads the cluster-annotated BED named on the command line (or standard
    input when the argument is the literal "stdin"), optionally restricted to
    the chromosomes given with --chr, and prints one tab-separated line per
    cluster to stdout: chrom, min start0, max end, cluster name.
    """
    parser = argparse.ArgumentParser(
        description=__doc__ or _DESCRIPTION,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--clustername-colnum',
                        type=int, default=7,
                        help='Column number with cluster name ' +
                             "[default %(default)s]")
    parser.add_argument('--chr', help='Restrict to a subset of chromosomes. ' +
                        "Specify as comma-separated list [default %(default)s]")
    parser.add_argument('in_bed_clusters_txt', help='Input BED (supports "stdin").')
    args = parser.parse_args()

    # Column numbers are 1-based; 0 or a negative value would silently index
    # from the end of the line.
    if args.clustername_colnum < 1:
        parser.error("--clustername-colnum must be >= 1")

    name_idx = args.clustername_colnum - 1
    chroms = _parse_chrom_filter(args.chr)

    if args.in_bed_clusters_txt == "stdin":
        # stdin is not ours to close.
        spans = _collect_cluster_spans(sys.stdin, name_idx, chroms)
    else:
        with open(args.in_bed_clusters_txt, "r") as in_fh:
            spans = _collect_cluster_spans(in_fh, name_idx, chroms)

    # print each cluster interval to stdout
    for name, (chrom, start0, end) in spans.items():
        print("\t".join([chrom, str(start0), str(end), name]))

# Run the command-line entry point only when executed as a script,
# not when this file is imported as a module.
if __name__ == "__main__":
    main()
