import pandas as pd
import numpy as np
import sys

def parse_alignment(alignment_str):
    """Parse a comma-separated alignment string into a NumPy array.

    Every field is converted with ``int``; a field that is exactly ``"."``
    becomes ``np.nan`` instead.

    Args:
        alignment_str: Comma-separated text, e.g. ``"3,.,7"``.

    Returns:
        A 1-D ``np.ndarray`` with one element per field. The dtype is
        whatever ``np.array`` infers from the values (integer when no ``"."``
        field is present, floating point once a NaN is included). If the
        input cannot be parsed -- a field that is not an integer (including
        the empty string), or a value that cannot be split on a ``str``
        comma -- an empty array is returned instead of raising.
    """
    try:
        fields = alignment_str.split(",")
        return np.array(
            [np.nan if field == "." else int(field) for field in fields]
        )
    except (AttributeError, TypeError, ValueError):
        # Best-effort parsing: malformed or missing input yields an empty
        # alignment. Only parse-related errors are swallowed so that
        # unrelated failures still surface.
        return np.array([])

def _read_alignment_tsv(path, column):
    """Read a headerless two-column TSV into a DataFrame.

    The first column is named ``read_id``; the second is named ``column`` and
    each of its values is passed through ``parse_alignment``.
    """
    return pd.read_csv(
        path,
        sep="\t",
        header=None,
        names=["read_id", column],
        converters={column: parse_alignment},
    )


def main():
    """Merge per-read alignments from several tools into one parquet file.

    Command-line arguments (read from ``sys.argv``):
        1. fishnet parquet path (required)
        2. remora TSV path (required)
        3. output parquet path (required)
        4. f5c TSV path (optional)
        5. uncalled4 TSV path (optional)

    The tables are outer-merged on ``read_id``, sorted by ``read_id`` and
    written to the output path with ``DataFrame.to_parquet``.

    Raises:
        SystemExit: If fewer than the three required arguments are given.
    """
    if len(sys.argv) < 4:
        prog = sys.argv[0] if sys.argv else "script"
        sys.exit(
            f"usage: {prog} FISHNET_PARQUET REMORA_TSV OUT_PARQUET "
            "[F5C_TSV [UNCALLED4_TSV]]"
        )

    fishnet_path, remora_path, out_path = sys.argv[1:4]
    # Resolve the optional inputs independently so that supplying only the
    # f5c path does not index past the end of sys.argv.
    f5c_path = sys.argv[4] if len(sys.argv) > 4 else None
    uncalled4_path = sys.argv[5] if len(sys.argv) > 5 else None

    print("Loading fishnet data...")
    # NOTE(review): this is a substring test on the whole path, so any path
    # containing "ref" (e.g. a directory called "reference") selects the
    # "ref" column -- confirm this heuristic is intended.
    dataset = "ref" if "ref" in fishnet_path else "query"
    signal_column = f"{dataset}_to_signal"
    data_fishnet = pd.read_parquet(fishnet_path)[["read_id", signal_column]]
    data_fishnet = data_fishnet.rename(
        columns={signal_column: "alignment_fishnet"}
    )

    print("Loading remora data...")
    data_remora = _read_alignment_tsv(remora_path, "alignment_remora")

    print("Merging fishnet and remora data...")
    merged = pd.merge(data_fishnet, data_remora, how="outer", on="read_id")
    # Drop the inputs as soon as they are merged to keep peak memory down.
    del data_fishnet
    del data_remora

    optional_inputs = ((f5c_path, "f5c"), (uncalled4_path, "uncalled4"))
    for path, tool in optional_inputs:
        if not path:
            continue

        print(f"Loading {tool} data...")
        table = _read_alignment_tsv(path, f"alignment_{tool}")

        print(f"Merging {tool} data...")
        merged = pd.merge(merged, table, how="outer", on="read_id")
        del table

    # Only sorting happens here; the message text is kept unchanged so
    # existing log output stays the same.
    print("Sorting and adjusting the remora alignment")
    merged = merged.sort_values("read_id").reset_index(drop=True)

    print(f"Writing the merged data to: {out_path}")
    merged.to_parquet(out_path)

# Run the merge only when executed as a script, not when imported.
if __name__ == "__main__":
    main()