#!/bin/bash
#
# Runs `fishnet reformat` (interpolate strategy) on the modified and unmodified
# alignments of each sample at several distances, then passes each pair of
# outputs to scripts/prep_umap_plotting.py.
#
# Names used further down but never assigned in this file: SAMPLES (array),
# THREADS, MOD_SITE, UNMOD_SITE. Presumably they are provided by the sourced
# .env file -- TODO confirm.

# Abort on any failing command, on use of an unset variable, and when any
# stage of a pipeline fails.
set -euo pipefail

# Absolute path of the directory that contains this script, so every path
# below is independent of the caller's working directory.
BASEDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Tool location and working directories, all resolved relative to BASEDIR.
FISHNET="${BASEDIR}/../bin/fishnet"
SCRIPTDIR="${BASEDIR}/scripts"
ALIGNDIR="${BASEDIR}/alignments"
REFORMATDIR="${BASEDIR}/reformatted"
PICKLEDIR="${BASEDIR}/pickle_files"

# Directory holding the per-sample mod_<sample>.pod5 / unmod_<sample>.pod5 files.
POD5DIR="${BASEDIR}/../01_data/rna_m1a/pod5"

# Activate the project's virtual environment, then load .env.
# NOTE(review): both files run under `set -u`; they must not reference unset
# variables or the script stops here.
source "${BASEDIR}/../venv/bin/activate"
source "${BASEDIR}/.env"

# Run `fishnet reformat` with the "interpolate" strategy on one alignment file.
#
# Globals:
#   FISHNET  (read) path to the fishnet executable
#   THREADS  (read) value passed to --threads
# Arguments:
#   $1  alignment file passed to --alignment
#   $2  output file passed to --out (--force-overwrite is always set)
#   $3  reference name; first component of the region string
#   $4  position; placed after the colon in the region string
#   $5  distance; placed after the dash in the region string
#   $6  pod5 file passed to --pod5
# Outputs:
#   one progress line on stdout, followed by whatever fishnet prints
# Returns:
#   the exit status of fishnet
function interpolate {
    local alignment_in=$1
    local outfile=$2
    local ref_name=$3
    local position=$4
    local distance_from_position=$5
    local pod5_file=$6

    # Region is formatted "<ref>:<position>-<distance>".
    # NOTE(review): the value after the dash is a distance, not an end
    # coordinate -- confirm this is what --positions-of-interest expects.
    local reference_region="${ref_name}:${position}-${distance_from_position}"

    echo "Calculating interpolation for ${alignment_in} at ${reference_region}. Writing to ${outfile}..."

    # FISHNET is quoted so that an install path containing whitespace or glob
    # characters is still treated as a single command word.
    "$FISHNET" reformat \
        --alignment "$alignment_in" \
        --out "$outfile" \
        --pod5 "$pod5_file" \
        --rna \
        --alignment-type "reference" \
        --positions-of-interest "$reference_region" \
        --strategy "interpolate" \
        --threads "$THREADS" \
        --force-overwrite \
        --output-shape "exploded"
}


# Hand a pair of interpolated tables to the UMAP preparation script.
#
# Globals:
#   SCRIPTDIR  (read) directory containing prep_umap_plotting.py
# Arguments:
#   $1  first interpolated file
#   $2  second interpolated file
#   $3  sample name
#   $4  pickle output path
# Outputs:
#   one progress line on stdout, followed by whatever the script prints
# Returns:
#   the exit status of the python invocation
function run_umap {
    local first_table=$1 second_table=$2
    local sample_name=$3 pickle_path=$4
    local -a umap_cmd

    echo "Running UMAP for '${first_table}' and '${second_table}'..."

    # Assemble the command as an array so each argument stays one word.
    umap_cmd=(
        python "${SCRIPTDIR}/prep_umap_plotting.py"
        "$first_table"
        "$second_table"
        "$sample_name"
        "$pickle_path"
    )
    "${umap_cmd[@]}"
}


# Nothing visible in this script creates the output directories, so make sure
# they exist before the first result is written (no-op when already present).
mkdir -p -- "$REFORMATDIR" "$PICKLEDIR"

# For every sample and every distance: interpolate the modified and the
# unmodified alignment, then run the UMAP preparation on the resulting pair.
for sample in "${SAMPLES[@]}"; do

    # Previously produced alignments
    aligned_file_mod="${ALIGNDIR}/mod_${sample}.parquet"
    aligned_file_unmod="${ALIGNDIR}/unmod_${sample}.parquet"

    for distance in 0 1 2 4; do

        # fishnet reformat (interpolate strategy) output files
        interp_file_mod="${REFORMATDIR}/mod_${sample}_interp_${distance}.parquet"
        interp_file_unmod="${REFORMATDIR}/unmod_${sample}_interp_${distance}.parquet"

        # UMAP results in pickle format
        pickle_file="${PICKLEDIR}/${sample}_umap_${distance}.pkl"

        # Reference names are the upper-cased sample name plus a _mod/_unmod
        # suffix (${var^^} needs bash 4 or newer).
        interpolate \
            "$aligned_file_mod" \
            "$interp_file_mod" \
            "${sample^^}_mod" \
            "$MOD_SITE" \
            "$distance" \
            "${POD5DIR}/mod_${sample}.pod5"

        interpolate \
            "$aligned_file_unmod" \
            "$interp_file_unmod" \
            "${sample^^}_unmod" \
            "$UNMOD_SITE" \
            "$distance" \
            "${POD5DIR}/unmod_${sample}.pod5"

        run_umap \
            "$interp_file_mod" \
            "$interp_file_unmod" \
            "$sample" \
            "$pickle_file"
    done
done