#!/bin/bash
#
# Computes base-wise signal statistics with `fishnet reformat` for the
# modified and unmodified alignment of every sample, then hands both results
# to a Python script that prepares them for plotting.
#
# Variables that are used further down but never assigned in this file
# (SAMPLES, MOD_SITE, UNMOD_SITE, HALF_WINDOW_SIZE, THREADS) are expected to
# come from one of the files sourced below -- presumably `.env`; TODO confirm.

# Abort on the first failing command, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail

# Absolute path of the directory that contains this script, so that every
# path below is independent of the caller's working directory.
BASEDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Tool binary, helper scripts and input/output directories of this step.
FISHNET="${BASEDIR}/../bin/fishnet"
SCRIPTDIR="${BASEDIR}/scripts"
ALIGNDIR="${BASEDIR}/alignments"
REFORMATDIR="${BASEDIR}/reformatted"
PICKLEDIR="${BASEDIR}/pickle_files"

# Raw data locations, resolved relative to the parent of BASEDIR.
# NOTE(review): BAMDIR is not referenced anywhere in the visible part of this
# script -- confirm whether it is still needed.
DATADIR="${BASEDIR}/../01_data/rna_m1a"
POD5DIR="${DATADIR}/pod5"
BAMDIR="${DATADIR}/bam"

# Activate the project's Python virtual environment, then load the
# run-specific settings. Both files are sourced into the current shell, so
# anything they define is visible to the rest of the script.
source "${BASEDIR}/../venv/bin/activate"
source "${BASEDIR}/.env"


# Runs `fishnet reformat` with the "stats" strategy on one alignment file.
#
# Globals:
#   FISHNET - command used to invoke fishnet (read)
#   THREADS - value passed to --threads (read)
# Arguments:
#   $1 - alignment file passed to --alignment
#   $2 - output file passed to --out (--force-overwrite is always set)
#   $3 - reference name, first part of the region string
#   $4 - position, second part of the region string
#   $5 - distance from the position, last part of the region string
#   $6 - pod5 file passed to --pod5
# Outputs:
#   A one-line progress message on stdout, followed by whatever fishnet prints.
# Returns:
#   The exit status of the fishnet invocation.
function base_wise_stats {
    local alignment_in=$1
    local outfile=$2
    local ref_name=$3
    local position=$4
    local distance_from_position=$5
    local pod5_file=$6

    # The region is assembled as "<ref_name>:<position>-<distance>".
    # NOTE(review): this assumes --positions-of-interest takes a position and
    # a distance rather than a start-end range -- confirm against fishnet docs.
    local reference_region="${ref_name}:${position}-${distance_from_position}"

    echo "Calculating base-wise statistics for ${alignment_in}. Writing to ${outfile}..."

    # FISHNET must be quoted: it is derived from the script's own location,
    # and an unquoted expansion would be word-split (and the call would fail)
    # as soon as that path contains whitespace.
    "$FISHNET" reformat \
        --alignment "$alignment_in" \
        --out "$outfile" \
        --pod5 "$pod5_file" \
        --rna \
        --alignment-type "reference" \
        --positions-of-interest "$reference_region" \
        --strategy "stats" \
        --stats "mean" "std" "dwell" \
        --threads "$THREADS" \
        --force-overwrite \
        --output-shape "melted"
}

# Forwards two fishnet stats files to the Python plotting-preparation script.
#
# Globals:
#   SCRIPTDIR - directory holding prep_base_wise_stats_plotting.py (read)
# Arguments:
#   $1, $2 - the two fishnet stats files
#   $3, $4 - the sample names belonging to $1 and $2
#   $5     - features string, forwarded unchanged
#   $6     - output file for the percentiles
#   $7     - output file for the stats
# Outputs:
#   A one-line progress message on stdout, followed by the script's output.
# Returns:
#   The exit status of the python invocation.
function prep_for_base_wise_stats_plotting {
    local fishnet_stats_a=$1
    local fishnet_stats_b=$2
    local label_a=$3
    local label_b=$4
    local feature_list=$5
    local percentiles_out=$6
    local stats_out=$7

    echo "Preparing '${fishnet_stats_a}' and '${fishnet_stats_b}' for base-wise stats plotting..."

    local prep_script="${SCRIPTDIR}/prep_base_wise_stats_plotting.py"

    # All seven values are passed positionally, in the order received.
    local -a prep_args=(
        "$fishnet_stats_a" "$fishnet_stats_b"
        "$label_a" "$label_b"
        "$feature_list"
        "$percentiles_out" "$stats_out"
    )

    python "$prep_script" "${prep_args[@]}"
}


# For every sample: compute base-wise stats for the modified and the
# unmodified alignment, then prepare both results for plotting.
for sample in "${SAMPLES[@]}"; do

    # Ensure both output directories exist before anything writes into them
    # (no-op when they are already present).
    mkdir -p "$REFORMATDIR" "$PICKLEDIR"

    # Previously produced alignments
    aligned_file_mod="${ALIGNDIR}/mod_${sample}.parquet"
    aligned_file_unmod="${ALIGNDIR}/unmod_${sample}.parquet"

    # `fishnet reformat` (stats strategy) output files
    stats_file_mod="${REFORMATDIR}/mod_${sample}_stats.parquet"
    stats_file_unmod="${REFORMATDIR}/unmod_${sample}_stats.parquet"

    # Pickle outputs of the plotting-preparation script. Each variable is
    # named after the file it points to.
    pickle_file_percentiles="${PICKLEDIR}/${sample}_base_wise_stats_percentiles.pkl"
    pickle_file_stats="${PICKLEDIR}/${sample}_base_wise_stats_stats.pkl"

    # The reference name is the upper-cased sample name plus a _mod/_unmod suffix.
    base_wise_stats \
        "$aligned_file_mod" \
        "$stats_file_mod" \
        "${sample^^}_mod" \
        "$MOD_SITE" \
        "$HALF_WINDOW_SIZE" \
        "${POD5DIR}/mod_${sample}.pod5"

    base_wise_stats \
        "$aligned_file_unmod" \
        "$stats_file_unmod" \
        "${sample^^}_unmod" \
        "$UNMOD_SITE" \
        "$HALF_WINDOW_SIZE" \
        "${POD5DIR}/unmod_${sample}.pod5"

    # Argument order expected by the function: percentiles output first,
    # stats output second.
    prep_for_base_wise_stats_plotting \
        "$stats_file_mod" \
        "$stats_file_unmod" \
        "${sample}_mod" \
        "${sample}_unmod" \
        "mean,std,dwell" \
        "$pickle_file_percentiles" \
        "$pickle_file_stats"

done