#!/bin/bash
#
# Path configuration for the benchmark job generator.
# Everything is anchored at the directory that contains this script, so the
# script can be invoked from any working directory.
set -euo pipefail

# Absolute path of the directory holding this script.
BASEDIR="$(
    cd "$(dirname "${BASH_SOURCE[0]}")" && pwd
)"

# Shared prefixes; only used to build the paths below and unset afterwards.
_bin_dir="${BASEDIR}/../bin"
_giab_dir="${BASEDIR}/../01_data/dna_giab"

# Directories below BASEDIR.
JOBDIR="${BASEDIR}/scripts/jobs"
RUNSCRIPTDIR="${BASEDIR}/scripts/run"
WORKDIR="${BASEDIR}/tmp"
STDDIR="${BASEDIR}/stdout"
LOGDIR="${BASEDIR}/logs"
RESULTDIR="${BASEDIR}/results"

# Executables, one level above BASEDIR.
HYPERFINE_BIN="${_bin_dir}/hyperfine"
F5C_BIN="${_bin_dir}/f5c"
REMORA_BIN="${_bin_dir}/remora"
FISHNET_BIN="${_bin_dir}/fishnet"

# Activation script of the Python virtual environment.
VENV_ACTIVATE="${BASEDIR}/../venv/bin/activate"

# Input data: k-mer level table, reference FASTA and the read subsets.
KMERTABLEFILE="${_giab_dir}/levels.txt"
REFFILE="${_giab_dir}/ref.fa"
SUBSETDIR="${_giab_dir}/subsets"

unset -v _bin_dir _giab_dir

# Pull in the Slurm settings (memory, CPUs and notification e-mail).
. "${BASEDIR}/.env"

#######################################
# Write one Slurm batch script that benchmarks a single tool configuration
# with hyperfine (1 warmup run, 3 timed runs, JSON export).
#
# The script is written to
#   ${JOBDIR}/<tool>_<read_len>_<n_reads>_<alignment_type>_<n_threads>.sh
# and made executable. Every path is expanded now, at generation time, and
# embedded as plain text; the benchmarked command is a space-joined string,
# so the configured paths must not contain whitespace or shell
# metacharacters.
#
# Globals (read):
#   JOBDIR, RUNSCRIPTDIR, WORKDIR, STDDIR, LOGDIR, RESULTDIR, SUBSETDIR,
#   KMERTABLEFILE, REFFILE, HYPERFINE_BIN, F5C_BIN, REMORA_BIN, FISHNET_BIN,
#   VENV_ACTIVATE, SLURM_CPU, SLURM_MEM, SLURM_EMAIL
# Arguments:
#   $1 - tool: remora | fishnet | uncalled4 | f5c
#   $2 - read length label; with $3 selects ${SUBSETDIR}/<read_len>_<n_reads>
#   $3 - number of reads label
#   $4 - alignment type; selects ${RUNSCRIPTDIR}/<tool>_<alignment_type>.sh
#   $5 - thread count handed to the run script (remora's run script does not
#        receive it; it only appears in the job name and log)
# Outputs:
#   "Generated: <job file>" on stdout, diagnostics on stderr.
# Returns:
#   0 on success, 1 for an unknown tool or an empty WORKDIR.
#######################################
function generate_job {
    local tool="$1"
    local read_len="$2"
    local n_reads="$3"
    local alignment_type="$4"
    local n_threads="$5"

    # WORKDIR is baked into `rm -f <WORKDIR>/*` in the job script; an empty
    # value would turn that into `rm -f /*`.
    if [[ -z "${WORKDIR:-}" ]]; then
        echo "generate_job: WORKDIR must be set and non-empty" >&2
        return 1
    fi

    local job_name="${tool}_${read_len}_${n_reads}_${alignment_type}_${n_threads}"
    local job_file="${JOBDIR}/${job_name}.sh"

    local subset_dir="${SUBSETDIR}/${read_len}_${n_reads}"
    local bam_path="${subset_dir}/subset.bam"
    local pod5_path="${subset_dir}/subset.pod5"
    local fastq_path="${subset_dir}/subset.fastq"
    local blow5_path="${subset_dir}/subset.blow5"

    # Command line that hyperfine will time; argument order is specific to
    # each tool's run script.
    local cmd=""
    case "$tool" in
        remora)
            cmd="${RUNSCRIPTDIR}/remora_${alignment_type}.sh ${bam_path} ${pod5_path} ${KMERTABLEFILE} ${WORKDIR}/outfile.tsv ${REMORA_BIN}"
            ;;
        fishnet)
            cmd="${RUNSCRIPTDIR}/fishnet_${alignment_type}.sh ${bam_path} ${pod5_path} ${KMERTABLEFILE} ${WORKDIR}/outfile.parquet ${n_threads} ${FISHNET_BIN}"
            ;;
        uncalled4)
            cmd="${RUNSCRIPTDIR}/uncalled4_${alignment_type}.sh ${bam_path} ${pod5_path} ${KMERTABLEFILE} ${WORKDIR}/outfile.tsv ${REFFILE} ${n_threads}"
            ;;
        f5c)
            # Only the reference variant takes the BAM and the reference FASTA.
            if [[ "$alignment_type" == "reference" ]]; then
                cmd="${RUNSCRIPTDIR}/f5c_${alignment_type}.sh ${fastq_path} ${bam_path} ${blow5_path} ${REFFILE} ${WORKDIR}/outfile.tsv ${n_threads} ${F5C_BIN}"
            else
                cmd="${RUNSCRIPTDIR}/f5c_${alignment_type}.sh ${fastq_path} ${blow5_path} ${WORKDIR}/outfile.tsv ${n_threads} ${F5C_BIN}"
            fi
            ;;
        *)
            # Without this arm an unknown tool produced a job that benchmarks
            # an empty command.
            echo "generate_job: unknown tool '${tool}' (expected remora, fishnet, uncalled4 or f5c)" >&2
            return 1
            ;;
    esac

    # The job file and the files named in the #SBATCH --output/--error
    # directives live here; make sure both directories exist up front.
    mkdir -p -- "$JOBDIR" "$STDDIR"

    # Notes on the template below:
    #  * The heredoc delimiter is unquoted: ${...} is expanded now, while
    #    \$ and \\ defer expansion / emit a literal backslash in the job.
    #  * All jobs share one job name together with --dependency=singleton and
    #    write to the same ${WORKDIR}/outfile.*.
    #  * `set +e` leaves pipefail on, so \$? after the `hyperfine | tee`
    #    pipeline is non-zero when hyperfine fails.
    cat > "$job_file" <<EOF
#!/bin/bash
#SBATCH --dependency=singleton
#SBATCH --job-name=FishnetBenchmark
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --output=${STDDIR}/${job_name}.stdOut
#SBATCH --error=${STDDIR}/${job_name}.stdErr
#SBATCH --cpus-per-task=${SLURM_CPU}
#SBATCH --mem=${SLURM_MEM}
#SBATCH --mail-type=FAIL
#SBATCH --mail-user=${SLURM_EMAIL}

set -euo pipefail

source "${VENV_ACTIVATE}"

# Make sure the output locations exist before anything is written to them
mkdir -p "${LOGDIR}" "${RESULTDIR}" "${WORKDIR}"

TIMESTAMP=\$(date +"%Y%m%d_%H%M%S")
LOGFILE="${LOGDIR}/${job_name}_\${TIMESTAMP}.log"

log() {
    echo "[\$(date '+%Y-%m-%d %H:%M:%S')] \$*" | tee -a "\$LOGFILE"
}

log "=== Starting Benchmark: ${job_name} ==="
log "Tool: ${tool}"
log "Read length: ${read_len}"
log "N reads: ${n_reads}"
log "Alignment type: ${alignment_type}"
log "Threads: ${n_threads}"
log "Command: ${cmd}"

# Clean workspace
rm -f "${WORKDIR}"/* 2>/dev/null || true

JSON_OUTPUT="${RESULTDIR}/${job_name}_\${TIMESTAMP}.json"

set +e
"$HYPERFINE_BIN" "${cmd}" \\
    --runs 3 \\
    --warmup 1 \\
    --prepare "rm -f ${WORKDIR}/* 2>/dev/null || true" \\
    --command-name "${job_name}" \\
    --export-json "\$JSON_OUTPUT" \\
    2>&1 | tee -a "\$LOGFILE"

EXIT_CODE=\$?
set -e

if [[ \$EXIT_CODE -eq 0 ]]; then
    log "SUCCESS: Benchmark completed"
    log "Results: \$JSON_OUTPUT"
    exit 0
else
    log "ERROR: Benchmark failed with exit code \$EXIT_CODE"
    exit \$EXIT_CODE
fi
EOF

    chmod +x "$job_file"
    echo "Generated: ${job_file}"
}


# Benchmark matrix: every read-length label x read count x alignment type.
read_length_labels=(short medium long)
read_counts=(100 1000 10000 100000)
alignment_modes=(reference query)
# Thread counts swept for the tools listed in threaded_tools.
thread_counts=(1 8 16 24)
threaded_tools=(fishnet f5c uncalled4)

for len_label in "${read_length_labels[@]}"; do
    for count in "${read_counts[@]}"; do
        for mode in "${alignment_modes[@]}"; do
            # remora gets a single job per combination, with thread count 1.
            generate_job "remora" "$len_label" "$count" "$mode" 1

            # The remaining tools get one job per thread count.
            for threads in "${thread_counts[@]}"; do
                for tool_name in "${threaded_tools[@]}"; do
                    generate_job "$tool_name" "$len_label" "$count" "$mode" "$threads"
                done
            done
        done
    done
done

# Summary, surrounded by blank lines.
printf '\nJob scripts generated in: %s\n\n' "$JOBDIR"