# -*- coding: utf-8 -*-
"""
Calculation of acoustic indices using scikit-maad on WINDOWS (ultrasound).
Supplementary information S14-4 of:

Title: A workflow to optimize spatial sampling in ecoacoustic studies

Journal: Landscape Ecology

Authors: Martínez-Arias, V. M.1,3; Paniagua-Villada, C.1,3; Guerrero, M.J.; Daza, J. M.1

Author affiliations at the time the work was conducted: 
1 Grupo Herpetológico de Antioquia GHA, Instituto de Biología, Facultad de Ciencias Exactas y Naturales, Universidad de Antioquia UdeA, Calle 70 No. 52 - 21, Medellín, Colombia.
2 SISTEMIC, Facultad de Ingeniería, Universidad de Antioquia UdeA, Medellín, Colombia
3 Corporación Merceditas,  Calle 3 29A-11, casa 110, Medellín, Colombia


*Corresponding author: Victor M. Martínez Arias
Email: vmanuel.martinez@udea.edu.co


---

NOTES:
- Normalization added to ACI calculation (Fixes 'zeros' result).
- ACIft uses clumped matrix (Fixes identical values across scales).
- Uses Librosa for robust audio loading on Windows.
- Windows Multiprocessing Safety enabled.
- For the ultrasonic range, we computed ACI from amplitude spectrograms rather than power. This decision was motivated by the properties of the ACI formulation and by the need to avoid spurious inflation of variability and false positives in high-frequency, low-energy signals. In this case, ACI calculations followed the amplitude-based formulation with edge-effect correction as proposed by Farina and Li (2022), including temporal clumping and boundary correction, which are better suited for ultrasonic signals characterized by sparse, transient acoustic events.


"""
import os
import numpy as np
import pandas as pd
from pathlib import Path
from scipy import signal
from maad import features, util, sound
import librosa  # Changed from torchaudio for Windows stability
from tqdm import tqdm
import warnings
import concurrent.futures
import gc
import multiprocessing
import traceback

# Silence every warning category for the whole run (including worker processes,
# which re-import this module).
warnings.filterwarnings("ignore")

# ============================================================
# 1. MAIN CONFIGURATION & EXPERIMENTAL DESIGN
# ============================================================

# --- WINDOWS PATH CONFIGURATION ---
# Root folder that is searched recursively for WAV files.
BASE_PATH = r"F:\REYZAMURO\DATOS\Pasivo\Ultrasonido"
# The results workbook is written next to the recordings.
OUTPUT_DIR = BASE_PATH
# NOTE(review): the file name says "audible" although this script targets the
# ultrasonic band -- confirm the intended output name.
OUTPUT_FILE = os.path.join(OUTPUT_DIR, "ZAMURO_INDICES20251209_audible.xlsx")

# --- Parallel Processing ---
# Use these values as a reference. WORKERS is the number of worker processes;
# BATCH_SIZE is the number of files handed to each process pool (a new pool is
# created per batch). A bigger batch size demands more RAM.

WORKERS = 14
BATCH_SIZE = 70

# --- EXPERIMENTAL GRID (Freq Bands) ---
# List of (fmin, fmax) frequency bands; presumably in Hz, matching the
# frequency axis returned by the spectrogram.
FREQ_BANDS = [
    (24000,96000)
]

# --- EXPERIMENTAL GRID (Parameters) ---
# Each value is used both as the segment length and as the FFT length of the
# spectrogram.
NFFT_LIST = [2048]
# Number of consecutive time frames averaged into one "clump" before ACI is
# computed. Use e.g. [1, 3, 5] for a temporal multi-scale analysis.
ACI_J_BINS = [1]  # You can add [1, 3, 5] if you want temporal multi-scale

# --- CONSTANTS ---
# Number of samples shared by consecutive spectrogram segments (0 = no overlap).
OVERLAP = 0
# Requested minimum frequency distance between peaks for NP. Standard is 200.
NP_MIN_FREQ_DIST_LIST = [200]  # Min frequency for NP. Standard is 200

# IMPORTANT: the threshold is applied to the band-limited spectrogram after it
# has been normalised by its own maximum (range 0 to 1); bins at or below this
# fraction are zeroed before ACI is computed.
ACI_ENERGY_THRESHOLD = 0.01

# Band passed as `flim` to the Bioacoustic Index (BI).
BIOPHONY_BAND = (24000, 96000)


# ============================================================
# 2. INDEX SELECTION (True = Compute, False = Skip)
# ============================================================

CALC_ACI = True
CALC_NP = True
CALC_BI = True

# ============================================================
# 3. HELPER FUNCTIONS
# ============================================================

def acoustic_complexity_index(S_amp, f, fmin, fmax, j_bin, filter_threshold=0):
    """
    Return the temporal (ACItf) and spectral (ACIft) Acoustic Complexity Index.

    Follows Farina et al. (2021), with two adjustments:

    - The optional energy filter is relative: the band is divided by its own
      maximum and bins at or below ``filter_threshold`` are set to zero, so a
      quiet recording is not wiped out by an absolute threshold.
    - Both indices are computed on the same time-clumped spectrogram, so that
      ACIft also depends on the chosen temporal scale.

    Parameters
    ----------
    S_amp : 2-D ndarray
        Spectrogram indexed as (frequency, time). It is not modified.
    f : 1-D ndarray
        Frequency associated with each row of ``S_amp``.
    fmin, fmax : float
        Inclusive limits of the analysed band, in the unit of ``f``.
    j_bin : int
        Number of consecutive time frames averaged into one clump.
    filter_threshold : float, optional
        Relative energy threshold in [0, 1]. The default ``0`` disables the
        filter.

    Returns
    -------
    tuple
        ``(ACItf, ACIft)``. ``(nan, nan)`` is returned when the band holds no
        data or has fewer than ``j_bin`` frames; ``(0, 0)`` is returned when
        the filter is active and the band maximum is not positive.
    """

    def _summed_ratio(first, second):
        # |second - first| / (first + second), summed over every adjacent pair
        # in which both cells are strictly positive; other pairs count as 0.
        gap = np.abs(second - first)
        usable = (first > 0) & (second > 0)
        ratios = np.divide(
            gap, first + second, out=np.zeros_like(gap), where=usable
        )
        return np.sum(ratios)

    # Work on a private copy of the rows that fall inside the band.
    band = S_amp[(f >= fmin) & (f <= fmax)].copy()
    if band.size == 0:
        return np.nan, np.nan

    if filter_threshold > 0:
        peak = np.max(band)
        if not peak > 0:
            # Nothing but silence in the band.
            return 0, 0
        band[(band / peak) <= filter_threshold] = 0

    rows, frames = band.shape
    clump_count = frames // j_bin
    if clump_count <= 0:
        # Fewer frames than one clump needs.
        return np.nan, np.nan

    # Drop the trailing frames that do not fill a clump, then average each
    # group of j_bin frames into a single column.
    usable_frames = clump_count * j_bin
    clumped = np.mean(
        band[:, :usable_frames].reshape(rows, clump_count, j_bin), axis=2
    )

    # Temporal index: neighbours along the time axis.
    temporal = _summed_ratio(clumped[:, :-1], clumped[:, 1:])
    # Spectral index: neighbours along the frequency axis.
    spectral = _summed_ratio(clumped[:-1, :], clumped[1:, :])

    return temporal, spectral



# ============================================================
# 4. CORE PROCESSING (PER FILE)
# ============================================================

def _compute_band_indices(row_data, S_amp, f, nfft, file_name):
    """
    Add the per-band ACI and NP columns for one spectrogram to *row_data*.

    Parameters
    ----------
    row_data : dict
        Result row of the current file; updated in place.
    S_amp : 2-D ndarray
        Amplitude spectrogram indexed as (frequency, time).
    f : 1-D ndarray
        Frequency vector matching the rows of ``S_amp``.
    nfft : int
        FFT length used for ``S_amp``; only used to label the columns.
    file_name : str
        Name of the file being processed; only used in warning messages.
    """
    # NP needs a minimum peak distance of at least one frequency bin.
    freq_step = f[1] - f[0] if f.size > 1 else 0

    for f_min, f_max in FREQ_BANDS:
        # NOTE(review): the original code referenced an undefined `prefix`;
        # this label mirrors the BI column naming. Adjust it if downstream
        # analysis expects a different scheme.
        prefix = f"Band{f_min}-{f_max}_N{nfft}"

        band_mask = (f >= f_min) & (f <= f_max)
        S_amp_f = S_amp[band_mask]
        f_sub = f[band_mask]

        # --- A. ACIs ---
        if CALC_ACI:
            for j_bin in ACI_J_BINS:
                try:
                    # The helper applies the band mask itself, so it receives
                    # the full spectrogram.
                    acitf, acift = acoustic_complexity_index(
                        S_amp, f, f_min, f_max, j_bin, ACI_ENERGY_THRESHOLD
                    )
                except Exception as err:
                    print(
                        f"Warning: ACI failed for {file_name} "
                        f"({prefix}, J={j_bin}): {err}"
                    )
                    acitf, acift = np.nan, np.nan
                row_data[f"ACItf_{prefix}_J{j_bin}"] = acitf
                row_data[f"ACIft_{prefix}_J{j_bin}"] = acift

        # --- B. NP ---
        if CALC_NP:
            for dist_req in NP_MIN_FREQ_DIST_LIST:
                col_np = f"NP_{prefix}_Dist{dist_req}"
                if f_sub.size == 0:
                    # The band lies entirely outside the spectrogram range
                    # (e.g. above the Nyquist frequency of this file).
                    row_data[col_np] = np.nan
                    continue
                try:
                    # NOTE(review): the original call also passed `flim=`; it
                    # is dropped because the input is already band-limited.
                    # Confirm against the installed scikit-maad version.
                    row_data[col_np] = features.number_of_peaks(
                        S_amp_f, f_sub,
                        min_freq_dist=max(dist_req, freq_step)
                    )
                except Exception as err:
                    print(
                        f"Warning: NP failed for {file_name} "
                        f"({prefix}, Dist={dist_req}): {err}"
                    )
                    row_data[col_np] = np.nan


def process_single_file(file_path):
    """
    Compute every enabled acoustic index for one audio file.

    The file is loaded at its native sample rate. For each FFT length in
    ``NFFT_LIST`` a spectrogram is computed; BI is evaluated on
    ``BIOPHONY_BAND`` and ACI / NP are evaluated on every band of
    ``FREQ_BANDS``. Each index is controlled independently by its ``CALC_*``
    flag.

    Parameters
    ----------
    file_path : pathlib.Path
        Audio file to analyse.

    Returns
    -------
    dict
        Always contains ``"Folder"`` and ``"Name"``. Index columns are added
        as they are computed; an index that fails is stored as NaN. If the
        file is empty, silent, or cannot be processed at all, only the columns
        filled so far are returned.
    """
    row_data = {
        "Folder": file_path.parent.name,
        "Name": file_path.name
    }

    try:
        # sr=None keeps the original sample rate (e.g. 96 kHz, 192 kHz),
        # which is required to reach the ultrasonic band.
        audio_np, fs = librosa.load(file_path, sr=None)

        # Nothing to measure in an empty or all-zero recording.
        if audio_np.size == 0 or np.max(np.abs(audio_np)) == 0:
            return row_data

        for nfft in NFFT_LIST:
            f, _, S_complex = signal.spectrogram(
                audio_np, fs,
                nperseg=nfft, noverlap=OVERLAP, nfft=nfft,
                mode="complex"
            )
            S_amp = np.abs(S_complex)
            # The complex spectrogram is no longer needed; free it early.
            del S_complex

            # --- Special index: BI on the fixed biophony band ---
            if CALC_BI:
                col_bi = f"BI_Bio{BIOPHONY_BAND[0]}-{BIOPHONY_BAND[1]}_N{nfft}"
                try:
                    row_data[col_bi] = features.bioacoustics_index(
                        S_amp ** 2,
                        f,
                        flim=BIOPHONY_BAND
                    )
                except Exception as err:
                    print(f"Warning: BI failed for {file_path.name}: {err}")
                    row_data[col_bi] = np.nan

            # --- Band-dependent indices: ACI and NP ---
            if CALC_ACI or CALC_NP:
                _compute_band_indices(row_data, S_amp, f, nfft, file_path.name)

            del S_amp

        del audio_np
        gc.collect()
        return row_data
    except Exception as e:
        # Keep the batch alive: report the problem and return what we have.
        print(f"❌ Error processing {file_path.name}: {e}")
        return row_data

# ============================================================
# 5. EXECUTION
# ============================================================

if __name__ == "__main__":
    # Script entry point: find the WAV files, process them in parallel
    # batches and write one row per file to an Excel workbook.
    multiprocessing.freeze_support()

    # exist_ok avoids the check-then-create race; any failure falls back to
    # the current working directory.
    try:
        os.makedirs(OUTPUT_DIR, exist_ok=True)
    except OSError as e:
        print(f"Error creating directory: {e}")
        OUTPUT_DIR = os.getcwd()
        OUTPUT_FILE = os.path.join(OUTPUT_DIR, "ultrasound_indices.xlsx")

    path = Path(BASE_PATH)
    # Match the extension case-insensitively and de-duplicate. Globbing for
    # "*.wav" and "*.WAV" separately returns every file twice on Windows,
    # where pattern matching ignores case, so each file would be processed
    # twice and appear as two rows.
    files = sorted(
        {
            p for p in path.rglob("*")
            if p.suffix.lower() == ".wav" and p.is_file()
        }
    )

    if files:
        print(f"Found {len(files)} files.")
        print("Starting Multi-Scale Analysis...")
        print(f"Active Computations: ACI={CALC_ACI}, NP={CALC_NP}, BI={CALC_BI}")

        all_results = []
        file_chunks = [files[i:i + BATCH_SIZE] for i in range(0, len(files), BATCH_SIZE)]

        for batch in file_chunks:
            # A fresh pool per batch returns the workers' memory to the OS
            # between batches.
            with concurrent.futures.ProcessPoolExecutor(max_workers=WORKERS) as ex:
                results = list(tqdm(ex.map(process_single_file, batch), total=len(batch)))
            all_results.extend(results)
            gc.collect()

        print("\nConverting to DataFrame...")
        df = pd.DataFrame(all_results)

        # Identification columns first, index columns in alphabetical order.
        id_cols = [c for c in ("Folder", "Name") if c in df.columns]
        index_cols = sorted(c for c in df.columns if c not in id_cols)
        df = df[id_cols + index_cols]

        print(f"Saving to {OUTPUT_FILE}...")
        df.to_excel(OUTPUT_FILE, index=False)

        print("✅ Done!")
    else:
        print("❌ No files found.")