# -*- coding: utf-8 -*-
"""
Calculation of acoustic indices using scikit-maad on Linux.
Supplementary information S14-1 of:

Title: A workflow to optimize spatial sampling in ecoacoustic studies

Journal: Landscape Ecology

Authors: Martínez-Arias, V. M.1,3; Paniagua-Villada, C.1,3; Guerrero, M.J.; Daza, J. M.1

Author affiliations at the time the work was conducted:
1 Grupo Herpetológico de Antioquia GHA, Instituto de Biología, Facultad de Ciencias Exactas y Naturales, Universidad de Antioquia UdeA, Calle 70 No. 52 - 21, Medellín, Colombia.
2 SISTEMIC, Facultad de Ingeniería, Universidad de Antioquia UdeA, Medellín, Colombia.
3 Corporación Merceditas, Calle 3 29A-11, casa 110, Medellín, Colombia.


*Corresponding author: Victor M. Martínez Arias
Email: vmanuel.martinez@udea.edu.co


---

Please note that the Acoustic Complexity Index (ACI) was computed using different spectral representations depending on the frequency domain, reflecting a precautionary and domain-specific analytical strategy. For the audible range, ACI was calculated from power spectrograms (PSD). This choice was made to emphasize energetic contrasts in the audible spectrum and to evaluate the sensitivity of spatial sampling designs under the most conservative and contrasting conditions, thereby maximizing the detection of differences among sampling strategies. Power-based representations also ensured numerical stability and methodological consistency with other energy-based indices used in this study (e.g., BI, NP, and NDSI).

"""

import os
import numpy as np
import pandas as pd
from pathlib import Path
from scipy import signal
from maad import sound, features
import torchaudio
from tqdm import tqdm
import warnings
import concurrent.futures

# Suppress every warning raised through the `warnings` module for the rest of
# the run (this applies to all categories, not only those of this script).
warnings.filterwarnings("ignore")

# Define paths in the local file system (adjust according to your case).
# NOTE: expanduser() only rewrites paths that start with "~"; as written,
# BASE_PATH is absolute and OUTPUT_DIR is relative to the working directory.
BASE_PATH = os.path.expanduser("/soundscapefiles")  # Input path
OUTPUT_DIR = os.path.expanduser("soundscapefilesoutput")  # Output directory
OUTPUT_FILE = os.path.join(OUTPUT_DIR, "AcousticIndices.xlsx")  # Output file

# Checkpoint aid (author's run log; presumably frequency bands in kHz that
# have already been processed -- confirm before relying on it)
## Frequencies
# 0-2 , BI, NDSI, Yes
# 2-12, yes
# 12-24, yes
# 0-24, yes

# Number of processors to use: leave four cores free, but always keep at least
# one worker. os.cpu_count() may return None when the count cannot be
# determined, so fall back to 1 instead of failing with a TypeError.
# (Subtract 2 instead of 4 to use more of the machine.)
WORKERS = max(1, (os.cpu_count() or 1) - 4)

# Create the output folder if it does not exist
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Acoustic indices parameters
# Frequency limits are compared against the frequency axis returned by
# signal.spectrogram, i.e. they are in the same unit as the sampling rate (Hz).
DEFAULT_FMIN = 0  # Lower bound of the analysis band passed to get_indices
DEFAULT_FMAX = 12000  # Upper bound of the analysis band passed to get_indices
BIOPHONY_BAND = (2000, 12000)  # Band used for BI and as the biophony band of NDSI
ANTHROPHONY_BAND = (0, 2000)  # Band used as the anthrophony band of NDSI
WINDOW_SIZE = 512  # Samples per spectrogram segment (nperseg)
NFFT = 512  # FFT length used for each segment (nfft)
OVERLAP = 0  # Samples shared by consecutive segments (noverlap)
J_CLUSTER = 5  # Number of consecutive spectrogram frames averaged into one time step for temporal ACI

# Options to enable/disable index calculation
# A disabled index is written to the output as 0 (not NaN) by get_indices.
CALCULATE_NDSI = True  # Change to False if you do NOT want to calculate NDSI
CALCULATE_BI = True   # Change to False if you do NOT want to calculate BI


def acoustic_complexity_index(Sxx, f, fmin, fmax, J_CLUSTER):
    """Return the temporal (ACItf) and spectral (ACIft) complexity sums of a band.

    Only the rows of ``Sxx`` whose frequency in ``f`` lies inside
    ``[fmin, fmax]`` (both ends included) are used.

    Args:
        Sxx: 2-D spectrogram, rows indexed by frequency and columns by time.
        f: 1-D frequency axis matching the rows of ``Sxx``.
        fmin: Lower frequency limit of the band.
        fmax: Upper frequency limit of the band.
        J_CLUSTER: Number of consecutive time frames averaged into one step
            before the temporal index is computed.

    Returns:
        Tuple ``(ACItf, ACIft)``. Both are NaN when the band selects no data.
    """
    band = Sxx[(f >= fmin) & (f <= fmax), :]
    if band.size == 0:
        return np.nan, np.nan

    # Temporal smoothing: average groups of J_CLUSTER frames, dropping the
    # incomplete tail. With fewer than two full groups the raw frames are used.
    n_groups = band.shape[1] // J_CLUSTER
    if n_groups > 1:
        trimmed = band[:, :n_groups * J_CLUSTER]
        time_series = np.mean(trimmed.reshape(band.shape[0], n_groups, J_CLUSTER), axis=2)
    else:
        time_series = band

    # Temporal ACI: change between neighbouring time steps, relative to the
    # summed intensity of the pair (small constant avoids division by zero).
    time_jump = np.abs(np.diff(time_series, axis=1))
    time_pair = time_series[:, :-1] + time_series[:, 1:] + 1e-10
    aci_tf = np.sum(np.sum(time_jump / time_pair, axis=1))

    # Frequency ACI: same ratio between neighbouring frequency rows, computed
    # on the un-averaged band.
    freq_jump = np.abs(np.diff(band, axis=0))
    freq_pair = band[:-1, :] + band[1:, :] + 1e-10
    aci_ft = np.sum(np.sum(freq_jump / freq_pair, axis=1))

    return aci_tf, aci_ft


def _normalized_entropy(prob):
    """Return the Shannon entropy of ``prob`` (bits) divided by log2(len(prob)).

    NaN is returned for fewer than two points, where the normalizer
    log2(len(prob)) is zero or undefined.
    """
    n_points = len(prob)
    if n_points < 2:
        return np.nan
    return -np.sum(prob * np.log2(prob + 1e-10)) / np.log2(n_points)


def calculate_H(audio, Fs, s, f, fmin, fmax):
    """Calculate temporal (Ht), spectral (Hf) and total (H) acoustic entropy.

    Ht is the normalized entropy of the Hilbert amplitude envelope of the
    first channel, ``audio[0, :]``. It is computed on the waveform as given
    and is NOT restricted to ``[fmin, fmax]``. Hf is the normalized entropy of
    the spectrum obtained by summing, over time, the rows of ``s`` whose
    frequency lies in ``[fmin, fmax]``. H is the product ``Ht * Hf``.

    Args:
        audio: 2-D array-like of samples, channels first; only channel 0 is used.
        Fs: Sampling rate. Unused; kept so existing callers keep working.
        s: 2-D spectrogram, rows indexed by frequency and columns by time.
        f: 1-D frequency axis matching the rows of ``s``.
        fmin: Lower frequency limit for Hf.
        fmax: Upper frequency limit for Hf.

    Returns:
        Tuple ``(Ht, Hf, H_total)``. An entropy is NaN when it is undefined:
        zero total energy, or fewer than two samples / frequency bins (the
        previous code returned 0 or -inf in those cases).
    """
    # Temporal entropy of the amplitude envelope.
    envelope = np.abs(signal.hilbert(np.asarray(audio[0, :])))
    envelope_total = envelope.sum()
    if envelope_total > 0:
        Ht = _normalized_entropy(envelope / envelope_total)
    else:
        Ht = np.nan  # silent or NaN signal: no probability mass to describe

    # Spectral entropy of the time-summed spectrum inside the band.
    freq_mask = (f >= fmin) & (f <= fmax)
    spectrum = s[freq_mask, :].sum(axis=1)
    if spectrum.size and spectrum.sum() > 0:
        # The small constant keeps the original normalization unchanged.
        Hf = _normalized_entropy(spectrum / np.sum(spectrum + 1e-10))
    else:
        Hf = np.nan  # empty band or zero energy

    H_total = Ht * Hf
    return Ht, Hf, H_total


def get_indices(file_path, fmin, fmax):
    """Compute the acoustic indices of one audio file and return them as a row.

    The first channel of the file is turned into a spectrogram with the
    module-level WINDOW_SIZE / OVERLAP / NFFT settings, and the indices are
    derived from it.

    Args:
        file_path: Path object of the audio file (``.name`` and
            ``.parent.name`` are written to the row).
        fmin: Lower frequency limit used for ACI, NP and Hf.
        fmax: Upper frequency limit used for ACI, NP and Hf.

    Returns:
        List ``[name, folder, ACItf, ACIft, BI, NP, Ht, Hf, H, NDSI]``.
        All eight index slots are NaN when the band of the spectrogram is
        empty or when any step fails; failures are printed, not raised.
        BI and NDSI are 0 when their CALCULATE_* switch is off.
    """

    def blank_row():
        # Same layout as a successful row, with every index left empty.
        return [file_path.name, file_path.parent.name] + [np.nan] * 8

    try:
        audio, fs = torchaudio.load(file_path)
        if audio.shape[1] == 0:
            raise ValueError(f"Empty or invalid audio file: {file_path.name}")

        f, _, s = signal.spectrogram(
            audio[0, :],
            fs,
            nperseg=WINDOW_SIZE,
            noverlap=OVERLAP,
            nfft=NFFT,
        )

        # Restrict the spectrogram to the requested frequency range.
        in_band = (f >= fmin) & (f <= fmax)
        band_freqs = f[in_band]
        band_spec = s[in_band, :]

        # Nothing usable in the band: no rows, only NaN, or all zeros.
        if band_spec.size == 0 or np.isnan(band_spec).all() or np.max(band_spec) == 0:
            print(f"⚠️ Warning: Empty spectrogram in {file_path.name}. This file will be skipped.")
            return blank_row()

        # ACI receives the full spectrogram and applies the band itself.
        aci_tf, aci_ft = acoustic_complexity_index(s, f, fmin, fmax, J_CLUSTER)

        # Bioacoustics Index on the biophony band; stored as 0 when disabled.
        bi = features.bioacoustics_index(s, f, flim=BIOPHONY_BAND) if CALCULATE_BI else 0

        # Number of peaks on the band-limited spectrogram; a ValueError from
        # the library only blanks this one index.
        try:
            n_peaks = features.number_of_peaks(
                band_spec,
                band_freqs,
                flim=(fmin, fmax),
                min_freq_dist=max(200, f[1] - f[0] + 1),
            )
        except ValueError as peak_err:
            print(f"⚠️ Error in `number_of_peaks()` in {file_path.name}: {peak_err}")
            n_peaks = np.nan

        # Acoustic entropy (temporal, spectral, total).
        h_t, h_f, h_total = calculate_H(audio, fs, band_spec, band_freqs, fmin, fmax)

        # NDSI is the first element of the library result; 0 when disabled.
        if CALCULATE_NDSI:
            ndsi = features.soundscape_index(
                s, f, flim_bioPh=BIOPHONY_BAND, flim_antroPh=ANTHROPHONY_BAND
            )[0]
        else:
            ndsi = 0

        return [
            file_path.name,
            file_path.parent.name,
            aci_tf,
            aci_ft,
            bi,
            n_peaks,
            h_t,
            h_f,
            h_total,
            ndsi,
        ]

    except Exception as exc:
        # Per-file boundary: report the failure and keep the batch running.
        print(f"❌ Error processing {file_path}: {exc}")
        return blank_row()


# Parallel processing
if __name__ == "__main__":
    # Collect every WAV file below BASE_PATH, whatever the case of its extension.
    files = list(Path(BASE_PATH).rglob("*.[wW][aA][vV]"))
    n_files = len(files)
    print(f"Number of files: {n_files}")

    # One task per file; every task receives the same default frequency limits.
    # map() yields rows in the order of `files`, and tqdm reports progress as
    # they are consumed.
    with concurrent.futures.ProcessPoolExecutor(max_workers=WORKERS) as executor:
        row_stream = executor.map(
            get_indices,
            files,
            [DEFAULT_FMIN] * n_files,
            [DEFAULT_FMAX] * n_files,
        )
        results = list(tqdm(row_stream, total=n_files, desc="Processing files"))

    # Save to Excel
    column_names = ["Name", "Folder", "ACItf", "ACIft", "BI", "NP", "Ht", "Hf", "H", "NDSI"]
    df = pd.DataFrame(results, columns=column_names)
    df.to_excel(OUTPUT_FILE, index=False)
    print(f"Results saved in: {OUTPUT_FILE}")