"""Confidence-interval utilities for ACC / correlation-coefficient analyses.

Provides bootstrap-based and Fisher-transform-based confidence intervals for
anomaly correlation coefficients (ACC) of several forecast systems
(GraphFT / GraphOP / S2S / Pangu), plus small helpers for regional box
extraction and plotting bounds.
"""

# --- standard library ---
import datetime as dt
import glob
import os

# --- third party ---
import numpy as np
import pandas as pd
import scipy.stats as st
from scipy.stats import pearsonr, bootstrap
import xarray as xr
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import matplotlib.patches as mpatches
from matplotlib import cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import cartopy
from cartopy import config
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
import cfgrib
import xesmf as xe
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score


def bootstrapping(R: list, condition=False):
    """Bootstrap a 95% confidence interval for the mean of R coefficients.

    Args:
        R: sequence of R (correlation) coefficients to average.
        condition: if True, return the symmetric half-width
            ``(high - low) / 2`` instead of the interval object.

    Returns:
        A ``ConfidenceInterval`` (attributes ``.low`` / ``.high``) when
        ``condition`` is False, otherwise the float half-width.
    """
    # scipy.stats.bootstrap expects the data wrapped in a sequence of samples
    data = (R,)
    # 95% BCa bootstrap CI for the *mean*; random_state fixed for reproducibility
    res = bootstrap(data, np.mean, confidence_level=0.95,
                    random_state=0, n_resamples=1000, method='BCa')
    if not condition:
        return res.confidence_interval
    return (res.confidence_interval.high - res.confidence_interval.low) / 2


def bootstrapping_CI(files: list, condition=False, R_maps=False):
    """Bootstrap symmetric CI half-widths of the mean ACC for several files.

    Args:
        files: paths of ``.npy`` files holding R coefficients, one per model.
        condition: Pangu-specific flag — Pangu produces no precipitation
            forecasts, so its (mock) precipitation entry ``files[0]`` is
            forced to a CI of 0.
        R_maps: if True, flatten each array over time and space before
            bootstrapping the mean.

    Returns:
        List of symmetric CI half-widths ``(high - low) / 2``, one per file
        (0 for the Pangu precipitation slot when ``condition`` is True).
    """
    files = [np.load(file) for file in files]
    if R_maps:
        # collapse time x space so the mean is taken over all grid points
        files = [file.reshape(-1) for file in files]
    CIs = [bootstrapping(file) for file in files]
    if not condition:
        return [(ci.high - ci.low) / 2 for ci in CIs]
    # Pangu has no precipitation forecast: zero out its (mock-data) interval
    CIs[0] = 0
    CI_symetric = []
    for ci in CIs:
        if ci != 0:
            CI_symetric.append((ci.high - ci.low) / 2)
        else:
            CI_symetric.append(0)
    return CI_symetric


def compute_errors_mean_differences(samples_A, samples_B):
    """Bootstrap the difference of means between two samples.

    Resamples both inputs with replacement ``num_samples`` times and collects
    ``mean(B*) - mean(A*)``.

    Args:
        samples_A, samples_B: 1-D array-likes of values to compare.

    Returns:
        Half-width of the [5th, 95th] percentile interval of the bootstrapped
        mean differences (NOTE: [5, 95] is a 90% interval, not 95%).
    """
    num_samples = 1000
    diffs = []
    for _ in range(num_samples):
        subsample_A = np.random.choice(samples_A, size=len(samples_A), replace=True)
        subsample_B = np.random.choice(samples_B, size=len(samples_B), replace=True)
        diffs.append(np.mean(subsample_B) - np.mean(subsample_A))
    # fix: the original compared a Python list against a float
    # (``diffs >= scalar``), which raises TypeError; work on an ndarray
    diffs = np.asarray(diffs)
    confidence_intervals = np.percentile(diffs, [5, 95])
    return (confidence_intervals[1] - confidence_intervals[0]) / 2


def data_mean(files: list, condition=False):
    """Load ``.npy`` files and return the mean ACC of each.

    Args:
        files: paths of ``.npy`` files holding R coefficients.
        condition: if True, force the first entry to 0 (Pangu precipitation
            slot holds mock data — see ``bootstrapping_CI``).

    Returns:
        List of per-file means.
    """
    arrays = [np.load(file) for file in files]
    ACC = [np.mean(arr) for arr in arrays]
    if condition:
        ACC[0] = 0
    return ACC


def fisher_transform(r):
    """Fisher z-transform of a correlation coefficient ``r`` (float)."""
    # arctanh(r) == log((1 + r) / (1 - r)) / 2
    return np.arctanh(r)


def upper_lower_bounds(z, percentage_confidence):
    """Confidence bounds for a correlation coefficient via its Fisher z.

    Args:
        z: Fisher transform of the correlation coefficient R.
        percentage_confidence: confidence level in [0, 1], e.g. 0.95 for 95%.

    Returns:
        ``[lower_bound, upper_bound]`` back-transformed to correlation space.
    """
    # TODO(review): effective sample size (neff) to be computed with other
    # scripts — this hard-coded value should be confirmed
    sample_size = 96 * (240 / 10)
    alpha = 1 - percentage_confidence
    # standard error of z is approximately 1 / sqrt(n - 3)
    std_approx = 1 / np.sqrt(sample_size - 3)
    # fix: use the requested confidence level; the original hard-coded
    # st.norm.ppf(.975) and silently ignored ``percentage_confidence``
    crit = st.norm.ppf(1 - alpha / 2)
    lower = z - crit * std_approx
    upper = z + crit * std_approx
    # inverse Fisher transform: tanh(x) == (exp(2x) - 1) / (exp(2x) + 1)
    return [np.tanh(lower), np.tanh(upper)]


def geographical_boxes(R, min_lon: int, min_lat: int, max_lon: int, max_lat: int, name: str):
    """Extract a lat/lon box from ``R`` and save it (and its mean) to disk.

    Args:
        R: xarray object with ``latitude`` / ``longitude`` coordinates.
        min_lon, min_lat, max_lon, max_lat: box corners in degrees.
        name: suffix used for the output filenames.
    """
    # latitude coordinate is stored descending, so slice from max to min
    min_lat, max_lat = max_lat, min_lat
    box = R.sel(latitude=slice(min_lat, max_lat), longitude=slice(min_lon, max_lon))
    np.save(f'/home/vsansi01/R_boxes/boxe_{name}.npy', box.values)
    np.save(f'/home/vsansi01/R_boxes/mean_value_{name}.npy', box.mean().values)


def computing_confidence_intervals(r, percentage_confidence):
    """Fisher-transform CI for a single correlation coefficient ``r``.

    Returns ``[lower_bound, upper_bound]`` in correlation space.
    """
    z = fisher_transform(r)
    return upper_lower_bounds(z, percentage_confidence)


def Reshape_arrays(data: xr.Dataset):
    """Squeeze ``data`` and swap its first two axes; prints the result shape."""
    reshaped = np.swapaxes(np.squeeze(data.values), 0, 1)
    print(reshaped.shape)
    return reshaped


def CI_for_plots(values, errors, up=False, down=False):
    """Combine central values with CI bounds for plotting.

    Args:
        values: central values.
        errors: per-value ``[lower, upper]`` pairs.
        up: return the upper curve ``value + error[1]``.
        down: return the lower curve ``value - error[0]``.

    Returns:
        ndarray of bounds, or None when ``up`` and ``down`` are both set or
        both unset (caller must pick exactly one).
    """
    if up and not down:
        return np.array([value + error[1] for value, error in zip(values, errors)])
    if down and not up:
        return np.array([value - error[0] for value, error in zip(values, errors)])


def reading_Rxt(files, condition=False):
    """Load latitudinal-mean R arrays and their pairwise differences.

    Args:
        files: four file names, e.g. 'R_GraphFT_t2m_week3', 'R_S2S_t2m_week3',
            'R_GraphOP_t2m_week3', 'R_Pangu_t2m_week3'.
        condition: True to skip GraphOP (no GraphOP GraphCast exists for the
            4-weeks-ahead lead time).

    Returns:
        Without ``condition``: ``(files[0], files[1], files[2],
        files[0] - files[1], files[0] - files[2])``; with ``condition``:
        ``(files[0], files[2], files[0] - files[2])``.
    """
    # NOTE(review): the docstring file order puts S2S second, but the caller
    # unpacks files[1] as GraphOP — verify the actual file list ordering
    files = [np.load(file) for file in files]
    print(files[0].shape)
    if not condition:
        return files[0], files[1], files[2], files[0] - files[1], files[0] - files[2]
    return files[0], files[2], files[0] - files[2]


def computing_error_fisher_mean(files, condition=False):
    """Per-latitude Fisher-transform CIs for the arrays from ``reading_Rxt``.

    Args:
        files: see ``reading_Rxt``.
        condition: see ``reading_Rxt`` (True skips GraphOP).

    Returns:
        Tuple of ndarrays of ``[lower, upper]`` bounds, one array per input
        array of ``reading_Rxt`` (5 arrays, or 3 when ``condition``).
    """
    def _fisher_errors(arr):
        # 95% CI per coefficient
        return np.array([computing_confidence_intervals(r, 0.95) for r in arr])

    if not condition:
        Rxt_GraphFT, Rxt_GraphOP, Rxt_S2S, Rxt_GraphFT_GraphOP, Rxt_GraphFT_S2S = \
            reading_Rxt(files, condition)
        return (_fisher_errors(Rxt_GraphFT),
                _fisher_errors(Rxt_GraphOP),
                _fisher_errors(Rxt_S2S),
                _fisher_errors(Rxt_GraphFT_GraphOP),
                _fisher_errors(Rxt_GraphFT_S2S))
    Rxt_GraphFT, Rxt_S2S, Rxt_GraphFT_S2S = reading_Rxt(files, condition)
    return (_fisher_errors(Rxt_GraphFT),
            _fisher_errors(Rxt_S2S),
            _fisher_errors(Rxt_GraphFT_S2S))


if __name__ == "__main__":
    print(computing_confidence_intervals(0.2, 0.95))