# %%
# Script for counting the number of ions in bridging and pairing sites

import sys
import glob
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import MDAnalysis as mda
from MDAnalysis.analysis import distances
from MDAnalysis.transformations import unwrap
from MDAnalysis.analysis import msd

# Root directory of the data set and the run sub-directories analysed below
bigfolder = 'zeta-for-analysis/'
folders = ['first/', 'second/', 'third/', 'fourth/']

# Accumulators for the summary table written at the end of the script.
# One value is appended to each list per analysed trajectory, over all folders.
final_folder, final_file = [], []

# number of Ca ions per class
(final_n_imm1, final_n_imm2, final_n_imm3,
 final_n_imm_oth, final_n_mob, final_n_mid) = ([] for _ in range(6))

# class-averaged O(oh) coordination
(final_avg_oh_imm1, final_avg_oh_imm2, final_avg_oh_imm3,
 final_avg_oh_imm_oth, final_avg_oh_mob, final_avg_oh_mid) = ([] for _ in range(6))

# class-averaged O(Si) coordination
(final_avg_osi_imm1, final_avg_osi_imm2, final_avg_osi_imm3,
 final_avg_osi_imm_oth, final_avg_osi_mob, final_avg_osi_mid) = ([] for _ in range(6))

# class-averaged O(w) coordination
(final_avg_ow_imm1, final_avg_ow_imm2, final_avg_ow_imm3,
 final_avg_ow_imm_oth, final_avg_ow_mob, final_avg_ow_mid) = ([] for _ in range(6))

# class-averaged z-distance from the Si chain
(final_avg_d_sichain_imm1, final_avg_d_sichain_imm2, final_avg_d_sichain_imm3,
 final_avg_d_sichain_imm_oth, final_avg_d_sichain_mob, final_avg_d_sichain_mid) = ([] for _ in range(6))

# average z-distance of the Cl atoms from the Si chain
final_avg_cl_d_sichain = []

for folder in folders:

    subfolder = f'{bigfolder}{folder}'
    print(f'\n******* Currently in folder "{subfolder}" ********')

    # Select the initial-structure and trajectory files in the directory.
    # Both lists are sorted so that they can be paired position by position.
    init_files = sorted(glob.glob(f"{subfolder}ZP-finish*"))
    traj_files = sorted(glob.glob(f"{subfolder}equil*"))

    # Give every trajectory the ".lammpsdump" extension so that MDAnalysis
    # recognises the format. Only the LAST extension is replaced
    # (os.path.splitext): cutting at the first dot would map "equil.1.dump"
    # and "equil.2.dump" onto the same target and overwrite one of them.
    name_files_traj = []
    for traj_file in traj_files:
        root, extension = os.path.splitext(traj_file)
        if extension == '.lammpsdump':
            name_files_traj.append(traj_file)
            continue
        renamed = f'{root}.lammpsdump'
        os.rename(traj_file, renamed)
        name_files_traj.append(renamed)

    for name_file_initial, name_file_traj in zip(init_files, name_files_traj):
        print('Reading files: ', name_file_initial, name_file_traj)

        #### read num atoms, bonds, angles
        # The counts are reset for every file so that a header missing one of
        # them cannot silently reuse the value parsed from the previous file.
        natoms = nbonds = nangles = None
        with open(name_file_initial, 'r') as in_file:
            for line in in_file:
                # split() instead of split(' '): tolerates leading/repeated blanks
                if 'atoms' in line:
                    natoms = int(line.split()[0])
                    print('\nnatoms = ',natoms)
                if 'bonds' in line:
                    nbonds = int(line.split()[0])
                    print('nbonds = ',nbonds)
                if 'angles' in line:
                    nangles = int(line.split()[0])
                    print('nangles = ',nangles)
        if None in (natoms, nbonds, nangles):
            raise ValueError(f'Could not read the atoms/bonds/angles counts from "{name_file_initial}"')
        ####
        u_in = mda.Universe(name_file_initial) # defined from the initial structure

        # Per-atom table: natoms rows starting after the first 35 lines.
        # NOTE(review): the fixed 35-line offset assumes one specific header layout -- confirm.
        df_in = pd.read_csv(name_file_initial, sep=r'\s+', skiprows=35, nrows=natoms, names=['id', 'mol', 'type', 'charge', 'x', 'y', 'z', 'vx', 'vy', 'vz'])
        # Angle table: skips the atom rows, a second natoms-long section and the
        # bond rows, each followed by 3 lines.
        # NOTE(review): presumably the second section is the velocities -- confirm.
        df_angles = pd.read_csv(name_file_initial, sep=r'\s+', skiprows=35+natoms+3+natoms+3+nbonds+3, nrows=nangles, names=['id','type','id_si','id_o','id_h'])

        # Atoms of molecule 2134 and, among them, the Ca (type 17),
        # O(oh) (type 11) and O(Si) (type 15) atoms.
        # NOTE(review): molecule 2134 is presumably the solution -- confirm.
        df_sol = df_in[df_in['mol']==2134]
        df_ca_sol = df_sol[df_sol['type']==17]
        df_o_oh = df_sol[df_sol['type']==11]
        df_osi = df_sol[df_sol['type']==15]

        ids_ca_sol = df_ca_sol['id'].values
        ids_oh_sol = df_o_oh['id'].values

        #### select the ids of the OH (in solution) bonded to Si
        # hard-coded ids; these oxygens are excluded from the O(oh) coordination count
        ids_oh_bound_si = [6893, 6973, 6542, 6603, 22, 243, 308, 594]
        ####

        # Select the ids of the Si in the chains on the surfaces
        def group(a,thr):
            """Split the values of `a` into clusters separated by gaps >= `thr`.

            The values are sorted first; a new cluster starts wherever the
            difference between two consecutive sorted values is at least `thr`.

            Returns a list of 1-D arrays (ascending, one per cluster), or an
            empty list when `a` is empty.
            """
            x = np.sort(a)
            if x.size == 0:
                # nothing to cluster (the mask-based version raised IndexError here)
                return []
            # index of the first element of every cluster after the first one
            breaks = np.flatnonzero(np.diff(x) >= thr) + 1
            return np.split(x, breaks)

        # z-coordinates of all type-10 (Si) atoms of the initial structure
        si_rows = df_in.loc[df_in['type']==10]
        z_si = list(si_rows['z'].values)

        # Cluster the z values (gap threshold 0.45) and keep the mean height of
        # every cluster with more than 25 atoms: these are taken as the Si chains.
        g = group(z_si, 0.45)
        z_chain = [np.mean(layer) for layer in g if len(layer)>25]
        num_chains = len(z_chain)

        assert num_chains==6, "The number of Si-chains is NOT 6!!!"

        # group() returns the clusters in ascending z, so entries 2 and 3 are
        # the two middle chains of the six.
        z_chain_bot, z_chain_top = z_chain[2], z_chain[3]
        print('\nSi chain bottom, z =', z_chain_bot)
        print('Si chain top, z =', z_chain_top)

        # The ids are fixed here, from the initial structure, so that the
        # membership of the two chains cannot change during the run.
        ids_top_chain = []
        ids_bot_chain = []
        for _, si_atom in si_rows.iterrows():
            z_atom = si_atom['z']
            # within 1.25 of a chain height -> member of that chain (bottom tested first)
            if abs(z_atom-z_chain_bot)<1.25:
                chain_ids = ids_bot_chain
            elif abs(z_atom-z_chain_top)<1.25:
                chain_ids = ids_top_chain
            else:
                continue
            chain_ids.append(si_atom['id'])


        # %%
        ############## NEW DEFINITIONS - MOBILE/IMMOBILE ##############
        ############# (instead of inner, outer, diffuse) ##############
        ######################## by using MSD #########################

        def unwrap(vector):
            """Return a function that translates the positions of a timestep by `vector`.

            NOTE(review): not called anywhere in this script (only referenced in a
            commented-out argument below); it shadows the `unwrap` imported from
            MDAnalysis.transformations.
            """
            def internal_func(ts):
                """Add `vector` to `ts.positions` and return `ts`."""
                ts.positions += np.array(vector)
                return ts
            return internal_func

        # The trajectory is loaded in memory so that the position shifts applied
        # below are still there when the frames are read again (next loop
        # iteration and MSD calculation).
        u_unwrap_final = mda.Universe(name_file_traj, atom_style="id type mass charge element xu yu zu vx vy vz", in_memory=True) #, transformations=[unwrap(final_move)])
        ca_17_uw = u_unwrap_final.select_atoms('type 17')
        # ids of the type-17 atoms that belong to the solution Ca, in trajectory order
        ids_ca_sol_uw = [id_ca for id_ca in ca_17_uw.ids if id_ca in ids_ca_sol]

        # Only the solution Ca are unwrapped: mask over ALL atoms of the universe
        is_ca_sol = np.isin(u_unwrap_final.atoms.ids, ids_ca_sol)
        n_atoms_uw = len(u_unwrap_final.atoms)
        trajectory_uw = u_unwrap_final.trajectory

        for ts in np.arange(0,len(trajectory_uw)-1):

            # Each frame is read once (instead of seeking twice per atom).
            # The mask indexing returns a copy, so `pos` survives the seek to ts+1.
            pos = trajectory_uw[ts].positions[is_ca_sol, :2]
            frame_new = trajectory_uw[ts+1]
            pos_new = frame_new.positions[is_ca_sol, :2]
            box_xy = u_unwrap_final.dimensions[:2] # box lengths of frame ts+1

            #### calculate vector
            # A jump of more than half a box length between two consecutive
            # frames is treated as a periodic-boundary crossing in x or y and
            # undone by shifting frame ts+1 by one box length (z is untouched).
            jump = pos - pos_new
            shift_xy = np.where(jump > box_xy/2, box_xy,
                                np.where(-jump > box_xy/2, -box_xy, 0.))

            final_move = np.zeros((n_atoms_uw, 3))
            final_move[is_ca_sol, :2] = shift_xy

            # Frame ts+1 now carries the shift, so the comparison with frame
            # ts+2 in the next iteration propagates it along the trajectory.
            frame_new.positions += final_move

        ############# calculate MSD only in xy plane for unwrapped Ca-coordinates ###########
        msd_all_xy_uw = []   # one MSD time series per solution Ca
        msd_mean_xy_uw = []  # mean of each series, used for the classification
        ca_17_uw = u_unwrap_final.select_atoms('type 17')
        for ca_atom in ca_17_uw:
            # calculate MSD
            if ca_atom.id in ids_ca_sol_uw:
                print('Ca ID =', ca_atom.id)
                MSD = msd.EinsteinMSD(u_unwrap_final, f'id {ca_atom.id}', 'xy')
                MSD.run()
                msd_ca =  MSD.results.timeseries
                msd_all_xy_uw.append(msd_ca)
                msd_mean_xy_uw.append(msd_ca.mean())

        # plus create a dataframe with the info for every Ca
        # The accumulator columns are created as floats: they receive float
        # values during the trajectory loop, and writing floats into integer
        # columns is deprecated in recent pandas.
        n_ca_sol = len(df_ca_sol)
        df_ca_final = pd.DataFrame({
            'id': sorted(df_ca_sol['id']),
            'label': ['']*n_ca_sol,
            'avg_osi': [0.0]*n_ca_sol,
            'avg_oh': [0.0]*n_ca_sol,
            'avg_ow': [0.0]*n_ca_sol,
            'avg_d_sichain': [0.0]*n_ca_sol,
        })

        # create a variable to store the avg distance in z direction of all Cl atoms from Si-chain
        avg_d_cl = 0

        # Mobility classes from the mean xy MSD:
        # < 1 -> immobile, > 50 -> mobile, anything in between -> middle
        ids_mob = []
        ids_imm = []
        ids_mid = []
        for msd_i, id_i in zip(msd_mean_xy_uw, ids_ca_sol_uw):
            if msd_i < 1:
                ids_imm.append(id_i)
            elif msd_i > 50:
                ids_mob.append(id_i)
            else:
                ids_mid.append(id_i)

        # Lag-time axis. Its length is taken from the MSD series themselves, so
        # that x and y always match whatever the length of the trajectory is
        # (a fixed frame count only works for one particular trajectory length).
        nframes = len(msd_all_xy_uw[0]) if msd_all_xy_uw else 0
        timestep = 1 # this needs to be the actual time between frames
        lagtimes = np.arange(nframes)*timestep # make the lag-time axis
        fig, axs = plt.subplots(2, 3, figsize=(13,8))
        axs[0][0].set_title('Ca immobile')
        axs[0][1].set_title('Ca mobile')
        axs[0][2].set_title('Ca middle')

        # figure column of every Ca: 0 = immobile, 1 = mobile, 2 = middle
        column_of = {}
        for column, ids_class in enumerate((ids_imm, ids_mob, ids_mid)):
            for id_i in ids_class:
                column_of.setdefault(id_i, column)

        # top row: mean MSD as a horizontal line, bottom row: the MSD curve;
        # the first and the last two points of every series are left out
        for msd_i, id_i in zip(msd_all_xy_uw,ids_ca_sol_uw):
            column = column_of.get(id_i)
            if column is None:
                continue
            msd_window = msd_i[1:-2]
            mean_msd = np.mean(msd_window)
            axs[0][column].plot(lagtimes[1:-2], mean_msd*np.ones(len(msd_window)), ls="-", label=f'id = {id_i}')
            axs[0][column].annotate(id_i, (0, mean_msd))
            axs[1][column].plot(lagtimes[1:-2], msd_window, ls="-", label=f'id = {id_i}')

        axs[0][0].set_xlabel('time')
        axs[0][1].set_xlabel('time')
        axs[0][2].set_xlabel('time')
        axs[0][0].set_ylabel('mean MSD xy')
        axs[1][0].set_ylabel('MSD xy')
        # NOTE(review): the file name does not depend on the trajectory, so every
        # file pair of a folder overwrites the same picture -- confirm this is intended.
        fig.savefig(f'{subfolder}MSD.png')
        # release the figure: one is created per analysed trajectory
        plt.close(fig)


        # Second, untouched copy of the trajectory, used for the coordination
        # and distance analysis
        u_traj = mda.Universe(name_file_traj, atom_style="id type mass charge element xu yu zu vx vy vz", dt=1.0) 
        nsteps = len(u_traj.trajectory)
        print('Number of steps in the trajectory = ', nsteps)

        # atom groups by type: 11 = O(oh), 13 = O(w), 15 = O(Si), 17 = Ca, 10 = Si, 9 = Cl
        o_11, o_13, o_15, ca_17, si_10, cl_9 = (
            u_traj.select_atoms(f'type {atom_type}') for atom_type in (11, 13, 15, 17, 10, 9)
        )
        n_cl = cl_9.n_atoms

        # Every 10th frame is analysed, starting from frame 1;
        # `count` is the number of frames actually visited.
        count = 0
        for i in np.arange(1,nsteps,10):
            u_traj.trajectory[i]            

            ################ INTERNAL DIFFERENTIATION BETWEEN IMMOBILE Ca ################
            # divide Ca between imm-coord-O(Si) = imm1, imm-coord-O(oh) = imm2, imm-coord-both = imm3
            ids_imm1 = []
            ids_imm2 = []
            ids_imm3 = []  # never filled here: the final labels are assigned after the loop

            cutoff = 3.16            # O-Ca distance below which the O counts as coordinated
            imm_ids = set(ids_imm)   # constant-time membership test
            ca_ids = ca_17.ids

            # O(oh) that belong to the solution and are NOT bonded to Si
            oh_ids = o_11.ids
            free_oh = np.isin(oh_ids, ids_oh_sol) & ~np.isin(oh_ids, ids_oh_bound_si)

            # [O id, Ca id] pairs found in this frame, per oxygen type
            ids_pair_ca_oh, ids_pair_ca_osi, ids_pair_ca_ow = [], [], []
            # Ca id -> ids of the coordinating oxygens, per oxygen type
            dict_ca_oh, dict_ca_osi, dict_ca_ow = {}, {}, {}

            # (oxygens, optional mask of usable oxygens, pair list, Ca->O dict,
            #  list collecting the immobile Ca that are hit)
            shells = (
                (o_11, free_oh, ids_pair_ca_oh, dict_ca_oh, ids_imm2),   ##### oh coordination
                (o_15, None, ids_pair_ca_osi, dict_ca_osi, ids_imm1),    ##### o(Si) coordination
                (o_13, None, ids_pair_ca_ow, dict_ca_ow, None),          ##### o(w) coordination
            )
            for oxygens, usable, pairs, bonded, imm_hits in shells:
                dist_arr = distances.distance_array(oxygens.positions, ca_17.positions, box=u_traj.dimensions)
                # Distances below 0.001 are ignored, everything else below the
                # cutoff is a contact. Every (row, col) of the distance matrix
                # occurs once, so no duplicate check is needed.
                rows, cols = np.where((dist_arr >= 0.001) & (dist_arr < cutoff))
                if usable is not None:
                    keep = usable[rows]
                    rows, cols = rows[keep], cols[keep]
                o_ids = oxygens.ids
                for id_o, id_ca in zip(o_ids[rows], ca_ids[cols]):
                    pairs.append([id_o, id_ca])
                    if imm_hits is not None and id_ca in imm_ids:
                        imm_hits.append(id_ca)
                    bonded.setdefault(id_ca, []).append(id_o)
            ####

            # Add the coordination numbers of this frame (number of oxygens
            # found around each Ca) to the running totals of the per-Ca table
            for column, bonded in (('avg_oh', dict_ca_oh), ('avg_osi', dict_ca_osi), ('avg_ow', dict_ca_ow)):
                for key, partners in bonded.items():
                    this_ca = df_ca_final['id']==key
                    df_ca_final.loc[this_ca, column] = df_ca_final.loc[this_ca, column].values + len(partners)



            ################################################
            ############# z mean from Si-chain #############

            #### mean z of the two surface Si-chains in the current frame
            si_ids = si_10.ids
            si_z = si_10.positions[:, 2]
            # chain membership was fixed from the initial structure; an atom
            # listed in both chains is counted for the bottom one only
            in_bot = np.isin(si_ids, ids_bot_chain)
            in_top = np.isin(si_ids, ids_top_chain) & ~in_bot
            num_si_chain_bot = int(np.count_nonzero(in_bot))
            num_si_chain_top = int(np.count_nonzero(in_top))
            # builtin sum: adds the values one after the other, in atom order
            z_si_chain_bot = sum(si_z[in_bot])
            z_si_chain_top = sum(si_z[in_top])
            z_si_chain_bot /= num_si_chain_bot
            z_si_chain_top /= num_si_chain_top
            #####

            # Ions in the lower half of the box are measured from the bottom
            # chain (signed difference), the others from the top chain
            # (absolute difference).
            half_box_z = u_traj.dimensions[2]/2

            # ---- z-distance per Ca ----
            tracked_ids = df_ca_final['id'].values
            for ca_atom in ca_17:
                if ca_atom.id not in tracked_ids:
                    continue
                z_ca = ca_atom.position[2]
                d = (z_ca - z_si_chain_bot) if z_ca < half_box_z else abs(z_ca - z_si_chain_top)
                this_ca = df_ca_final['id']==ca_atom.id
                df_ca_final.loc[this_ca, 'avg_d_sichain'] = df_ca_final.loc[this_ca, 'avg_d_sichain'].values + d

            # ---- z-distance per Cl ----
            d_cl = 0
            for cl_atom in cl_9:
                z_cl = cl_atom.position[2]
                d = (z_cl - z_si_chain_bot) if z_cl < half_box_z else abs(z_cl - z_si_chain_top)
                d_cl += d
            d_cl /= n_cl
            avg_d_cl += d_cl # already avg over the amount of Cl!

            # one more frame analysed
            count += 1
        # end loop on trajectory

        # turn the running totals into averages over the analysed frames
        for column in ('avg_oh', 'avg_ow', 'avg_osi', 'avg_d_sichain'):
            df_ca_final[column] = df_ca_final[column].values/count
        avg_d_cl /= count

        # Label every Ca. The mobility class (from the MSD) comes first, so
        # that a mobile or intermediate ion can never receive an IMM label;
        # only the remaining (immobile) ions are split by their coordination:
        #   IMM1: coordinated by O(Si) only, IMM2: by O(oh) only, IMM3: by both.
        # Immobile ions matching none of the three keep the empty label.
        mob_ids, mid_ids = set(ids_mob), set(ids_mid)
        for ind, row in df_ca_final.iterrows():
            if row['id'] in mob_ids:
                label = 'MOB'
            elif row['id'] in mid_ids:
                label = 'mid'
            elif row['avg_osi'] > 0.6 and row['avg_oh']< 0.1:
                label = 'IMM1'
            elif row['avg_oh'] > 0.6 and row['avg_osi']< 0.1:
                label = 'IMM2'
            elif row['avg_osi'] > 0.6 and row['avg_oh']> 0.6:
                label = 'IMM3'
            else:
                continue
            df_ca_final.loc[ind, 'label'] = label
        # df_ca_final
        # df_ca_final

        # Per-class statistics: number of ions and class averages of the O(Si),
        # O(oh), O(w) coordination and of the distance from the Si chain.
        # An empty class gets 0 for every value (this includes the "other
        # immobile" class, whose zeros used to be assigned to misspelled names).
        stat_columns = ('avg_osi', 'avg_oh', 'avg_ow', 'avg_d_sichain')
        class_stats = {}
        for label in ('IMM1', 'IMM2', 'IMM3', '', 'MOB', 'mid'):
            df_class = df_ca_final.loc[df_ca_final['label']==label]
            n_class = len(df_class)
            if n_class != 0:
                means = tuple(sum(df_class[column].tolist())/n_class for column in stat_columns)
            else:
                means = (0, 0, 0, 0)
            class_stats[label] = (df_class, n_class) + means

        ### imm1
        df_imm1, n_imm1, avg_osi_imm1, avg_oh_imm1, avg_ow_imm1, avg_z_imm1 = class_stats['IMM1']
        ##### imm2
        df_imm2, n_imm2, avg_osi_imm2, avg_oh_imm2, avg_ow_imm2, avg_z_imm2 = class_stats['IMM2']
        #### imm3
        df_imm3, n_imm3, avg_osi_imm3, avg_oh_imm3, avg_ow_imm3, avg_z_imm3 = class_stats['IMM3']
        #### imm_oth (immobile Ca that received no label)
        df_imm_oth, n_imm_oth, avg_osi_imm_oth, avg_oh_imm_oth, avg_ow_imm_oth, avg_z_imm_oth = class_stats['']
        #### mob
        df_mob, n_mob, avg_osi_mob, avg_oh_mob, avg_ow_mob, avg_z_mob = class_stats['MOB']
        #### mid
        df_mid, n_mid, avg_osi_mid, avg_oh_mid, avg_ow_mid, avg_z_mid = class_stats['mid']

        # save single dataset
        # File name of the trajectory without directory and without its LAST
        # extension only, so that names containing further dots stay distinct.
        name = os.path.splitext(os.path.basename(name_file_traj))[0]
        df_ca_final.to_csv(f'{subfolder}data-{name}.csv', sep=' ')

        print('     #ions  avg_osi_coord    avg_oh_coord    avg_ow_coord    avg_dist_sichain')
        print('IMM1:', n_imm1, avg_osi_imm1, avg_oh_imm1, avg_ow_imm1, avg_z_imm1)
        print('IMM2:', n_imm2, avg_osi_imm2, avg_oh_imm2, avg_ow_imm2, avg_z_imm2)
        print('IMM3:', n_imm3, avg_osi_imm3, avg_oh_imm3, avg_ow_imm3, avg_z_imm3)
        print('IMM_oth:', n_imm_oth, avg_osi_imm_oth, avg_oh_imm_oth, avg_ow_imm_oth, avg_z_imm_oth)
        print('MOB:', n_mob, avg_osi_mob, avg_oh_mob, avg_ow_mob, avg_z_mob)
        print('mid:', n_mid, avg_osi_mid, avg_oh_mid, avg_ow_mid, avg_z_mid)

        # store values for this folder: one entry per summary list
        for store, value in (
            (final_folder, folder),
            (final_file, name_file_traj),
            # number of ions per class
            (final_n_imm1, n_imm1),
            (final_n_imm2, n_imm2),
            (final_n_imm3, n_imm3),
            (final_n_imm_oth, n_imm_oth),
            (final_n_mob, n_mob),
            (final_n_mid, n_mid),
            # O(oh) coordination
            (final_avg_oh_imm1, avg_oh_imm1),
            (final_avg_oh_imm2, avg_oh_imm2),
            (final_avg_oh_imm3, avg_oh_imm3),
            (final_avg_oh_imm_oth, avg_oh_imm_oth),
            (final_avg_oh_mob, avg_oh_mob),
            (final_avg_oh_mid, avg_oh_mid),
            # O(Si) coordination
            (final_avg_osi_imm1, avg_osi_imm1),
            (final_avg_osi_imm2, avg_osi_imm2),
            (final_avg_osi_imm3, avg_osi_imm3),
            (final_avg_osi_imm_oth, avg_osi_imm_oth),
            (final_avg_osi_mob, avg_osi_mob),
            (final_avg_osi_mid, avg_osi_mid),
            # O(w) coordination
            (final_avg_ow_imm1, avg_ow_imm1),
            (final_avg_ow_imm2, avg_ow_imm2),
            (final_avg_ow_imm3, avg_ow_imm3),
            (final_avg_ow_imm_oth, avg_ow_imm_oth),
            (final_avg_ow_mob, avg_ow_mob),
            (final_avg_ow_mid, avg_ow_mid),
            # distance from the Si chain
            (final_avg_d_sichain_imm1, avg_z_imm1),
            (final_avg_d_sichain_imm2, avg_z_imm2),
            (final_avg_d_sichain_imm3, avg_z_imm3),
            (final_avg_d_sichain_imm_oth, avg_z_imm_oth),
            (final_avg_d_sichain_mob, avg_z_mob),
            (final_avg_d_sichain_mid, avg_z_mid),
            # Cl distance from the Si chain
            (final_avg_cl_d_sichain, avg_d_cl),
        ):
            store.append(value)

# Summary table: one row per analysed trajectory, one column per accumulator list
df_final = pd.DataFrame({
    'folder': final_folder,
    'file': final_file,
    # number of ions per class
    'n_imm1': final_n_imm1,
    'n_imm2': final_n_imm2,
    'n_imm3': final_n_imm3,
    'n_imm_oth': final_n_imm_oth,
    'n_mob': final_n_mob,
    'n_mid': final_n_mid,
    # O(oh) coordination
    'avg_oh_imm1': final_avg_oh_imm1,
    'avg_oh_imm2': final_avg_oh_imm2,
    'avg_oh_imm3': final_avg_oh_imm3,
    'avg_oh_imm_oth': final_avg_oh_imm_oth,
    'avg_oh_mob': final_avg_oh_mob,
    'avg_oh_mid': final_avg_oh_mid,
    # O(Si) coordination
    'avg_osi_imm1': final_avg_osi_imm1,
    'avg_osi_imm2': final_avg_osi_imm2,
    'avg_osi_imm3': final_avg_osi_imm3,
    'avg_osi_imm_oth': final_avg_osi_imm_oth,
    'avg_osi_mob': final_avg_osi_mob,
    'avg_osi_mid': final_avg_osi_mid,
    # O(w) coordination
    'avg_ow_imm1': final_avg_ow_imm1,
    'avg_ow_imm2': final_avg_ow_imm2,
    'avg_ow_imm3': final_avg_ow_imm3,
    'avg_ow_imm_oth': final_avg_ow_imm_oth,
    'avg_ow_mob': final_avg_ow_mob,
    'avg_ow_mid': final_avg_ow_mid,
    # distance from the Si chain
    'avg_z_imm1': final_avg_d_sichain_imm1,
    'avg_z_imm2': final_avg_d_sichain_imm2,
    'avg_z_imm3': final_avg_d_sichain_imm3,
    'avg_z_imm_oth': final_avg_d_sichain_imm_oth,
    'avg_z_mob': final_avg_d_sichain_mob,
    'avg_z_mid': final_avg_d_sichain_mid,
    # Cl distance from the Si chain
    'avg_Cl_z_dist': final_avg_cl_d_sichain,
})

# save final file
df_final.to_csv(f'{bigfolder}data_ZPanalysis_withCl.csv', sep=' ')


# ## to read/load the file
# import pandas as pd
# df_final = pd.read_csv('zeta-for-analysis/data_ZPanalysis.csv', delim_whitespace=True,)
# # df_in = pd.read_csv(name_file_initial, delim_whitespace=True, skiprows=35, nrows=natoms, names=['id', 'mol', 'type', 'charge', 'x', 'y', 'z', 'vx', 'vy', 'vz'])

# df_final


