import pandas as pd
import cdsapi
import os
import shutil
import math
from concurrent.futures import ThreadPoolExecutor

# File paths.
# file_path_0: root directory under which process_row builds the
#              <foldername>/<facility> output folders.
# file_path / sheet_name: Excel workbook and sheet handed to pd.read_excel.
# NOTE: the three groups below assign the same three names one after another,
# so only the LAST group is in effect at run time; the earlier groups are
# overridden (they read like presets kept around for switching by hand).
file_path_0 = r"E:/2025.1 中国电厂结果"
file_path = r'D:/ProgramData/MF-IME/CODE/data/Togtoh_table.xlsx'
sheet_name = 'Sheet1'
file_path_0 = r"E:/2024.12 中国电厂数据"
file_path = r'D:/ProgramData/MF-IME/CODE/data/Xinjiang-multisource.xlsx'
sheet_name = 'Sheet1'
file_path_0 = r"F:/2024.7 美国核查结果"
file_path = r'D:/ProgramData/MF-IME/CODE/data/美国电厂GF5.xlsx'
sheet_name = '2025.1'

# Download the atmospheric-mass field for one observation.
def Get_Mass(date, Facility, lat, lon):
    """Download the ERA5 vertical integral of atmospheric mass around *date*.

    The request covers the hourly slots from one hour before to two hours
    after the observation, on the 1-degree cell that contains (lat, lon).
    The result is written to the current working directory.

    Args:
        date: Observation time; must support ``strftime`` and addition of
            ``pd.Timedelta`` (e.g. a ``pd.Timestamp``).
        Facility: Facility name, used as the file-name prefix.
        lat: Latitude of the facility in degrees.
        lon: Longitude of the facility in degrees.

    Returns:
        The file name ``<Facility>_<YYYYMMDD>_SL.nc`` (relative path), where
        the date part is the observation's own date.

    Note:
        When the four-hour window crosses midnight, the year/month/day fields
        are taken from the shifted timestamps, so the neighbouring day is
        requested as well. A CDS request is the cross product of its date and
        time fields, so in that case the file holds more hours than the
        window itself; pick the wanted steps by timestamp, not by position.
    """
    # Hourly slots: one hour before, the observation hour, and two hours after.
    window = [date + pd.Timedelta(hours=offset) for offset in (-1, 0, 1, 2)]

    def _unique(fmt):
        # De-duplicate while keeping chronological order.
        return list(dict.fromkeys(stamp.strftime(fmt) for stamp in window))

    # Derive every date field from the shifted timestamps themselves. Using
    # the unshifted date for year/month/day would ask for the wrapped hours
    # (e.g. 23:00 for an observation at 00:30) on the wrong calendar day.
    years = _unique("%Y")
    months = _unique("%m")
    days = _unique("%d")
    times = _unique("%H:00")

    # Integer-degree bounding box that contains the location.
    lat_min = math.floor(lat)
    lat_max = lat_min + 1
    lon_min = math.floor(lon)
    lon_max = lon_min + 1

    # The output name is keyed on the observation's own date.
    filename = f'{Facility}_{date.strftime("%Y%m%d")}_SL.nc'

    # Initialise the CDS API client.
    c = cdsapi.Client()

    # Fetch the data through the CDS API.
    c.retrieve(
        'reanalysis-era5-single-levels',
        {
            'product_type': 'reanalysis',
            'variable': 'vertical_integral_of_mass_of_atmosphere',
            'year': years,
            'month': months,
            'day': days,
            'time': times,
            'area': [lat_max, lon_min, lat_min, lon_max],  # North, West, South, East
            'data_format': 'netcdf',  # request NetCDF output
            'download_format': 'unarchived',  # deliver the file itself, not an archive
        },
        filename
    )
    return filename

# Handle a single row of the table.
def process_row(row):
    """Make sure the ERA5 file for one table row sits in its facility folder.

    Reads the 'UTC', 'facility', 'Latitude', 'Longitude' and 'foldername'
    fields of *row*. The destination folder is
    ``file_path_0/<foldername>/<facility>``; it is created when missing.
    If ``<facility>_<YYYYMMDD>_SL.nc`` is already there, nothing is
    downloaded. Otherwise the file is fetched with ``Get_Mass`` (which
    writes into the current working directory) and moved into the folder.

    Args:
        row: Mapping-like row (e.g. a ``pd.Series``) holding the fields
            listed above; 'UTC' must support ``strftime``.

    Returns:
        None. Progress is reported with ``print``.
    """
    date = row['UTC']
    Facility = row['facility']
    lat = row['Latitude']
    lon = row['Longitude']
    foldername = row['foldername']

    # Destination folder for this facility.
    target_folder = os.path.join(file_path_0, foldername)
    search_file_path = os.path.join(target_folder, Facility)

    # Rows are processed by many threads at once and several rows can share
    # one folder. exist_ok=True keeps a second thread from failing with
    # FileExistsError when another thread creates the folder first.
    folder_was_missing = not os.path.exists(search_file_path)
    os.makedirs(search_file_path, exist_ok=True)
    if folder_was_missing:
        print(f"成功创建路径: {search_file_path}")

    # Expected file name, keyed on facility and observation date.
    filename = f'{Facility}_{date.strftime("%Y%m%d")}_SL.nc'
    target_file_path = os.path.join(search_file_path, filename)

    # Nothing to do when the file is already in place.
    if os.path.exists(target_file_path):
        print(f"{foldername} already has nc file")
        return

    # Download into the working directory, then move into the facility folder.
    filename = Get_Mass(date, Facility, lat, lon)
    source_file_path = os.path.join('./', filename)
    target_file_path = os.path.join(search_file_path, filename)

    if not os.path.exists(source_file_path):
        print(f"File does not exist: {source_file_path}")
        return

    shutil.move(source_file_path, target_file_path)
    print(f"File successfully moved to target subdirectory: {target_folder}")

# Load the facility table from the configured workbook and sheet.
data = pd.read_excel(file_path, sheet_name=sheet_name)

# Fan the rows out over a pool of 40 worker threads.
with ThreadPoolExecutor(max_workers=40) as executor:
    # Queue one task per table row.
    pending_tasks = []
    for _, table_row in data.iterrows():
        pending_tasks.append(executor.submit(process_row, table_row))

    # Wait for the tasks in submission order; result() re-raises any
    # exception that occurred inside the worker.
    for task in pending_tasks:
        task.result()
