# Python Code for Gene Expression Analysis

# Step 1: Load required libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind
import gseapy as gp

# Step 2: Load Gene Expression Data
file_path = "Gene_Expression_Analysis.xlsx"


def find_sample_columns(df: pd.DataFrame) -> tuple:
    """Return ``(control_cols, pcos_cols)`` for the expression table *df*.

    A column belongs to a group when its header contains the substring
    "Control" or "PCOS" respectively. Headers are stringified first so that
    non-string headers (e.g. integers) do not raise ``TypeError``.

    Raises:
        ValueError: if either group has no matching column, since every
            downstream statistic would silently become NaN.
    """
    control_cols = [col for col in df.columns if "Control" in str(col)]
    pcos_cols = [col for col in df.columns if "PCOS" in str(col)]
    if not control_cols or not pcos_cols:
        raise ValueError(
            "Expected at least one column containing 'Control' and one "
            f"containing 'PCOS'; found {len(control_cols)} and {len(pcos_cols)}."
        )
    return control_cols, pcos_cols


def compute_differential_expression(
    df: pd.DataFrame, control_cols: list, pcos_cols: list, alpha: float = 0.05
) -> pd.DataFrame:
    """Add per-gene statistics to *df* in place and return it.

    Columns written: ``Mean_Control``, ``Mean_PCOS``, ``Fold_Change``,
    ``log2FC``, ``p_value``, ``Regulation`` and ``Significance``.

    Args:
        df: one row per gene, with the sample columns named in
            *control_cols* and *pcos_cols* holding numeric values.
        control_cols: column labels of the control samples.
        pcos_cols: column labels of the PCOS samples.
        alpha: p-value threshold below which a gene is labelled
            "Significantly Regulated".
    """
    # Step 4: Compute Mean, Fold Change, and log2FC
    df["Mean_Control"] = df[control_cols].mean(axis=1)
    df["Mean_PCOS"] = df[pcos_cols].mean(axis=1)
    fold_change = df["Mean_PCOS"] / df["Mean_Control"]
    df["Fold_Change"] = fold_change
    # log2 is only defined for a finite, strictly positive ratio. Masking the
    # rest to NaN avoids +/-inf values (zero means) that would otherwise
    # dominate any ranking by log2FC.
    usable_ratio = fold_change.where((fold_change > 0) & np.isfinite(fold_change))
    df["log2FC"] = np.log2(usable_ratio)

    # Step 5: Compute p-values using t-test
    # One vectorised call across all genes (axis=1) instead of a Python-level
    # call per row on mixed-type rows.
    _, p_values = ttest_ind(
        df[control_cols].to_numpy(dtype=float),
        df[pcos_cols].to_numpy(dtype=float),
        axis=1,
    )
    df["p_value"] = np.asarray(p_values, dtype=float)

    # Step 6: Assign Regulation Type
    # A zero or undefined log2FC is neither up- nor downregulated.
    log2fc = df["log2FC"]
    df["Regulation"] = np.select(
        [log2fc > 0, log2fc < 0, log2fc == 0],
        ["Upregulated", "Downregulated", "Unchanged"],
        default="Undefined",
    )
    # NOTE: the threshold is applied to raw p-values; no multiple-testing
    # correction is performed.
    df["Significance"] = np.where(
        df["p_value"] < alpha, "Significantly Regulated", "Not Significant"
    )
    return df


def plot_gene_expression(
    df: pd.DataFrame, gene, control_cols: list, pcos_cols: list
) -> None:
    """Show a Control-vs-PCOS boxplot of the expression values of *gene*.

    The sample columns of the matching row(s) are reshaped to long form and
    tagged with their group, so each box summarises one group rather than a
    single sample.
    """
    long_df = df.loc[df["id"] == gene, control_cols + pcos_cols].melt(
        var_name="Sample", value_name="Expression"
    )
    long_df["Group"] = np.where(
        long_df["Sample"].isin(control_cols), "Control", "PCOS"
    )
    plt.figure(figsize=(8, 5))
    sns.boxplot(data=long_df, x="Group", y="Expression")
    plt.title(f"Expression of {gene}")
    plt.show()


if __name__ == "__main__":
    df = pd.read_excel(file_path)
    if "id" not in df.columns:
        raise KeyError(f"Column 'id' (gene identifier) not found in {file_path}")

    # Step 3: Define Control and PCOS groups
    control_cols, pcos_cols = find_sample_columns(df)

    # Steps 4-6: Means, fold change, log2FC, p-values and labels
    compute_differential_expression(df, control_cols, pcos_cols)

    # Step 7: Filter Top 10 DEGs (significant genes with the largest log2FC,
    # i.e. the most strongly upregulated ones)
    deg_df = df[df["Significance"] == "Significantly Regulated"]
    top_genes = deg_df.nlargest(10, "log2FC")["id"].tolist()

    # Step 8: Plot Expression Boxplots for Top 10 DEGs
    for gene in top_genes:
        plot_gene_expression(df, gene, control_cols, pcos_cols)

    # Step 9: Perform enrichment analysis of the DEG list with Enrichr
    # (over-representation analysis via gseapy.enrichr)
    # Drop missing ids, force strings and de-duplicate while keeping order so
    # the list is safe both for the Enrichr query and for writing to disk.
    gene_list = list(dict.fromkeys(deg_df["id"].dropna().astype(str)))
    if gene_list:
        enrich_results = gp.enrichr(
            gene_list=gene_list,
            gene_sets=['KEGG_2021_Human', 'GO_Biological_Process_2021'],
        )
        print(enrich_results.results)
    else:
        print("No significantly regulated genes found; skipping enrichment analysis.")

    # Step 10: Save Gene List for STRING PPI Network Analysis
    with open("DEG_Gene_List.txt", "w", encoding="utf-8") as out_file:
        out_file.writelines(f"{gene}\n" for gene in gene_list)

    print("Analysis Completed! You can now use the DEG_Gene_List.txt file for PPI analysis on STRING.")