# Python Code for Gene Expression Analysis # Step 1: Load required libraries import pandas as pd import numpy as np import seaborn as sns import matplotlib.pyplot as plt from scipy.stats import ttest_ind import gseapy as gp # Step 2: Load Gene Expression Data file_path = "Gene_Expression_Analysis.xlsx" df = pd.read_excel(file_path) # Step 3: Define Control and PCOS groups control_cols = [col for col in df.columns if "Control" in col] pcos_cols = [col for col in df.columns if "PCOS" in col] # Step 4: Compute Mean, Fold Change, and log2FC df["Mean_Control"] = df[control_cols].mean(axis=1) df["Mean_PCOS"] = df[pcos_cols].mean(axis=1) df["Fold_Change"] = df["Mean_PCOS"] / df["Mean_Control"] df["log2FC"] = np.log2(df["Fold_Change"]) # Step 5: Compute p-values using t-test df["p_value"] = df.apply(lambda row: ttest_ind(row[control_cols], row[pcos_cols])[1], axis=1) # Step 6: Assign Regulation Type df["Regulation"] = np.where(df["log2FC"] > 0, "Upregulated", "Downregulated") df["Significance"] = np.where(df["p_value"] < 0.05, "Significantly Regulated", "Not Significant") # Step 7: Filter Top 10 DEGs deg_df = df[df["Significance"] == "Significantly Regulated"] top_genes = deg_df.nlargest(10, "log2FC")["id"].tolist() # Step 8: Plot Expression Boxplots for Top 10 DEGs for gene in top_genes: plt.figure(figsize=(8, 5)) sns.boxplot(data=df[df["id"] == gene][control_cols + pcos_cols].melt(), x="variable", y="value") plt.title(f"Expression of {gene}") plt.xticks(rotation=90) plt.show() # Step 9: Perform Gene Set Enrichment Analysis (GSEA) gene_list = deg_df["id"].tolist() enrich_results = gp.enrichr(gene_list=gene_list, gene_sets=['KEGG_2021_Human', 'GO_Biological_Process_2021']) print(enrich_results.results) # Step 10: Save Gene List for STRING PPI Network Analysis with open("DEG_Gene_List.txt", "w") as file: for gene in gene_list: file.write(gene + "\n") print("Analysis Completed! You can now use the DEG_Gene_List.txt file for PPI analysis on STRING.")