If we look at the first PCs, all the biological differences appear very strong so technical artifacts are likely to have little influence. When I perform further normalization I see no drastic change in the PCA plots .
pca = prcomp(log2(t(data)),scale = TRUE)
summary(pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 22.1978 14.3094 12.9749 11.04819 9.70028 8.61227
## Proportion of Variance 0.3705 0.1540 0.1266 0.09178 0.07075 0.05577
## Cumulative Proportion 0.3705 0.5244 0.6510 0.74279 0.81354 0.86931
## PC7 PC8 PC9 PC10 PC11 PC12
## Standard deviation 7.14468 6.70831 5.76817 5.05142 4.35730 1.543e-13
## Proportion of Variance 0.03838 0.03384 0.02502 0.01919 0.01428 0.000e+00
## Cumulative Proportion 0.90769 0.94152 0.96654 0.98572 1.00000 1.000e+00
prop = signif(summary(pca)$importance['Proportion of Variance',],2)
df = data.frame(pca$x, design[colnames(data),])
p1 = ggplot(df, aes(x = PC1, y = PC2, colour = Group)) + geom_point(size = 3) + theme_classic() + xlab(paste('PC1 (',prop['PC1'],'%)',sep = ''))+ ylab(paste('PC2 (',prop['PC2'],'%)',sep = ''))
p2 = ggplot(df, aes(x = PC3, y = PC4, colour = Group)) + geom_point(size = 3) + theme_classic() + xlab(paste('PC3 (',prop['PC3'],'%)',sep = ''))+ ylab(paste('PC4 (',prop['PC4'],'%)',sep = ''))
library(gridExtra)
grid.arrange(p1,p2)
library(org.Hs.eg.db)
#Read in pathways
keys = unlist(lapply(strsplit(rownames(data),'\\|'),function(x)x[2]))
names(keys) = rownames(data)
keysR = rownames(data)
names(keysR) = keys
mapKeys = mapIds(org.Hs.eg.db, keys = keys,column = 'ENTREZID', keytype = "UNIPROT",multiVals="CharacterList")
mapKeysRev = unlist(reverseSplit(mapKeys))
#map = unlist(lapply(mapKeys,function(x)x[1]))
#mapR = names(map)
#names(mapR) = map
Uniprot2prot = unlist(lapply(strsplit(rownames(data),'\\|'),function(x)x[3]))
Uniprot2prot = unlist(lapply(strsplit(Uniprot2prot,'_'),function(x)x[1]))
names(Uniprot2prot) = rownames(data)
library(org.Hs.eg.db)
file = read.delim('homo-sapiens-9606.txt',sep = '$',col.names=FALSE)
file = apply(file,1,function(x)unlist(strsplit(x,'\t')))
def = unlist(lapply(file,function(x)x[2]))
names(def) = unlist(lapply(file,function(x)x[1]))
names(file) = unlist(lapply(file,function(x)x[1]))
file = lapply(file,function(x)unlist(strsplit(unlist(strsplit(x,':')),',')))
#file = lapply(file,intersect,keys)
#PC2symbol = lapply(file,function(x)keysR[x])
u = unique(keys(org.Hs.eg.db,"UNIPROT"))
keysRU = keysR[u]
names(keysRU) = u
keysRU[is.na(keysRU)] = 1:sum(is.na(keysRU))
file = lapply(file,intersect,keys(org.Hs.eg.db,"UNIPROT"))
PC2symbol = lapply(file,function(x)keysRU[x])
PClen = unlist(lapply(PC2symbol,length))
vec = pca$rot[,1]
wtest.1 = unlist(lapply(PC2symbol[PClen>5&PClen<5000],function(x)wilcoxGST(names(vec)%in%x,vec,alternative = 'mixed')))
vec = pca$rot[,2]
wtest.2 = unlist(lapply(PC2symbol[PClen>5&PClen<5000],function(x)wilcoxGST(names(vec)%in%x,vec,alternative = 'mixed')))
#plot(-log10(wtest.1),-log10(wtest.2))
u = names(wtest.1)
OR = data.frame(Pathway = u,Pvalue.PC1 = signif(wtest.1[u],2), AdjustedPvalue.PC1 = signif(p.adjust(wtest.1[u],'BH'),2),Pvalue.PC2 = signif(wtest.2[u],2), AdjustedPvalue.PC2 = signif(p.adjust(wtest.2[u],'BH'),2), NumberInPathway = PClen[u])
OR = OR[order(OR$Pvalue.PC1),]
We find 33 Pathway Commons pathways are significant changed for PC1 and 5 changed for PC2.
DT::datatable(OR, escape = FALSE,
extensions = 'Buttons',
options = list(
paging = TRUE,
searching = TRUE,
fixedColumns = TRUE,
autoWidth = TRUE,
ordering = TRUE,
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel')
)
)