## This source code file is part of the analysis of variable protein complexes (VariableComplexes).
## Copyright (C) 2016 Murat Iskar, Alessandro Ori
##
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program. If not, see .
##
## Please check the publication "Spatiotemporal variation of mammalian protein complex stoichiometries", XXX
## and the supplementary website: www.bork.embl.de/variable_complexes
##
## higher-co-expression.R
## v1
## Murat Iskar
## Alessandro Ori
## 20.1.2016
##
##
## input files:
## 11-cell-line-dataset-only-Protein-complexes.txt
## matched-protein-complexes-11-cell-line-dataset.txt
##
## output files:
## complex-wise-correlation.pdf
## Higher-correlation-11-cell-only-complex.pdf
##
options(max.print=200)
options(stringsAsFactors=FALSE)
library(ggplot2)
library(corrplot)
#input files:
dataset = read.table("input-files/11-cell-line-dataset-only-Protein-complexes.txt",sep="\t")
comps = read.table("input-files/matched-protein-complexes-11-cell-line-dataset.txt",header=TRUE,sep="\t")
# calculating pearson correlation between all the proteins considered as protein members
compcor = cor(t(dataset),method="pearson",use="pairwise.complete.obs")
# a matrix same size as the correlation matrix
samecomplexmat = compcor
# background value was set to zero, indicating as protein pair are not members of the same complex.
samecomplexmat[] = "different comp."
# Protein pairs from the same protein complex were labeled as positive in the samecomplexmat.
uniqcomp = unique(comps[,1])
for(i in 1:length(uniqcomp))
{
#retrieve all members of a protein complex
g = comps[comps[,1]%in%uniqcomp[i],3]
g = g[g%in%rownames(samecomplexmat)]
samecomplexmat[as.character(g),as.character(g)] = "same comp.";
}
# we exclude the diagonal of the matrix from the plot.
diag(compcor) = NA
# preparing the data frame.
df<-data.frame(correlationvalue = c(compcor),fromsamecomplex = as.factor(c(samecomplexmat)))
# plot the data using ggplot.
ggplot(df, aes(x=correlationvalue, fill=fromsamecomplex))+ylab("Density")+xlab("Pearson correlation") + geom_density(alpha=.3)+ theme_bw() +scale_x_continuous(limits=c(-1, 1), breaks=c(-1,-0.5,0,0.5,1))
ggsave(filename="output-files/Higher-correlation-11-cell-only-complex.pdf",height=5,width=8)
#########################################################################
# calculating average co-expression between complexes:
compcor = cor(t(dataset),method="pearson",use="pairwise.complete.obs")
diag(compcor) = NA
uniqcomp = unique(comps[,1])
complexescor = mat.or.vec(nr=length(uniqcomp),nc=length(uniqcomp))
# we perform pairwise comparison of complexes and check the median correlation between their protein members.
for(i in 1:length(uniqcomp))
{
for(k in 1:length(uniqcomp))
{
# if the number of quantified members were not enough for both complexes, the summary correlation value was set to NA.
summarycor = NA
# protein members of the query protein complex (i)
l1 = comps[comps[,1]%in%uniqcomp[i],3];
l1 = l1[l1%in%rownames(samecomplexmat)]
# protein members of the subject protein complex (k)
l2 = comps[comps[,1]%in%uniqcomp[k],3];
l2 = l2[l2%in%rownames(samecomplexmat)]
if(length(l1) > 0 && length(l2) > 0)
{
# we calculate the median Pearson correlation between all the protein pairs of two complexes.
# we expect the correlation matrix to be symmetrical, so we retrieve the same values twice.
summarycor = median(c(median(compcor[l1,l2],na.rm=TRUE),median(compcor[l2,l1],na.rm=TRUE)))
}
complexescor[i,k] = summarycor
}
}
# row and column names were updated accordingly.
colnames(complexescor) = uniqcomp
rownames(complexescor) = uniqcomp
# clustering of the complexes using hierarchical clustering.
h = hclust(as.dist((1-complexescor)),method="average")
complexescor = complexescor[h$order,h$order]
# The correlation matrix was visualized using the scale from -0.6 to 0.6. That is why, maximum value was set to 0.6
complexescor[complexescor>0.6] = 0.6
complexescor[complexescor<(-0.6)] = (-0.6)
# generating the heatmap of the correlation matrix:
pdf("output-files/complex-wise-correlation.pdf",width=40,height=40)
corrplot(complexescor, order="original", is.corr = FALSE, cl.lim=c(-0.6,0.6),method="square")
dev.off()
writeLines(capture.output(sessionInfo()), "sessionInfo.txt")