## This source code file is part of the analysis of variable protein complexes (VariableComplexes). ## Copyright (C) 2016 Murat Iskar, Alessandro Ori ## ## This program is free software: you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation, either version 3 of the License, or ## (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with this program. If not, see . ## ## Please check the publication "Spatiotemporal variation of mammalian protein complex stoichiometries", XXX ## and the supplementary website: www.bork.embl.de/variable_complexes ## ## higher-co-expression.R ## v1 ## Murat Iskar ## Alessandro Ori ## 20.1.2016 ## ## ## input files: ## 11-cell-line-dataset-only-Protein-complexes.txt ## matched-protein-complexes-11-cell-line-dataset.txt ## ## output files: ## complex-wise-correlation.pdf ## Higher-correlation-11-cell-only-complex.pdf ## options(max.print=200) options(stringsAsFactors=FALSE) library(ggplot2) library(corrplot) #input files: dataset = read.table("input-files/11-cell-line-dataset-only-Protein-complexes.txt",sep="\t") comps = read.table("input-files/matched-protein-complexes-11-cell-line-dataset.txt",header=TRUE,sep="\t") # calculating pearson correlation between all the proteins considered as protein members compcor = cor(t(dataset),method="pearson",use="pairwise.complete.obs") # a matrix same size as the correlation matrix samecomplexmat = compcor # background value was set to zero, indicating as protein pair are not members of the same complex. samecomplexmat[] = "different comp." # Protein pairs from the same protein complex were labeled as positive in the samecomplexmat. uniqcomp = unique(comps[,1]) for(i in 1:length(uniqcomp)) { #retrieve all members of a protein complex g = comps[comps[,1]%in%uniqcomp[i],3] g = g[g%in%rownames(samecomplexmat)] samecomplexmat[as.character(g),as.character(g)] = "same comp."; } # we exclude the diagonal of the matrix from the plot. diag(compcor) = NA # preparing the data frame. df<-data.frame(correlationvalue = c(compcor),fromsamecomplex = as.factor(c(samecomplexmat))) # plot the data using ggplot. ggplot(df, aes(x=correlationvalue, fill=fromsamecomplex))+ylab("Density")+xlab("Pearson correlation") + geom_density(alpha=.3)+ theme_bw() +scale_x_continuous(limits=c(-1, 1), breaks=c(-1,-0.5,0,0.5,1)) ggsave(filename="output-files/Higher-correlation-11-cell-only-complex.pdf",height=5,width=8) ######################################################################### # calculating average co-expression between complexes: compcor = cor(t(dataset),method="pearson",use="pairwise.complete.obs") diag(compcor) = NA uniqcomp = unique(comps[,1]) complexescor = mat.or.vec(nr=length(uniqcomp),nc=length(uniqcomp)) # we perform pairwise comparison of complexes and check the median correlation between their protein members. for(i in 1:length(uniqcomp)) { for(k in 1:length(uniqcomp)) { # if the number of quantified members were not enough for both complexes, the summary correlation value was set to NA. summarycor = NA # protein members of the query protein complex (i) l1 = comps[comps[,1]%in%uniqcomp[i],3]; l1 = l1[l1%in%rownames(samecomplexmat)] # protein members of the subject protein complex (k) l2 = comps[comps[,1]%in%uniqcomp[k],3]; l2 = l2[l2%in%rownames(samecomplexmat)] if(length(l1) > 0 && length(l2) > 0) { # we calculate the median Pearson correlation between all the protein pairs of two complexes. # we expect the correlation matrix to be symmetrical, so we retrieve the same values twice. summarycor = median(c(median(compcor[l1,l2],na.rm=TRUE),median(compcor[l2,l1],na.rm=TRUE))) } complexescor[i,k] = summarycor } } # row and column names were updated accordingly. colnames(complexescor) = uniqcomp rownames(complexescor) = uniqcomp # clustering of the complexes using hierarchical clustering. h = hclust(as.dist((1-complexescor)),method="average") complexescor = complexescor[h$order,h$order] # The correlation matrix was visualized using the scale from -0.6 to 0.6. That is why, maximum value was set to 0.6 complexescor[complexescor>0.6] = 0.6 complexescor[complexescor<(-0.6)] = (-0.6) # generating the heatmap of the correlation matrix: pdf("output-files/complex-wise-correlation.pdf",width=40,height=40) corrplot(complexescor, order="original", is.corr = FALSE, cl.lim=c(-0.6,0.6),method="square") dev.off() writeLines(capture.output(sessionInfo()), "sessionInfo.txt")