## This source code file is part of the analysis of variable protein complexes (VariableComplexes).
## Copyright (C) 2016 Murat Iskar, Alessandro Ori
##
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program. If not, see .
##
## Please check the publication "Spatiotemporal variation of mammalian protein complex stoichiometries", XXX
## and the supplementary website: www.bork.embl.de/variable_complexes
##
## identifying_differentially_expressed_stoichiometric_members.R
## v1
## Murat Iskar
## Alessandro Ori
## 20.1.2016
##
##
## input files:
## 11-cell-lines-limma-results-adj-P-values.txt
## complexes-hansson-11cell-ver5-ordered-all.txt
## matched-protein-complexes-11-cell-line-dataset.txt
## matched-protein-complexes-reprogramming-dataset.txt
## reprogramming-limma-results-adj-P-values.txt
##
## output files:
## variable-complexes-heatmap-with-key.pdf
## Variable-complexes-ratios-of-dynamic-subunits.txt
## venn-diagram-common-stoichiometric-members.pdf
options(max.print=200)
options(stringsAsFactors=FALSE)
library(VennDiagram)
library(gplots)
# loading the limma results for 11 cell line and the reprogramming dataset:
reprogpval=read.table("input-files/reprogramming-limma-results-adj-P-values.txt",sep="\t")
celllinespval=read.table("input-files/11-cell-lines-limma-results-adj-P-values.txt",sep="\t")
# Protein complex definitions, matched Ensembl IDs with Uniprot IDs and mouse orthologs (Ensembl protein IDS)
reprogcomps=read.table("input-files/matched-protein-complexes-reprogramming-dataset.txt",sep="\t",header=TRUE)
celllinecomps=read.table("input-files/matched-protein-complexes-11-cell-line-dataset.txt",sep="\t",header=TRUE)
# adjusted P value threshold set as: 0.05
pvalthr=0.05
# found proteins that were differentially expressed in at least one condition
reprogsig = apply(reprogpval0
celllinesig = apply(celllinespval0
# significant in both datasets:
commonset = rownames(reprogpval)[rownames(reprogpval)%in%rownames(celllinespval)]
reprogsiglist = unique(rownames(reprogpval)[reprogsig])
celllinesiglist = unique(rownames(celllinespval)[celllinesig])
commoncount = sum(unique(rownames(reprogpval)[reprogsig])%in%unique(rownames(celllinespval)[celllinesig]))
venn1 = sum(reprogsiglist%in%commonset)
venn2 = sum(celllinesiglist%in%commonset)
#exporting the venn diagram for protein members of complexes identified as differentially expressed in both datasets
pdf("output-files/venn-diagram-common-stoichiometric-members.pdf",height=4,width=6)
draw.pairwise.venn(venn1,venn2,commoncount, category = c( "11 cell line","Reprogramming"),scaled =FALSE)
dev.off()
###############################
# calculating the ratio of dynamic members in each complex:
# this combined complex definition contains only proteins that was quantified in any of the 11 cell line or the reprogramming dataset.
# this can be considered as the union of all the proteins that was considered in our analysis.
comps<-read.table("input-files/complexes-hansson-11cell-ver5-ordered-all.txt",sep="\t")
# for complex-based analysis, we first generate the unique set of complexes.
uniqcomp<-unique(comps[,1])
aver<-c()
reprogvariableaver<-c()
celllinesvariableaver<-c()
for(i in 1:length(uniqcomp))
{
#protein members for the complex were retrieved.
members<-comps[comps[,1]==uniqcomp[i],2]
#we calculate the ratio of protein members that were differentially expressed in the protein complex.
reprogvariableaver[i]<-c(sum(reprogsig[as.character(members)]>0,na.rm=TRUE)/sum(members%in%names(reprogsig)))
#we calculate the ratio of protein members that were differentially expressed in the protein complex.
celllinesvariableaver[i]<-c(sum(celllinesig[as.character(members)]>0,na.rm=TRUE)/sum(members%in%names(celllinesig)))
}
#replacing the value with NA if the protein complex (less than 5 members) was not considered for that proteomic dataset.
reprogvariableaver[!uniqcomp%in%unique(c(as.character(unique(reprogcomps[,1]))))]=NA
celllinesvariableaver[!uniqcomp%in%unique(c(as.character(unique(celllinecomps[,1]))))]=NA
# calculating the average of values from both datasets:
variableaver=apply(cbind(reprogvariableaver,celllinesvariableaver),1,function(x)mean(x,na.rm=TRUE))
#preparing the output file.
variablecomp=cbind(uniqcomp,reprogvariableaver,celllinesvariableaver,variableaver)
colnames=c("ProteinComplexes","RatioinReprogramming","Ratioin11cellline","AverageValue")
#writing the output file.
write.table(variablecomp,"output-files/Variable-complexes-ratios-of-dynamic-subunits.txt",sep="\t",col.names=FALSE,row.names=FALSE)
# generating heatmap from the average value:
heatdata<-sort(as.numeric(variableaver))
#plotted as matrix, so we plot twice to modify later in illustrator.
heatdata=rbind(heatdata,heatdata)
colnames(heatdata)=uniqcomp
#key for the heatmap plotted separately:
pdf("output-files/variable-complexes-heatmap-with-key.pdf",width=30,height=15)
heatmap.2(heatdata,dendrogram="none",Colv=NULL,Rowv=NULL,col=colorRampPalette(rev(c("red","red","Salmon","Salmon","white","grey20")))(100),breaks=seq(0,1,by=0.01), na.color="black",
tracecol=NULL,trace="column",hline=TRUE ,revC=TRUE,key=TRUE,cexCol=1,cexRow=0.15)
dev.off()
writeLines(capture.output(sessionInfo()), "sessionInfo.txt")