## This source code file is part of the analysis of variable protein complexes (VariableComplexes). ## Copyright (C) 2016 Murat Iskar, Alessandro Ori ## ## This program is free software: you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation, either version 3 of the License, or ## (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with this program. If not, see . ## ## Please check the publication "Spatiotemporal variation of mammalian protein complex stoichiometries", XXX ## and the supplementary website: www.bork.embl.de/variable_complexes ## ## identifying_differentially_expressed_stoichiometric_members.R ## v1 ## Murat Iskar ## Alessandro Ori ## 20.1.2016 ## ## ## input files: ## 11-cell-lines-limma-results-adj-P-values.txt ## complexes-hansson-11cell-ver5-ordered-all.txt ## matched-protein-complexes-11-cell-line-dataset.txt ## matched-protein-complexes-reprogramming-dataset.txt ## reprogramming-limma-results-adj-P-values.txt ## ## output files: ## variable-complexes-heatmap-with-key.pdf ## Variable-complexes-ratios-of-dynamic-subunits.txt ## venn-diagram-common-stoichiometric-members.pdf options(max.print=200) options(stringsAsFactors=FALSE) library(VennDiagram) library(gplots) # loading the limma results for 11 cell line and the reprogramming dataset: reprogpval=read.table("input-files/reprogramming-limma-results-adj-P-values.txt",sep="\t") celllinespval=read.table("input-files/11-cell-lines-limma-results-adj-P-values.txt",sep="\t") # Protein complex definitions, matched Ensembl IDs with Uniprot IDs and mouse orthologs (Ensembl protein IDS) reprogcomps=read.table("input-files/matched-protein-complexes-reprogramming-dataset.txt",sep="\t",header=TRUE) celllinecomps=read.table("input-files/matched-protein-complexes-11-cell-line-dataset.txt",sep="\t",header=TRUE) # adjusted P value threshold set as: 0.05 pvalthr=0.05 # found proteins that were differentially expressed in at least one condition reprogsig = apply(reprogpval0 celllinesig = apply(celllinespval0 # significant in both datasets: commonset = rownames(reprogpval)[rownames(reprogpval)%in%rownames(celllinespval)] reprogsiglist = unique(rownames(reprogpval)[reprogsig]) celllinesiglist = unique(rownames(celllinespval)[celllinesig]) commoncount = sum(unique(rownames(reprogpval)[reprogsig])%in%unique(rownames(celllinespval)[celllinesig])) venn1 = sum(reprogsiglist%in%commonset) venn2 = sum(celllinesiglist%in%commonset) #exporting the venn diagram for protein members of complexes identified as differentially expressed in both datasets pdf("output-files/venn-diagram-common-stoichiometric-members.pdf",height=4,width=6) draw.pairwise.venn(venn1,venn2,commoncount, category = c( "11 cell line","Reprogramming"),scaled =FALSE) dev.off() ############################### # calculating the ratio of dynamic members in each complex: # this combined complex definition contains only proteins that was quantified in any of the 11 cell line or the reprogramming dataset. # this can be considered as the union of all the proteins that was considered in our analysis. comps<-read.table("input-files/complexes-hansson-11cell-ver5-ordered-all.txt",sep="\t") # for complex-based analysis, we first generate the unique set of complexes. uniqcomp<-unique(comps[,1]) aver<-c() reprogvariableaver<-c() celllinesvariableaver<-c() for(i in 1:length(uniqcomp)) { #protein members for the complex were retrieved. members<-comps[comps[,1]==uniqcomp[i],2] #we calculate the ratio of protein members that were differentially expressed in the protein complex. reprogvariableaver[i]<-c(sum(reprogsig[as.character(members)]>0,na.rm=TRUE)/sum(members%in%names(reprogsig))) #we calculate the ratio of protein members that were differentially expressed in the protein complex. celllinesvariableaver[i]<-c(sum(celllinesig[as.character(members)]>0,na.rm=TRUE)/sum(members%in%names(celllinesig))) } #replacing the value with NA if the protein complex (less than 5 members) was not considered for that proteomic dataset. reprogvariableaver[!uniqcomp%in%unique(c(as.character(unique(reprogcomps[,1]))))]=NA celllinesvariableaver[!uniqcomp%in%unique(c(as.character(unique(celllinecomps[,1]))))]=NA # calculating the average of values from both datasets: variableaver=apply(cbind(reprogvariableaver,celllinesvariableaver),1,function(x)mean(x,na.rm=TRUE)) #preparing the output file. variablecomp=cbind(uniqcomp,reprogvariableaver,celllinesvariableaver,variableaver) colnames=c("ProteinComplexes","RatioinReprogramming","Ratioin11cellline","AverageValue") #writing the output file. write.table(variablecomp,"output-files/Variable-complexes-ratios-of-dynamic-subunits.txt",sep="\t",col.names=FALSE,row.names=FALSE) # generating heatmap from the average value: heatdata<-sort(as.numeric(variableaver)) #plotted as matrix, so we plot twice to modify later in illustrator. heatdata=rbind(heatdata,heatdata) colnames(heatdata)=uniqcomp #key for the heatmap plotted separately: pdf("output-files/variable-complexes-heatmap-with-key.pdf",width=30,height=15) heatmap.2(heatdata,dendrogram="none",Colv=NULL,Rowv=NULL,col=colorRampPalette(rev(c("red","red","Salmon","Salmon","white","grey20")))(100),breaks=seq(0,1,by=0.01), na.color="black", tracecol=NULL,trace="column",hline=TRUE ,revC=TRUE,key=TRUE,cexCol=1,cexRow=0.15) dev.off() writeLines(capture.output(sessionInfo()), "sessionInfo.txt")