## This source code file is part of the analysis of variable protein complexes (VariableComplexes).
## Copyright (C) 2016 Murat Iskar, Alessandro Ori
##
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.
##
## Please check the publication "Spatiotemporal variation of mammalian protein complex stoichiometries", XXX
## and the supplementary website: www.bork.embl.de/variable_complexes
##
## complex_definitions_filtering_redundancy.R
## v1
## murat iskar
## 20.1.2016
##
##
## input files:
## Complexes-manualannotation-COMPLEAT-CORUM-redundant.txt
##
## output files:
## Protein-Complexes-integrated-filtered.txt
# Session-wide settings: cap console printing at 200 entries and keep
# character columns as plain strings when data frames are built or read.
options(max.print = 200, stringsAsFactors = FALSE)
library(stringr)
# Read the redundant protein complex table (manual annotation, COMPLEAT and
# CORUM definitions, as named by the input file). Quote and comment handling
# are switched off so that fields containing quotes or '#' are read verbatim.
compdef <- read.table(
  "input-files/Complexes-manualannotation-COMPLEAT-CORUM-redundant.txt",
  sep = "\t", header = TRUE, quote = "", row.names = NULL, comment.char = ""
)
# Retain complexes that have 5 or more members; the member count is read from
# column 2. Rows whose count is missing or not numeric are dropped explicitly:
# indexing with an NA logical would otherwise insert all-NA rows.
complex_size <- as.numeric(compdef[, 2])
compdef <- compdef[!is.na(complex_size) & complex_size >= 5, ]
# HC1350 is excluded here because it was curated by hand within the manually
# annotated set. A logical mask is used instead of -which(...): a negative
# index built from an empty which() result selects zero rows, which would
# silently discard the whole table if HC1350 were absent from the input.
compdef <- compdef[!(compdef[, 1] %in% "HC1350"), ]
# Redundancy filtering: starting from the full table, iterate over rows with
# index i and remove later rows that are flagged by the score vector l
# (threshold 0.5). The result accumulates in compdeffilt.
i=1
compdeffilt=compdef
# NOTE(review): removedcomp is initialised but never used in the visible code.
removedcomp=c()
# NOTE(review): the next line is corrupted and is not valid R. Text between a
# '<' and a later '>' appears to have been stripped (e.g. by an HTML
# sanitizer), which removed the while condition, the loop's opening brace, the
# computation of l and the beginning of the if statement. Presumably l holds
# an overlap fraction between complex i and each complex after it -- restore
# this section from the original publication's code before running.
while(i=0.5))>0)
{
# Convert the positions flagged in l (relative to row i) into negative row
# indices so they can be dropped from compdeffilt.
rem=-1*(i+which(l>=0.5))
# Only rows after row 63 may be removed; presumably the first 63 rows are the
# manually annotated complexes, which are always kept -- TODO confirm.
# NOTE(review): if every flagged row lies within the first 63 rows, rem becomes
# empty and compdeffilt[rem,] below would select zero rows (empty table).
rem=rem[rem<(-63)]
# Diagnostic output: report the current row when complex HC3525 is among the
# rows about to be removed.
if(sum(compdeffilt[rem*(-1),1]%in%"HC3525")>0)
{
print(c(i,compdeffilt[i,1]))
}
# Drop the redundant rows.
compdeffilt<-compdeffilt[rem,]
}
# Progress output: current index, number of flagged rows, remaining row count.
print(c(i,sum(l>=0.5),length(compdeffilt[,1])))
i=i+1
}
# Create an internal id for each retained complex definition
# (embl_complex_001, embl_complex_002, ...), preceded by the column title.
# sprintf() over seq_len() yields no ids at all for an empty table, whereas
# 1:length(x) would count 1, 0 and paste() would turn an empty input into "".
ids <- c("Complex ID", sprintf("embl_complex_%03d", seq_len(nrow(compdeffilt))))
# Re-read the first line of the input file as data so that the original,
# unmodified column titles can be written out as the first output row.
header <- read.table(
  "input-files/Complexes-manualannotation-COMPLEAT-CORUM-redundant.txt",
  sep = "\t", header = FALSE, quote = "", row.names = NULL, comment.char = "",
  nrows = 1
)
names(header) <- names(compdeffilt)
# Stack the title row on top of the filtered table and prepend the id column.
compdeffilt <- rbind(header, compdeffilt)
compdeffilt <- cbind(ids, compdeffilt)
# Column names are suppressed because the title row is already part of the data.
write.table(
  compdeffilt, "output-files/Protein-Complexes-integrated-filtered.txt",
  sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE
)
# Record the R version and loaded packages for reproducibility.
writeLines(capture.output(sessionInfo()), "sessionInfo.txt")