## This source code file is part of the analysis of variable protein complexes (VariableComplexes).
## Copyright (C) 2016 Murat Iskar, Alessandro Ori
##
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <https://www.gnu.org/licenses/>.
##
## Please check the publication "Spatiotemporal variation of mammalian protein complex stoichiometries", XXX
## and the supplementary website: www.bork.embl.de/variable_complexes
##
## complex_definitions_filtering_redundancy.R
## v1
## murat iskar
## 20.1.2016
##
##
## input files:
## Complexes-manualannotation-COMPLEAT-CORUM-redundant.txt
##
## output files:
## Protein-Complexes-integrated-filtered.txt
##
## Purpose: read the redundant complex table, keep complexes with at least
## five members, drop later complexes that overlap an earlier one too
## strongly, assign internal ids and write the filtered table.

options(max.print=200)
options(stringsAsFactors=FALSE)
library(stringr)

input_file <- "input-files/Complexes-manualannotation-COMPLEAT-CORUM-redundant.txt"
output_file <- "output-files/Protein-Complexes-integrated-filtered.txt"

# Rows 1..n_protected are never removed by the redundancy filter. The original
# note says these are the manually annotated complexes.
# NOTE(review): this relies on the input row order -- confirm against the file.
n_protected <- 63

# A later complex is dropped when its overlap score with an earlier complex
# reaches this value.
overlap_cutoff <- 0.5

# Read the protein complexes table, including the manual annotations.
compdef <- read.table(
  input_file,
  sep = "\t",
  header = TRUE,
  quote = "",
  row.names = NULL,
  comment.char = ""
)

# Retain complexes that have 5 or more members (column 2 is compared as a
# number). Rows whose count is not numeric are dropped instead of turning
# into all-NA rows.
n_members <- as.numeric(compdef[, 2])
compdef <- compdef[!is.na(n_members) & n_members >= 5, , drop = FALSE]

# Complex HC1350 is excluded; per the original note it was annotated manually
# within the set itself. A logical mask is used because `-which(...)` selects
# zero rows (i.e. empties the table) when the id is absent.
compdef <- compdef[!(compdef[, 1] %in% "HC1350"), , drop = FALSE]

# Overlap scores between row `i` and the rows after it.
#
# The loop below needs a numeric vector whose k-th element is the score
# between row `i` and row `i + k` of `defs`.
#
# NOTE(review): the statements that computed this vector are missing from this
# copy of the script: the text between `while(i` and `=0.5))>0) {` was lost
# (the damaged fragment reads `while(i=0.5))>0) {`). The member column, the
# separator and the overlap measure cannot be recovered from what is left, so
# nothing is guessed here. Restore the computation from the published script.
overlap_with_later_rows <- function(defs, i) {
  stop(
    "overlap_with_later_rows(): the overlap computation is missing from this ",
    "copy of the script; restore it from the published source before running.",
    call. = FALSE
  )
}

i <- 1
compdeffilt <- compdef

# NOTE(review): only `while(i` survived of the loop header; the condition is
# reconstructed from the row-count idiom used elsewhere in the script.
while (i < nrow(compdeffilt)) {
  l <- overlap_with_later_rows(compdeffilt, i)
  hits <- which(l >= overlap_cutoff)

  if (length(hits) > 0) {
    # Translate positions in `l` into row numbers, then keep only rows that
    # are not protected (we do not remove the manually annotated complexes).
    rem <- i + hits
    rem <- rem[rem > n_protected]

    # Diagnostic kept from the original: report which complex causes HC3525
    # to be removed.
    if ("HC3525" %in% compdeffilt[rem, 1]) {
      print(c(i, compdeffilt[i, 1]))
    }

    # Negative indexing with an empty vector would select zero rows and wipe
    # the whole table, so only subset when something is left to remove.
    if (length(rem) > 0) {
      compdeffilt <- compdeffilt[-rem, , drop = FALSE]
    }
  }

  # Progress: current row, number of overlapping rows, remaining complexes.
  print(c(i, length(hits), nrow(compdeffilt)))
  i <- i + 1
}

# Creating an internal id for complex definitions. seq_len() and sprintf()
# both give zero ids for zero rows, unlike 1:length(...) and paste().
ids <- c(
  "Complex ID",
  sprintf("embl_complex_%s", str_pad(seq_len(nrow(compdeffilt)), 3, pad = "0"))
)

# Adding header information: the first line of the input is re-read verbatim
# (header = FALSE) and written as the first data row, so the output carries
# the column titles exactly as they appear in the input file.
header <- read.table(
  input_file,
  sep = "\t",
  header = FALSE,
  quote = "",
  row.names = NULL,
  comment.char = "",
  nrows = 1
)
names(header) <- names(compdeffilt)
compdeffilt <- rbind(header, compdeffilt)
compdeffilt <- cbind(ids, compdeffilt)

write.table(
  compdeffilt,
  output_file,
  sep = "\t",
  col.names = FALSE,
  row.names = FALSE,
  quote = FALSE
)

# Record the R session for reproducibility.
writeLines(capture.output(sessionInfo()), "sessionInfo.txt")