STEP 15: Python Code

									
class step15_preparation:
	"""Build the matrices underlying the complex effect-size figure.

	Four 'gygi3' complex tables (quant/stoichiometry x sex/diet) are loaded,
	the module-level columns 'r2.all.module' and 'empirical.FDR.module' are
	collected per complex, and the result is written as two tab-separated
	matrices (one for the r2 values, one for the empirical FDR values).
	"""

	# Column order shared by the effect-size matrix and the p-value matrix.
	_MATRIX_COLUMNS = ['sex_quant','sex_stoch','diet_quant','diet_stoch']

	@staticmethod
	def execute(**kwargs):
		"""Run the preparation step.

		Keyword arguments:
			folder -- location of the input tables (default 'PATH')
			output_folder -- prefix the output file names are appended to
				(default 'PATH')

		Returns the (effect-size, p-value) DataFrame pair produced by
		get_complex_underlying_data.
		"""
		folder = kwargs.get('folder','PATH')
		output_folder = kwargs.get('output_folder','PATH')

		print('get_complex_underlying_data')
		return step15_preparation.get_complex_underlying_data(folder = folder, output_folder = output_folder)

	@staticmethod
	def load_multivariate_subunit_specific_data(dataset, module_type, datatype, covariate, **kwargs):
		"""Load one subunit-specific multi-covariate result table.

		The file name is the four positional arguments joined by '_' followed
		by '_data_all_multi_covariate_subunitSpecific_empiricalFDR.tsv.gz'.
		It is read from the keyword argument 'folder' (default 'PATH') via
		DataFrameAnalyzer.open_in_chunks, whose return value is passed through.
		"""
		folder = kwargs.get('folder','PATH')

		joint_name = '_'.join([dataset, module_type, datatype, covariate])
		filename = joint_name + '_data_all_multi_covariate_subunitSpecific_empiricalFDR.tsv.gz'
		return DataFrameAnalyzer.open_in_chunks(folder, filename)

	@staticmethod
	def _module_level_dicts(data):
		"""Return ({complex name: r2}, {complex name: empirical FDR}) for one table.

		'complex.name' is part of the de-duplication key on purpose:
		drop_duplicates ignores the index, so de-duplicating the two value
		columns alone would merge different complexes that happen to share
		the same (r2, FDR) pair and silently drop one of them.
		"""
		unique_rows = data[['complex.name','r2.all.module','empirical.FDR.module']].drop_duplicates()
		unique_rows.index = unique_rows['complex.name']
		effect_dict = unique_rows['r2.all.module'].to_dict()
		pval_dict = unique_rows['empirical.FDR.module'].to_dict()
		return effect_dict, pval_dict

	@staticmethod
	def get_complex_underlying_data(**kwargs):
		"""Assemble and write the effect-size and p-value matrices.

		Keyword arguments:
			folder -- location of the input tables (default 'PATH')
			output_folder -- prefix for the two output files (default 'PATH');
				it is concatenated with the file name as-is, so it has to end
				with a path separator to denote a directory

		Rows are the complexes of the (filtered) sex/quant table; complexes
		missing from one of the other three tables get NaN in that column.

		Returns (df, pval_df): r2 values and empirical FDR values, both with
		columns sex_quant, sex_stoch, diet_quant, diet_stoch.
		"""
		folder = kwargs.get('folder','PATH')
		output_folder = kwargs.get('output_folder','PATH')

		module = 'complex'
		load = step15_preparation.load_multivariate_subunit_specific_data
		sex_data = load('gygi3', module, 'quant', 'sex', folder = folder)
		sex_stochdata = load('gygi3', module, 'stoichiometry', 'sex', folder = folder)
		diet_data = load('gygi3', module, 'quant', 'diet', folder = folder)
		diet_stochdata = load('gygi3', module, 'stoichiometry', 'diet', folder = folder)

		# Keep complexes with at least 5 subunits; rows whose name contains
		# 'tRNA splicing' are kept regardless of their subunit count.
		trna_rows = sex_data[sex_data['complex.name'].str.contains('tRNA splicing')]
		sex_data = sex_data[sex_data['n.subunits']>=5]
		sex_data = pd.concat([sex_data,trna_rows])

		to_dicts = step15_preparation._module_level_dicts
		sex_dict, sex_pval_dict = to_dicts(sex_data)
		sex_stochdict, sex_pval_stochdict = to_dicts(sex_stochdata)
		diet_dict, diet_pval_dict = to_dicts(diet_data)
		diet_stochdict, diet_pval_stochdict = to_dicts(diet_stochdata)

		df_list = list()
		pval_df_list = list()
		key_list = list()
		for key in sex_dict:
			# Effect and p-value dicts of one table share their keys, so a
			# complex is either present in both or missing (NaN) in both.
			df_list.append([sex_dict[key],
							sex_stochdict.get(key, np.nan),
							diet_dict.get(key, np.nan),
							diet_stochdict.get(key, np.nan)])
			pval_df_list.append([sex_pval_dict[key],
								 sex_pval_stochdict.get(key, np.nan),
								 diet_pval_dict.get(key, np.nan),
								 diet_pval_stochdict.get(key, np.nan)])
			key_list.append(key)

		# Passing index/columns to the constructor also works when no complex
		# survives the filter (an empty frame with the four named columns).
		columns = step15_preparation._MATRIX_COLUMNS
		df = pd.DataFrame(df_list, index = key_list, columns = columns)
		pval_df = pd.DataFrame(pval_df_list, index = key_list, columns = columns)

		df.to_csv(output_folder + 'suppFigure4a_underlyingData_gygi3_complex_effectSizeMatrix.tsv', sep = '\t')
		pval_df.to_csv(output_folder + 'suppFigure4a_underlyingData_gygi3_complex_pvalMatrix.tsv', sep = '\t')
		return df, pval_df

class step15_figure:
	"""Plot the per-complex effect-size distribution (fig6b).

	The upper panel stacks the sex quant/stoichiometry values, the lower
	panel stacks the negated diet quant/stoichiometry values; all values are
	multiplied by 100 before plotting.
	"""

	# Heights of the dashed guide lines; the lower panel uses the negated values.
	_GUIDE_LEVELS = (10, 20, 30, 40, 50)

	@staticmethod
	def execute(**kwargs):
		"""Run the figure step.

		Keyword arguments:
			folder -- location the matrices are read from and the PDF is
				written to (default 'PATH')
			output_folder -- accepted for symmetry with the preparation
				step; not used by this step
		"""
		folder = kwargs.get('folder','PATH')

		print('FIGURE6B: plot_effectSize_complexDistribution')
		step15_figure.plot_effectSize_complexDistribution(folder = folder)

	@staticmethod
	def get_data(**kwargs):
		"""Load the effect-size and p-value matrices from 'folder'.

		Returns {'mouse': (effect-size table, p-value table)}; 'mouse' is the
		only key.
		"""
		folder = kwargs.get('folder','PATH')

		# NOTE(review): step15_preparation writes these matrices as plain
		# '.tsv' into its output_folder, while '.tsv.gz' files are requested
		# from 'folder' here -- confirm how open_in_chunks resolves the name.
		mouse_df = DataFrameAnalyzer.open_in_chunks(folder,'suppFigure4a_underlyingData_gygi3_complex_effectSizeMatrix.tsv.gz')
		mouse_pval_df = DataFrameAnalyzer.open_in_chunks(folder,'suppFigure4a_underlyingData_gygi3_complex_pvalMatrix.tsv.gz')
		return {'mouse': (mouse_df, mouse_pval_df)}

	@staticmethod
	def _tick_label(complex_key):
		"""Return the part of complex_key after its first ':' ('' if there is none)."""
		return ':'.join(complex_key.split(':')[1:])

	@staticmethod
	def plot_effectSize_complexDistribution(**kwargs):
		"""Draw the stacked effect-size bars and save them as a PDF.

		Keyword arguments:
			folder -- location of the input matrices; the file
				'fig6b_complex_effectSize_Distribution.pdf' is written to
				folder + file name, so folder has to end with a path
				separator to denote a directory (default 'PATH')
		"""
		folder = kwargs.get('folder','PATH')

		# Forward the folder; without it get_data falls back to its 'PATH'
		# default. Only the effect sizes are plotted, and get_data has no
		# 'human' entry, so nothing else is unpacked.
		mouse_df = step15_figure.get_data(folder = folder)['mouse'][0]
		mouse_df.columns = ['mouse_'+item for item in list(mouse_df.columns)]

		# NaN is swapped for the sentinel -100 around the reclustering call
		# (presumably the helper cannot handle NaN -- confirm) and restored
		# afterwards; a genuine -100 would be turned into NaN as well.
		mouse_df = mouse_df.replace(np.nan,-100)
		clustered, _ = utilsFacade.recluster_matrix_only_rows(mouse_df)
		clustered = clustered.replace(-100, np.nan)

		sex_quant_list = [item*100 for item in list(clustered['mouse_sex_quant'])]
		sex_stoch_list = [item*100 for item in list(clustered['mouse_sex_stoch'])]
		sex_sum_list = np.array(sex_quant_list) + np.array(sex_stoch_list)
		diet_quant_list = [item*100 for item in list(clustered['mouse_diet_quant'])]
		diet_stoch_list = [item*100 for item in list(clustered['mouse_diet_stoch'])]
		key_list = list(clustered.index)

		# Reorder all lists together, the summed sex effect leading
		# (presumably sorted by that first list, descending -- confirm in
		# utilsFacade.sort_multiple_lists).
		lists = [sex_sum_list, sex_quant_list, sex_stoch_list, diet_quant_list, diet_stoch_list, key_list]
		sorted_lists = utilsFacade.sort_multiple_lists(lists, reverse = True)
		sex_sum_list, sex_quant_list, sex_stoch_list, diet_quant_list, diet_stoch_list, key_list = sorted_lists

		sns.set(context='notebook', style='white',
			palette='deep', font='Liberation Sans', font_scale=1,
			color_codes=False, rc=None)
		plt.rcParams["axes.grid"] = False

		fig = plt.figure(figsize = (17,10))
		gs = gridspec.GridSpec(10,32)
		width = 1

		# Upper panel: sex effect, stoichiometry stacked on top of quant.
		ax = plt.subplot(gs[0:4,0:])
		for level in step15_figure._GUIDE_LEVELS:
			ax.axhline(level,color = 'k', linestyle = '--')
		ind = list(range(len(sex_quant_list)))
		ax.bar(ind, sex_quant_list, width, color='lightblue', edgecolor = 'white')
		ax.bar(ind, sex_stoch_list, width, color='darkblue',
			edgecolor = 'white', bottom = np.array(sex_quant_list))
		ax.set_xlim(-0.5,len(sex_quant_list)+0.5)
		ax.set_xticklabels([])

		# Lower panel: diet effect, mirrored downwards by negating the values.
		ax = plt.subplot(gs[4:8,0:])
		ax.set_ylim(-60,0)
		for level in step15_figure._GUIDE_LEVELS:
			ax.axhline(-level,color = 'k', linestyle = '--')
		ax.bar(ind, (-1)*np.array(diet_quant_list), width, color='lightgreen', edgecolor = 'white')
		ax.bar(ind, (-1)*np.array(diet_stoch_list), width, color='darkgreen',
			edgecolor = 'white', bottom = (-1)*np.array(diet_quant_list))
		ax.set_xlim(-0.5,len(diet_quant_list)+0.5)
		ax.set_xticks(list(utilsFacade.frange(0.5,len(key_list)+0.5,1)))
		ax.set_xticklabels([step15_figure._tick_label(item) for item in list(key_list)],
			rotation = 90, fontsize = 5)

		plt.savefig(folder + 'fig6b_complex_effectSize_Distribution.pdf',
			bbox_inches = 'tight', dpi = 400)
		# Release the figure so repeated calls do not accumulate open figures.
		plt.close(fig)

if __name__ == "__main__":
	## EXECUTE STEP15
	# Usage: <script> <folder> <output_folder>
	# Imported here so the entry point does not depend on a module-level import.
	import sys

	# Fail with a usage message instead of an IndexError when arguments are missing.
	if len(sys.argv) < 3:
		sys.exit('usage: ' + sys.argv[0] + ' <folder> <output_folder>')

	step15_preparation.execute(folder = sys.argv[1], output_folder = sys.argv[2])
	step15_figure.execute(folder = sys.argv[1], output_folder = sys.argv[2])
									
								

All scripts were developed by Natalie Romanov (Bork group, EMBL). The source code used in the analysis of protein complex variability across individuals is released under the GNU General Public License v3.0. All scripts on this website/web resource are Copyright (C) 2019 Natalie Romanov, Michael Kuhn, Ruedi Aebersold, Alessandro Ori, Martin Beck, Peer Bork and EMBL.

GNU LICENSE

Download script here