Impact on Proteotype: Figure5

FIGURE 5: Python Code

									
class figure5a:
	
	@staticmethod
	def execute(**kwargs):
		folder = kwargs.get('folder','PATH')
		output_folder = kwargs.get('output_folder','PATH')

		print('FIGURE5A: figure5a_complex_abundanceChange_demonstration')
		figure5a.figure5a_complex_abundanceChange_demonstration()

		print('FIGURE5A: figure5a_overview_stoichiometry')
		figure5a.figure5a_overview_stoichiometry()

	@staticmethod
	def get_data(**kwargs):
		folder = kwargs.get('folder','PATH')

		data_dict = DataFrameAnalyzer.read_pickle(folder + 'figure5a_data_dictionary.pkl')
		relevant_complexes = ['HC663:COPII',
							  'HC2402:COPI',
							  'HC1479:retromer complex',
					  		  'SMC_AM000047:Cohesin complex']
		return data_dict, relevant_complexes

	@staticmethod
	def figure5a_complex_abundanceChange_demonstration(**kwargs):
		folder = kwargs.get('folder','PATH')

		data_dict, relevant_complexes = figure5a.get_data(folder = folder)

		sns.set(context='notebook', style='white', 
			palette='deep', font='Liberation Sans', font_scale=1, 
			color_codes=False, rc=None)
		plt.rcParams["axes.grid"] = True

		positions = [1,1.5]
		plt.clf()
		fig = plt.figure(figsize = (10,5))
		ax = fig.add_subplot(141)
		dataList = [data_dict[relevant_complexes[0]][0]
					data_dict[relevant_complexes[0]][1]]
		bp = ax.boxplot(dataList,notch=0,sym="",vert=1,
						patch_artist=True, widths=[0.4]*len(dataList),
						positions=positions)
		plt.setp(bp['medians'], color="black")
		plt.setp(bp['whiskers'], color="black",linestyle="--",alpha=0.8)
		for i,patch in enumerate(bp['boxes']):
			if i%2==0:
				patch.set_facecolor("#7FBF7F")	
			else:
				patch.set_facecolor("orange")	
			patch.set_edgecolor("black")
			patch.set_alpha(0.9)
			sub_group = dataList[i]
			x = numpy.random.normal(positions[i], 0.04, size=len(dataList[i]))
			ax.scatter(x,sub_group,color='white', alpha=0.5,edgecolor="black",s=5)
		ax.set_ylim(7,14)

		ax = fig.add_subplot(142)
		dataList = [data_dict[relevant_complexes[1]][0],data_dict[relevant_complexes[1]][1]]
		bp = ax.boxplot(dataList,notch=0,sym="",vert=1,patch_artist=True,
						widths=[0.4]*len(dataList),positions=positions)
		plt.setp(bp['medians'], color="black")
		plt.setp(bp['whiskers'], color="black",linestyle="--",alpha=0.8)
		for i,patch in enumerate(bp['boxes']):
			if i%2==0:
				patch.set_facecolor("#7FBF7F")	
			else:
				patch.set_facecolor("orange")	
			patch.set_edgecolor("black")
			patch.set_alpha(0.9)
			sub_group = dataList[i]
			x = numpy.random.normal(positions[i], 0.04, size=len(dataList[i]))
			ax.scatter(x,sub_group,color='white', alpha=0.5,edgecolor="black",s=5)
		ax.set_ylim(7,14)

		ax = fig.add_subplot(143)
		dataList = [data_dict[relevant_complexes[2]][0],data_dict[relevant_complexes[2]][1]]
		bp = ax.boxplot(dataList,notch=0,sym="",vert=1, patch_artist=True,
						widths=[0.4]*len(dataList),positions=positions)
		plt.setp(bp['medians'], color="black")
		plt.setp(bp['whiskers'], color="black",linestyle="--",alpha=0.8)
		for i,patch in enumerate(bp['boxes']):
			if i%2==0:
				patch.set_facecolor("#7FBF7F")	
			else:
				patch.set_facecolor("orange")	
			patch.set_edgecolor("black")
			patch.set_alpha(0.9)
			sub_group = dataList[i]
			x = numpy.random.normal(positions[i], 0.04, size=len(dataList[i]))
			ax.scatter(x,sub_group,color='white', alpha=0.5,edgecolor="black",s=5)
		ax.set_ylim(7,14)

		ax = fig.add_subplot(144)
		dataList = [data_dict[relevant_complexes[3]][0],data_dict[relevant_complexes[3]][1]]
		bp = ax.boxplot(dataList,notch=0,sym="",vert=1, patch_artist=True,
						widths=[0.4]*len(dataList),positions=positions)
		plt.setp(bp['medians'], color="black")
		plt.setp(bp['whiskers'], color="black",linestyle="--",alpha=0.8)
		for i,patch in enumerate(bp['boxes']):
			if i%2==0:
				patch.set_facecolor("#7FBF7F")	
			else:
				patch.set_facecolor("orange")	
			patch.set_edgecolor("black")
			patch.set_alpha(0.9)
			sub_group = dataList[i]
			x = numpy.random.normal(positions[i], 0.04, size=len(dataList[i]))
			ax.scatter(x,sub_group,color='white', alpha=0.5,edgecolor="black",s=5)
		ax.set_ylim(7,14)
		plt.savefig(folder + 'fig5a_complex_abundances.pdf', bbox_inches = 'tight',dpi = 400)

	@staticmethod
	def figure5a_overview_stoichiometry(**kwargs):
		folder = kwargs.get('folder','PATH')

		df = DataFrameAnalyzer.open_in_chunks(folder,'figure5a_bigPicture_underlying_data.tsv.gz')
		complex_list = list(df['complex'])
		cohen_list = list(df['cohen'])
		tt_pval_list = list(df['tt_pval'])
		stable_fractions = list(df['stable'])
		variable_fractions = list(df['variable'])
		tt_pvalCorrs = list(df['tt_pvalCorr'])
		tt1 = list(df.tt1)[0]
		tt2 = list(df.tt2)[0]

		sns.set(context='notebook', style='white', 
			palette='deep', font='Liberation Sans', font_scale=1, 
			color_codes=False, rc=None)
		plt.rcParams["axes.grid"] = False

		plt.clf()
		fig = plt.figure(figsize = (15,5))
		gs = gridspec.GridSpec(10,32)
		ax = plt.subplot(gs[0:5,0:])
		ind = np.arange(len(complex_list))
		width = 0.85
		ax.axhline(0, color = 'k')
		ax.axhline(0.5,linestyle = '--', color = 'k')
		ax.axhline(-0.5,linestyle = '--', color = 'k')
		ax.axhline(1.0,linestyle = '--', color = 'k')
		ax.axhline(-1.0,linestyle = '--', color = 'k')
		ax.axhline(1.5,linestyle = '--', color = 'k')
		ax.axhline(-1.5,linestyle = '--', color = 'k')
		ax.axhline(tt1, color = 'red')
		ax.axhline(tt2, color = 'red')
		colors = list()
		for p,pval in enumerate(tt_pval_list):
			if str(pval)=='nan':
				colors.append('grey')
			else:
				if cohen_list[p]>=tt1:
					colors.append('purple')
				elif cohen_list[p]<=tt2:
					colors.append('lightgreen')
				else:
					colors.append('grey')
		rects = ax.bar(ind, cohen_list, width, color=colors,
			edgecolor = 'white')
		ax.set_ylim(-2,2)
		ax.set_xlim(-0.1, len(complex_list)+0.1)
		ax.set_xticklabels([])

		ax = plt.subplot(gs[5:7,0:])
		rects1 = ax.bar(ind, stable_fractions, width, color='darkblue',
						edgecolor = 'white')
		rects2 = ax.bar(ind, variable_fractions, width, color='red',
						bottom =np.array(stable_fractions),
						edgecolor = 'white')
		ax.set_ylim(0,100)
		ax.set_xlim(-0.1, len(complex_list)+0.1)
		plt.xticks(list(utilsFacade.frange(0.5,len(complex_list)+0.5,1)))
		ax.set_xticklabels(complex_list,rotation = 90, fontsize = 7)
		plt.savefig(folder + 'fig5a_abundance_stoichiometry_comparison.pdf',
					bbox_inches = 'tight', dpi = 400)

	@staticmethod
	def figure5a_overview_stoichiometry_diet(**kwargs):
		folder = kwargs.get('folder','PATH')

		df = DataFrameAnalyzer.open_in_chunks(folder,'figure5a_bigPicture_diet_underlying_data.tsv.gz')
		complex_list = list(df['complex'])
		cohen_list = list(df['cohen'])
		tt_pval_list = list(df['tt_pval'])
		stable_fractions = list(df['stable'])
		variable_fractions = list(df['variable'])
		tt_pvalCorrs = list(df['tt_pvalCorr'])
		tt1 = list(df.tt1)[0]
		tt2 = list(df.tt2)[0]

		sns.set(context='notebook', style='white', 
			palette='deep', font='Liberation Sans', font_scale=1, 
			color_codes=False, rc=None)
		plt.rcParams["axes.grid"] = False

		plt.clf()
		fig = plt.figure(figsize = (15,5))
		gs = gridspec.GridSpec(10,32)
		ax = plt.subplot(gs[0:5,0:])
		ind = np.arange(len(complex_list))
		width = 0.85
		ax.axhline(0, color = 'k')
		ax.axhline(0.5,linestyle = '--', color = 'k')
		ax.axhline(-0.5,linestyle = '--', color = 'k')
		ax.axhline(1.0,linestyle = '--', color = 'k')
		ax.axhline(-1.0,linestyle = '--', color = 'k')
		ax.axhline(1.5,linestyle = '--', color = 'k')
		ax.axhline(-1.5,linestyle = '--', color = 'k')
		ax.axhline(tt1, color = 'red')
		ax.axhline(tt2, color = 'red')
		colors = list()
		for p,pval in enumerate(tt_pval_list):
			if str(pval)=='nan':
				colors.append('grey')
			else:
				if cohen_list[p]>=tt1:
					colors.append('purple')
				elif cohen_list[p]<=tt2:
					colors.append('lightgreen')
				else:
					colors.append('grey')
		rects = ax.bar(ind, cohen_list, width, color=colors,
			edgecolor = 'white')
		ax.set_ylim(-2,2)
		ax.set_xlim(-0.1, len(complex_list)+0.1)
		ax.set_xticklabels([])

		ax = plt.subplot(gs[5:7,0:])
		rects1 = ax.bar(ind, stable_fractions, width, color='darkblue',
						edgecolor = 'white')
		rects2 = ax.bar(ind, variable_fractions, width, color='red',
						bottom =np.array(stable_fractions),
						edgecolor = 'white')
		ax.set_ylim(0,100)
		ax.set_xlim(-0.1, len(complex_list)+0.1)
		plt.xticks(list(utilsFacade.frange(0.5,len(complex_list)+0.5,1)))
		ax.set_xticklabels(complex_list,rotation = 90, fontsize = 7)
		plt.savefig(folder + 'fig5a_abundance_diet_stoichiometry_comparison.pdf', 
					bbox_inches = 'tight', dpi = 400)

class figure5b:
	@staticmethod
	def execute(**kwargs):
		folder = kwargs.get('folder','PATH')
		output_folder = kwargs.get('output_folder','PATH')

		print('FIGURE5B: main_figure5b_volcanoPlots')
		figure5b.main_figure5b_volcanoPlots()

		print('FIGURE5B: main_figure5b_boxPlots')
		figure5b.main_figure5b_boxPlots()

	@staticmethod
	def get_data(name,**kwargs):
		folder = kwargs.get('folder','PATH')

		filename = 'gygi3_complex_mf_merged_perGeneRun.tsv.gz'
		data = DataFrameAnalyzer.open_in_chunks(folder, filename)
		data = data.drop_duplicates()
		data = data[data.analysis_type=='stoch_male-female']
		data = data.drop(['C9','C2','Psd3'], axis = 0)
		complex_data = data[data.complex_search.str.contains(name)]

		pval_list = -np.log10(np.array(data['pval.adj']))
		fc_list = np.array(data['logFC'])
		complex_pval_list = -np.log10(np.array(complex_data['pval.adj']))
		complex_fc_list = np.array(complex_data['logFC'])
		complex_label_list = list(complex_data.index)
		complex_colors = ['red' if item<=0.01 else 'darkblue' for item in list(complex_data['pval.adj'])]

		return {'pval':pval_list,
				'fc': fc_list,
				'complex_pval': complex_pval_list,
				'complex_fc':complex_fc_list,
				'complex_label': complex_label_list,
				'complex_color':complex_colors}

	@staticmethod
	def main_figure5b_volcanoPlots(**kwargs):
		output_folder = kwargs.get('output_folder','PATH')

		print('volcano_plot_stoichiometry:COPI')
		figure5b.figure_volcanoPlot_complex_mfComparison('HC2402:COPI', 'cop1', folder = output_folder)

		print('volcano_plot_stoichiometry:COPII')
		figure5b.figure_volcanoPlot_complex_mfComparison('HC663:COPII', 'cop2', folder = output_folder)

		print('volcano_plot_stoichiometry:Cohesin')
		figure5b.figure_volcanoPlot_complex_mfComparison('Cohesin', 'cohesin', folder = output_folder)
		
		print('volcano_plot_stoichiometry:retromer complex')
		figure5b.figure_volcanoPlot_complex_mfComparison('retromer', 'retromer', folder = output_folder)

	@staticmethod
	def main_figure5b_boxPlots(**kwargs):
		output_folder = kwargs.get('output_folder','PATH')

		print('boxplot:COPI')
		figure5b.figure_boxplot_complex_logFC_male_female('HC2402:COPI', 'cop1', folder = output_folder)

		print('boxplot:COPII')
		figure5b.figure_boxplot_complex_logFC_male_female('HC663:COPII', 'cop2', folder = output_folder)

		print('boxplot:retromer complex')
		figure5b.figure_boxplot_complex_logFC_male_female('retromer', 'retromer', folder = output_folder)

		print('boxplot:Cohesin')
		figure5b.figure_boxplot_complex_logFC_male_female('Cohesin', 'cohesin', folder = output_folder)

	@staticmethod
	def figure_volcanoPlot_complex_mfComparison(complex_id, output_name, **kwargs):
		folder = kwargs.get('folder','PATH')

		data_dict = figure5b.get_data(complex_id)
		pval_list = data_dict['pval']
		fc_list = data_dict['fc']
		complex_pval_list = data_dict['complex_pval']
		complex_fc_list = data_dict['complex_fc']
		complex_label_list = data_dict['complex_label']
		complex_colors = data_dict['complex_color']

		sns.set(context='notebook', style='white', 
			palette='deep', font='Liberation Sans', font_scale=1, 
			color_codes=False, rc=None)
		plt.rcParams["axes.grid"] = True

		plt.clf()
		fig = plt.figure(figsize = (5,5))
		ax = fig.add_subplot(111)
		ax.scatter(fc_list, pval_list, edgecolor = 'black',
				   s = 30, color = 'white', alpha = 0.2)
		ax.scatter(complex_fc_list, complex_pval_list,
				   edgecolor = 'black', s = 50, color = complex_colors)
		for count, label in enumerate(complex_label_list):
			x,y = complex_fc_list[count], complex_pval_list[count]
			ax.annotate(label, xy = (x,y), color = complex_colors[count])
		ax.set_xlabel('logFC (male/female)')
		ax.set_ylabel('p.value[-log10]')
		ax.set_xlim(-1.25,1.25)
		ax.set_ylim(-0.01, 35)
		plt.savefig(folder + 'fig5b_' + output_name + '_volcano_mf_comparison.pdf',
					bbox_inches = 'tight', dpi = 400)

	@staticmethod
	def get_data_boxplot(name,**kwargs):
		folder = kwargs.get('folder','PATH')

		filename = 'gygi3_complex_mf_merged_perGeneRun.tsv.gz'
		data = DataFrameAnalyzer.open_in_chunks(folder, filename)
		data = data.drop_duplicates()
		data = data.drop(['C9','C2','Psd3'], axis = 0)
		complex_data = data[data.complex_search.str.contains(name)]
		male_data = complex_data[complex_data.analysis_type=='male']
		female_data = complex_data[complex_data.analysis_type=='female']
		stoch_maleFemale_data = complex_data[complex_data.analysis_type=='stoch_male-female']
		pval_dict = stoch_maleFemale_data['pval.adj'].to_dict()

		wpc_data = DataFrameAnalyzer.open_in_chunks(folder, 'complex_filtered_stoch_gygi3.tsv.gz')
		wpc_data = wpc_data[wpc_data.complex_search.str.contains(name)]
		quant_list = utilsFacade.filtering(list(wpc_data.columns),'quant_')
		male_list = utilsFacade.filtering(quant_list, 'M')
		female_list = utilsFacade.filtering(quant_list, 'F')
		male_data = wpc_data[male_list].T
		female_data = wpc_data[female_list].T

		protein_list = list(set(male_data.columns).intersection(set(female_data.columns)))
		pval_list = [pval_dict[protein] for protein in protein_list]
		pval_list, protein_list = zip(*sorted(zip(pval_list, protein_list)))
		data_list = list()
		color_list = list()
		sig_protein_list = list()
		positions = [0.5]
		for protein in protein_list:
			if pval_dict[protein]<=0.01:
				try:
					data_list.append(utilsFacade.finite(list(male_data[protein])))
					data_list.append(utilsFacade.finite(list(female_data[protein])))
					color_list.append('purple')
					color_list.append('green')
					sig_protein_list.append(protein)
					sig_protein_list.append(protein)
					positions.append(positions[-1]+0.4)
					positions.append(positions[-1]+0.6)
				except:
					data_list.append(utilsFacade.finite(list(male_data[protein].T.iloc[0].T)))
					data_list.append(utilsFacade.finite(list(female_data[protein].T.iloc[0].T)))
					color_list.append('purple')
					color_list.append('green')
					sig_protein_list.append(protein)
					sig_protein_list.append(protein)
					positions.append(positions[-1]+0.4)
					positions.append(positions[-1]+0.6)					

		male_median_data = male_data.T.median().T
		female_median_data = female_data.T.median().T
		data_list.append(list(male_median_data))
		data_list.append(list(female_median_data))
		color_list.append('grey')
		color_list.append('grey')
		if name=='HC2402:COPI':
			sig_protein_list.append('COPI-male')
			sig_protein_list.append('COPI-female')
		elif name == 'HC663:COPII':
			sig_protein_list.append('COPII-male')
			sig_protein_list.append('COPII-female')
		elif name=='MAPK':
			sig_protein_list.append('MAPK-male')
			sig_protein_list.append('MAPK-female')
		elif name == 'Cohesin':
			sig_protein_list.append('Cohesin-male')
			sig_protein_list.append('Cohesin-female')			
		positions.append(positions[-1]+0.5)

		return {'data':data_list,
				'color': color_list,
				'sig_protein':sig_protein_list,
				'positions':positions}

	@staticmethod
	def figure_boxplot_complex_logFC_male_female(complex_id, output_name, **kwargs):
		folder = kwargs.get('folder','PATH')

		data_dict = figure5b.get_data_boxplot(complex_id)
		data_list = data_dict['data']
		color_list = data_dict['color']
		sig_protein_list = data_dict['sig_protein']
		positions = data_dict['positions']

		sns.set(context='notebook', style='white', 
			palette='deep', font='Liberation Sans', font_scale=1, 
			color_codes=False, rc=None)
		plt.rcParams["axes.grid"] = True

		plt.clf()
		fig = plt.figure(figsize = (5,5))
		ax = fig.add_subplot(111)
		bp = ax.boxplot(data_list,notch=0,sym="",vert=1,patch_artist=True,
			 			widths=[0.4]*len(data_list), positions = positions)
		plt.setp(bp['medians'], color="black")
		plt.setp(bp['whiskers'], color="black",linestyle="--",alpha=0.8)
		for i,patch in enumerate(bp['boxes']):
			patch.set_edgecolor("black")
			patch.set_alpha(0.6)
			patch.set_color(color_list[i])
			sub_group = data_list[i]
			x = numpy.random.normal(positions[i], 0.04, size=len(data_list[i]))
			ax.scatter(x,sub_group,color='white', alpha=0.5,edgecolor="black",s=5)
		ax.set_xticklabels(sig_protein_list, rotation = 90)
		ax.set_ylim(-1,1)
		ax.set_ylabel('normalized abundances')
		plt.savefig(folder + 'fig5b_' + output_name + '_boxplot_males_females.pdf',
					bbox_inches = 'tight', dpi = 400)

if __name__ == "__main__":
	## EXECUTE FIGURE5
	figure5a.execute(folder = sys.argv[1], output_folder = sys.argv[2])
	figure5b.execute(folder = sys.argv[1], output_folder = sys.argv[2])
All scripts were developed by Natalie Romanov (Bork group, EMBL). The source code used in the analysis of protein complex variability across individuals is released under the GNU General Public License v3.0. All scripts on this website/web resource is Copyright (C) 2019 Natalie Romanov, Michael Kuhn, Ruedi Aebersold, Alessandro Ori, Martin Beck, Peer Bork and EMBL.
GNU LICENSE
Download script here
Computational Pipeline

All Code Material

FIGURE 5: Python Code