SUPP. FIGURE 6: Python Code

									
class supp_figure6a:
	"""Supplementary figure 6a: AUC matrix heatmap with marginal bar plots.

	The class is used purely as a namespace of static methods. The names
	DataFrameAnalyzer, utilsFacade, np, pd, plt, mpl, sns and gridspec are
	expected to be available at module level.
	"""

	@staticmethod
	def execute(**kwargs):
		"""Read the AUC table from `folder` and write the matrix figure there.

		Keyword arguments:
			folder -- directory with the input table; the PDF is written to the
				same place. Defaults to the placeholder string 'PATH'.
		"""
		folder = kwargs.get('folder', 'PATH')

		print('read_auc_df')
		auc_df = supp_figure6a.read_auc_df(folder)

		print('plot_auc_matrix')
		supp_figure6a.plot_auc_matrix(folder, auc_df)

	@staticmethod
	def read_auc_df(folder):
		"""Return the table 'aucData_summary_yeast_auc_df.tsv.gz' loaded from `folder`.

		Loading is delegated to DataFrameAnalyzer.getFile; the rest of this
		class treats the result as a pandas DataFrame.
		"""
		fname = 'aucData_summary_yeast_auc_df.tsv.gz'
		return DataFrameAnalyzer.getFile(folder, fname)

	@staticmethod
	def get_specific_color_gradient(colormap, inputList, **kwargs):
		"""Map the values of `inputList` onto `colormap`.

		Arguments:
			colormap  -- anything accepted by plt.get_cmap.
			inputList -- a list, or an object offering .min()/.max()
				(e.g. a numpy array).

		Keyword arguments:
			vmin, vmax -- bounds of the colour normalisation. A bound that is
				omitted (or passed as None/False, the legacy "unset" marker)
				is taken from the data.

		Returns a tuple (scalarMap, colorList): the ScalarMappable, usable for
		a colorbar, and the RGBA colours of the input values.
		"""
		vmin = kwargs.get("vmin", None)
		vmax = kwargs.get("vmax", None)
		is_list = isinstance(inputList, list)

		# Identity checks on purpose: `vmin == False` is also true for a
		# legitimate bound of 0. Each bound is resolved on its own so that
		# giving only one of them no longer passes False (i.e. 0) as the other.
		if vmin is None or vmin is False:
			vmin = min(inputList) if is_list else inputList.min()
		if vmax is None or vmax is False:
			vmax = max(inputList) if is_list else inputList.max()

		cNorm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)
		scalarMap = mpl.cm.ScalarMappable(norm=cNorm, cmap=plt.get_cmap(colormap))
		scalarMap.set_array(inputList)
		colorList = scalarMap.to_rgba(inputList)
		return scalarMap, colorList

	@staticmethod
	def _order_auc_matrix(auc_df):
		"""Return a sorted, transposed copy of `auc_df`; the input is not modified.

		Rows of the result are the input columns ordered by decreasing column
		maximum; columns of the result are the input rows ordered by
		increasing row mean.
		"""
		# Order the input columns by their maximum. Sorting happens on the
		# transposed frame through a temporary 'max' helper column.
		max_list = auc_df.max()
		ordered = auc_df.T
		ordered['max'] = pd.Series(max_list, index=ordered.index)
		ordered = ordered.sort_values('max', ascending=False)
		ordered = ordered.drop('max', axis=1)

		# Order the input rows by their mean, back in the original orientation.
		mean_list = ordered.mean()
		ordered = ordered.T
		ordered['mean'] = pd.Series(mean_list, index=ordered.index)
		ordered = ordered.sort_values('mean', ascending=True)
		ordered = ordered.drop('mean', axis=1)

		return ordered.T.copy()

	@staticmethod
	def plot_auc_matrix(folder, auc_df):
		"""Draw the sorted AUC matrix and save it as a PDF.

		The figure consists of a heatmap (colour range 0.4-0.7, RdBu), a bar
		plot above it (one bar per heatmap column), a horizontal bar plot to
		its right (one bar per heatmap row), a narrow constant strip and a
		horizontal colorbar. It is written to
		<folder>/aucData_yeast_auc_matrix.pdf.

		Arguments:
			folder -- output directory.
			auc_df -- DataFrame of AUC values.
		"""
		import os  # local import: only needed to build the output path

		data = supp_figure6a._order_auc_matrix(auc_df)

		sns.set(context='notebook', style='white',
			palette='deep', font='Liberation Sans', font_scale=1,
			color_codes=False, rc=None)
		plt.rcParams["axes.grid"] = False

		# Both bar plots start at 0.5 (bottom=/left= below), hence the values
		# are stored relative to 0.5. utilsFacade.finite presumably drops
		# non-finite entries -- confirm against its definition.
		col_mean_list = [np.mean(utilsFacade.finite(list(data[col]))) - 0.5
			for col in data.columns]
		by_row = data.T  # transpose once instead of once per row
		row_max_list = [max(utilsFacade.finite(list(by_row[row]))) - 0.5
			for row in by_row.columns]
		# The side bars are drawn in reversed row order; presumably because the
		# heatmap puts its first row at the top while barh starts at the
		# bottom -- confirm.
		row_max_list = row_max_list[::-1]

		plt.clf()
		fig = plt.figure(figsize=(5, 8))
		gs = gridspec.GridSpec(16, 11)

		# --- top bar plot: mean of every heatmap column ---
		ax1_density = plt.subplot(gs[0:2, 0:8])
		ax1_density.set_ylim(0.5, 0.8)
		for level in (0.55, 0.6, 0.65, 0.7, 0.75):
			ax1_density.axhline(level, alpha=0.6, color="grey", linestyle='--',
				linewidth=0.2, zorder=1)
		col_positions = np.arange(len(col_mean_list))
		_, colorList_x = supp_figure6a.get_specific_color_gradient(plt.cm.Greys, col_positions)
		ax1_density.bar(col_positions, col_mean_list, 0.95, color=colorList_x, bottom=0.5,
			edgecolor="white", linewidth=2, zorder=3)
		plt.xticks(list(range(len(data.columns))))
		ax1_density.set_xticklabels([])
		ax1_density.set_xlim(0, len(data.columns))

		# --- heatmap; the mappable is only needed for the colorbar further down ---
		ax = plt.subplot(gs[2:10, 0:8])
		scalarmap, _ = supp_figure6a.get_specific_color_gradient(plt.cm.RdBu, np.array(data),
			vmin=0.4, vmax=0.7)
		sns.heatmap(data, cmap=plt.cm.RdBu, vmin=0.4, vmax=0.7,
			linecolor="white", linewidth=2, cbar=False, ax=ax)

		# --- right bar plot: maximum of every heatmap row ---
		ax2_density = plt.subplot(gs[2:10, 8:10])
		plt.yticks(list(range(len(data.index))))
		ax2_density.set_ylim(0, len(data.index))
		ax2_density.set_xlim(0.5, 0.85)
		row_positions = np.arange(len(row_max_list))
		_, colorList_y = supp_figure6a.get_specific_color_gradient(plt.cm.Greys, row_positions)
		plt.setp(ax2_density.get_xticklabels(), rotation=90)
		ax2_density.set_yticklabels([])
		# 0.7 is highlighted in red; the remaining levels are faint grey guides.
		ax2_density.axvline(0.7, color="red", linestyle="--", linewidth=0.5)
		for level in (0.55, 0.6, 0.65, 0.75, 0.8):
			ax2_density.axvline(level, alpha=0.6, color="grey", linestyle='--',
				linewidth=0.2, zorder=1)
		ax2_density.barh(row_positions, row_max_list,
			0.95, color=colorList_y, left=0.5,
			edgecolor="white", linewidth=2, zorder=3)

		# --- category strip: 14 identical cells, drawn with the axes switched off ---
		ax_category = plt.subplot(gs[2:10, 10:11])
		df = pd.DataFrame({'color': 14 * [1]})
		sns.heatmap(df, cbar=False, linewidth=2, linecolor="white", ax=ax_category)
		ax_category.axis("off")

		# --- colorbar for the heatmap ---
		ax_cbar = plt.subplot(gs[13:14, 0:8])
		cbar = fig.colorbar(scalarmap, cax=ax_cbar,
			orientation="horizontal")
		cbar.set_label("Area under curve (AUC)")

		# os.path.join keeps the file inside `folder` even without a trailing separator.
		plt.savefig(os.path.join(folder, "aucData_yeast_auc_matrix.pdf"),
			bbox_inches="tight", dpi=400)

class supp_figure6b:
	"""Supplementary figure 6b: box plot of r2 values per yeast dataset and category.

	The class is used purely as a namespace of static methods. The names
	DataFrameAnalyzer, utilsFacade, np, plt and sns are expected to be
	available at module level.
	"""

	@staticmethod
	def execute(**kwargs):
		"""Load the RESMODULE tables from `folder` and write the box plot there.

		Keyword arguments:
			folder -- directory with the input tables; the PDF is written to
				the same place. Defaults to the placeholder string 'PATH'.
		"""
		folder = kwargs.get('folder', 'PATH')

		print('load_data')
		data_dict = supp_figure6b.load_data(folder)

		print('plot_boxplot_distributions')
		supp_figure6b.plot_boxplot_distributions(folder, data_dict)

	@staticmethod
	def load_data(folder):
		"""Collect the median 'r2.all.module' value of every category/dataset table.

		For each category and dataset key the file
		'RESMODULE_<key>_<category in lower case>.tsv' is loaded from `folder`
		through DataFrameAnalyzer.getFile.

		Returns a nested dict: data_dict[category][key] -> median r2 (scalar).
		"""
		categories = ['ALL_proteins', 'complex_abundance', 'complex_stoichiometry',
			'pathway_abundance', 'pathway_stoichiometry']
		keys = ['ENV_yeast11', 'ENV_yeast3', 'GEN_yeast14', 'ENV_yeast20', 'GEN_yeast5']

		data_dict = dict()
		for cat in categories:
			data_dict[cat] = dict()
			for key in keys:
				print(cat, key)
				fname = 'RESMODULE_' + key + '_' + cat.lower() + '.tsv'
				data = DataFrameAnalyzer.getFile(folder, fname)
				# NOTE(review): only the median is kept, although the consumer is
				# a box plot of distributions -- confirm whether the full list
				# of r2 values was intended here.
				median_r2 = np.median(utilsFacade.finite(list(data['r2.all.module'])))
				data_dict[cat][key] = median_r2
		return data_dict

	@staticmethod
	def plot_boxplot_distributions(folder, data_dict):
		"""Draw one box per dataset/category pair and save the figure as a PDF.

		Boxes are grouped by dataset key; inside each group the categories
		appear in the order ALL_proteins, complex_abundance,
		complex_stoichiometry and are distinguished by colour. The figure is
		written to <folder>/explained_variance_yeast_datasets.pdf.

		Arguments:
			folder    -- output directory.
			data_dict -- nested dict data_dict[category][key], as returned by
				load_data; only the three categories above are plotted.
		"""
		import os  # local import: only needed to build the output path

		categories = ['ALL_proteins', 'complex_abundance', 'complex_stoichiometry']
		keys = ['ENV_yeast11', 'ENV_yeast3', 'ENV_yeast20', 'GEN_yeast5', 'GEN_yeast14']

		data_list = list()
		xlabel_list = list()
		for key in keys:
			for cat in categories:
				# boxplot treats a flat list of scalars as ONE dataset, which
				# would not match the per-box `widths` below. atleast_1d turns
				# every entry into its own dataset and leaves sequences as is.
				data_list.append(np.atleast_1d(data_dict[cat][key]))
				xlabel_list.append(key + '::' + cat)

		sns.set(context='notebook', style='white',
			palette='deep', font='Liberation Sans', font_scale=1,
			color_codes=False, rc=None)
		plt.rcParams["axes.grid"] = True

		plt.clf()
		fig = plt.figure(figsize=(10, 5))
		ax = fig.add_subplot(111)
		bp = ax.boxplot(data_list, notch=0, sym="", vert=1, patch_artist=True,
			widths=[0.5] * len(data_list))
		plt.setp(bp['medians'], color="black")
		plt.setp(bp['whiskers'], color="black", linestyle="-")

		# (face colour, edge colour) per position within a dataset group; the
		# pattern repeats every three boxes, i.e. once per category.
		box_styles = (('white', 'blue'), ('lightblue', 'black'), ('darkblue', 'black'))
		for i, patch in enumerate(bp['boxes']):
			facecolor, edgecolor = box_styles[i % 3]
			patch.set_facecolor(facecolor)
			patch.set_edgecolor(edgecolor)
			patch.set_alpha(0.8)

		# Boxes sit at x = 1..N, so tick 0 receives an empty label.
		tick_labels = [''] + xlabel_list
		plt.xticks(list(range(len(tick_labels))))
		ax.set_xticklabels(tick_labels, rotation=90)

		# os.path.join keeps the file inside `folder` even without a trailing separator.
		plt.savefig(os.path.join(folder, 'explained_variance_yeast_datasets.pdf'),
			bbox_inches='tight', dpi=300)

if __name__ == "__main__":
	## EXECUTE SUPPFIGURE6
	# Usage: python <script> FOLDER
	# FOLDER holds the input tables and receives the generated PDFs.
	import sys  # imported here so the entry point does not rely on a module-level import

	# Fail with a readable message instead of an IndexError when FOLDER is missing.
	if len(sys.argv) < 2:
		sys.exit('usage: ' + sys.argv[0] + ' FOLDER')

	supp_figure6a.execute(folder = sys.argv[1])
	supp_figure6b.execute(folder = sys.argv[1])
									
								

All scripts were developed by Natalie Romanov (Bork group, EMBL). The source code used in the analysis of protein complex variability across individuals is released under the GNU General Public License v3.0. All scripts on this website/web resource are Copyright (C) 2019 Natalie Romanov, Michael Kuhn, Ruedi Aebersold, Alessandro Ori, Martin Beck, Peer Bork and EMBL.

GNU LICENSE

Download script here