# ------------------------------------------------------------
# Author: Natalie Romanov
# ------------------------------------------------------------

"""
Library of common functions used throughout all scripts
"""

import sys
import gzip
sys.path.append('SCRIPT PATH')
from wp_utils_import_packages import *
#from wp_utils_library_code import *

class RFacade:
    """
    Wrappers around R routines (called through rpy2) plus a few pure-Python
    statistics helpers. The R-backed methods expect `rpy2`, `robjects` and
    `pd` to be available at module level.
    """

    @staticmethod
    def perform_limma_analysis(quant_data, design_df, contrast):
        """
        Run a limma differential analysis and return the topTable as a DataFrame.

        The pipeline is lmFit -> makeContrasts -> contrasts.fit -> eBayes ->
        topTable; the printed R table is then parsed line by line.

        :param quant_data: quantitative matrix (rows are labelled by its index)
        :param design_df: design matrix handed to lmFit / makeContrasts
        :param contrast: contrast string, e.g. 'male-female'
        :return: pandas DataFrame of string cells, columns taken from the
                 printed topTable header, index set to quant_data.index
        """
        from rpy2.robjects.packages import importr

        r = rpy2.robjects.r
        # Importing limma loads the package into the R session so that the
        # r[...] lookups below can resolve its functions.
        importr("limma", robject_translations = {"format.perc": "_format_perc"})
        robjects.r('data("ALL")')

        datasetNormalized = quant_data.copy()

        r.assign('design_df', design_df)
        fit = r['lmFit'](datasetNormalized, design=design_df)  # fit the matrix to the design
        contrastsMatrix = r['makeContrasts'](contrast, levels = design_df)
        fit2 = r['contrasts.fit'](fit, contrasts = contrastsMatrix)  # make the comparison
        fit2 = r['eBayes'](fit2)  # moderate the t-tests by empirical Bayes smoothing

        table = r['topTable'](fit2, coef = contrast, number = len(quant_data))
        # The printed representation ends with a newline, hence the dropped last item.
        lines = str(table).split('\n')[:-1]

        def _tokens(line):
            # Split on single spaces and drop the empty strings left by padding.
            return [token for token in line.split(' ') if token != '']

        cols = _tokens(lines[0])
        df_list = []
        for line in lines[1:]:
            values = _tokens(line)[1:]  # first token is the printed row name
            if len(values) == 0:
                break
            df_list.append(values)

        adf = pd.DataFrame(df_list)
        adf.columns = cols
        # NOTE(review): rows are labelled with quant_data.index in input order.
        # If topTable returns rows re-sorted (its default ranking), these labels
        # will not line up with the values -- confirm against the R output.
        adf.index = list(quant_data.index)
        return adf

    @staticmethod
    def get_bh_pvalues(pvals):
        """
        Adjust p-values with R's stats::p.adjust using method 'BH'.

        :param pvals: iterable of p-values (converted to an R FloatVector)
        :return: the R vector returned by p.adjust
        """
        from rpy2.robjects.packages import importr
        from rpy2.robjects.vectors import FloatVector

        stats = importr("stats", robject_translations = {"format.perc": "_format_perc"})
        return stats.p_adjust(FloatVector(pvals), method = 'BH')

    @staticmethod
    def make_multivariate_linear_regression(x,y,x_total):
        """
        Fit an R linear model with formula 'x0 + x1 + ... ~ y'.

        Every column of `x` is exported to the R global environment as
        x0, x1, ... and `y` as y.

        :param x: 2-D array indexed as x[:, ci]
        :param y: object assigned to the R global variable 'y'
        :param x_total: passed to lm() as its `data` argument
        :return: the fitted R lm object
        """
        from rpy2.robjects.packages import importr
        from rpy2.robjects.vectors import FloatVector
        stats = importr("stats", robject_translations = {"format.perc": "_format_perc"})

        robjects.globalenv["y"] = y
        terms = []
        for ci in range(x.shape[1]):
            name = "x" + str(ci)
            robjects.globalenv[name] = FloatVector(list(x[:, ci]))
            terms.append(name)
        # NOTE(review): the x-terms are on the left-hand side of '~'; confirm
        # that this orientation of the formula is intended.
        query = " + ".join(terms) + ' ~ y'
        return stats.lm(query, data=x_total)

    @staticmethod
    def get_wilcox_test_pval(pos, neg, alternative='g'):
        """
        Run R's wilcox.test on two samples.

        :param pos: first sample (converted to an R FloatVector)
        :param neg: second sample (converted to an R FloatVector)
        :param alternative: forwarded to wilcox.test
        :return: element [2][0] of the R result (presumably the p-value --
                 verify against the wilcox.test return structure)
        """
        from rpy2.robjects.vectors import FloatVector

        r = rpy2.robjects.r
        result = r['wilcox.test'](FloatVector(pos), FloatVector(neg),
                                  alternative=alternative)
        return result[2][0]

    @staticmethod
    def get_lrtest(X1, X2, y):
        """
        Fit two R linear models 'y ~ x0 + x1 + ...' (one per design matrix) and
        compare them with lmtest::lrtest.

        :param X1: 2-D array of predictors for the first model
        :param X2: 2-D array of predictors for the second model
        :param y: response assigned to the R global variable 'y'
        :return: last element of the last component of the lrtest result
        """
        from rpy2.robjects.packages import importr
        from rpy2.robjects.vectors import FloatVector

        stats = importr("stats", robject_translations = {"format.perc": "_format_perc"})
        lmtest = importr('lmtest')

        robjects.globalenv["y"] = y

        models = []
        for x in [X1, X2]:
            terms = []
            for ci in range(x.shape[1]):
                name = "x" + str(ci)
                robjects.globalenv[name] = FloatVector(list(x[:, ci]))
                terms.append(name)
            query = "y ~ " + " + ".join(terms)
            print(query)
            models.append(stats.lm(query))
        pvalue = list(lmtest.lrtest(models[0], models[1]))[-1][-1]
        print(pvalue)
        return pvalue

    @staticmethod
    def get_pval_asterisks(pvals, vert=False):
        """
        Translate p-values into significance asterisks.

        '****' for p <= 0.0001, '***' for p < 0.001, '**' for p < 0.01,
        '*' for p < 0.05 and '' otherwise.

        :param pvals: iterable of p-values
        :param vert: if True the asterisks are separated by newlines
        :return: list of strings, one per p-value
        """
        def _star_count(pval):
            if pval <= 0.0001:
                return 4
            if pval < 0.001:
                return 3
            if pval < 0.01:
                return 2
            if pval < 0.05:
                return 1
            return 0

        separator = "\n" if vert else ""
        return [separator.join("*" * _star_count(pval)) for pval in pvals]

    @staticmethod
    def get_fdr(df):
        '''
        Calculate a running FDR over the rows of df, in their current order.

        :param df: labels (1, 0) have to be defined; the first column is read
                   as the label
        :return: the same df object with an added 'FDR' column holding
                 fp / (fp + tp) after each row
        '''
        tp = 0
        fp = 0
        fdrs = []
        for raw_label in df.iloc[:, 0]:
            label = int(raw_label)
            if label == 1:
                tp += 1
            elif label == 0:
                fp += 1
            fdrs.append(float(fp) / float(fp + tp))

        df['FDR'] = fdrs
        return df

    @staticmethod
    def get_bh_pvalues_python(p):
        """
        Benjamini-Hochberg adjustment implemented with numpy.

        :param p: iterable of p-values
        :return: numpy array of adjusted p-values in the input order
        """
        p = np.asarray(p, dtype=float)
        by_descend = p.argsort()[::-1]
        by_orig = by_descend.argsort()
        # n / rank, with rank counted from the largest p-value downwards
        steps = float(len(p)) / np.arange(len(p), 0, -1)
        # running minimum keeps the adjusted values monotone; cap at 1
        q = np.minimum(1, np.minimum.accumulate(steps * p[by_descend]))
        return q[by_orig]

class complexFacade:
    """Helpers for protein-complex names and complex-membership lookups."""

    @staticmethod
    def get_complex_filename(altName):
        """
        Turn a complex name into a file-name-friendly string by replacing
        '/', ':', '(' and ')' with underscores.
        """
        sanitized = altName
        for symbol in ("/", ":", "(", ")"):
            sanitized = sanitized.replace(symbol, "_")
        return sanitized

    @staticmethod
    def get_associated_complexes(protein,complexDict,**kwargs):
        """
        Find all complexes that list `protein` among their members.

        For every complex the entries '<species>GeneNames', '<species>Uniprots',
        '<species>Genes' and '<species>Proteins' are pooled and searched.

        :param protein: identifier to look up
        :param complexDict: mapping complexID -> dict with the keys above
                            plus 'altName'
        :param kwargs: species (default 'human'), used as key prefix
        :return: tuple (';;'-joined complex ids, ';;'-joined first altNames)
        """
        species = kwargs.get('species','human')
        suffixes = ('GeneNames', 'Uniprots', 'Genes', 'Proteins')

        matched_ids = []
        matched_names = []
        for complex_id, entry in complexDict.items():
            identifiers = set()
            for suffix in suffixes:
                identifiers.update(entry[species + suffix])
            if protein not in identifiers:
                continue
            matched_ids.append(complex_id)
            matched_names.append(entry['altName'][0])
        return ';;'.join(matched_ids), ';;'.join(matched_names)

class utilsFacade:
    @staticmethod
    def pickle_dump(a, folder, fileName, **kwargs):
        ignore_file_existence = kwargs.get('ignore_file_existence',False)

        file_existence = os.path.exists(folder + fileName + '.pkl')
        if file_existence == True and ignore_file_existence==False:
            raise Exception('File exists already! Please specify ignore_file_existence==True to overwrite!')
        else:
            print('dumping pickle file:' + folder + fileName + '.pkl')
            with open(folder + fileName + '.pkl', 'wb') as handle:
                pickle.dump(a, handle, protocol=pickle.HIGHEST_PROTOCOL)

    @staticmethod
    def pickle_load(folder, fileName):
        with open(folder + fileName, 'rb') as handle:
            b = pickle.load(handle)       
        return b

    @staticmethod
    def recluster_matrix_only_rows(matrix, **kwargs):
        '''
        given matrix, reclusters matrix only rows though;
        columns stay the same
        '''
        method = kwargs.get('method','average')
        metric = kwargs.get('metric','euclidean')

        row_linkage = hierarchy.linkage(distance.pdist(np.array(matrix)),
            method=method,metric=metric)
        Z1 = sch.dendrogram(row_linkage,orientation='left',no_plot=True)
        idx1 = Z1["leaves"]
        correlations_array = np.array(matrix)[idx1,:]
        idxList = list(matrix.index)
        proteinList=list()
        for i in idx1:
            proteinList.append(idxList[i])
        new_hmapData = pd.DataFrame(correlations_array)
        new_hmapData.columns = matrix.columns
        new_hmapData.index = proteinList
        return new_hmapData, proteinList

    @staticmethod
    def standarize_rows_zscores(df):
        #Transform data
        myArray = np.array(df)
        normalizedArray = []

        for row in range(0, len(myArray)):
            list_values = []
            Min =  min(myArray[row])
            Max = max(myArray[row])
            mean = np.mean(myArray[row])
            std = np.std(myArray[row])

            for element in myArray[row]:
                list_values.append((element - mean)/std)
            normalizedArray.append(list_values)

        newArray = []

        for row in range(0, len(normalizedArray)):
            list_values = normalizedArray[row]
            newArray.append(list_values)
        new_df = pd.DataFrame(newArray)
        new_df.columns = map(float,df.columns)
        new_df.index = df.index
        return new_df
               
    @staticmethod
    def standarize_rows(df):
        #Transform data
        myArray = np.array(df)
        normalizedArray = []

        for row in range(0, len(myArray)):
            list_values = []
            Min =  min(myArray[row])
            Max = max(myArray[row])

            for element in myArray[row]:
                list_values.append(  float(element-Min)/float(Max- Min) )
            normalizedArray.append(list_values)

        newArray = []

        for row in range(0, len(normalizedArray)):
            list_values = [x / sum(normalizedArray[row])*100 for x in normalizedArray[row]]
            newArray.append(list_values)
        new_df = pd.DataFrame(newArray)
        new_df.columns = map(float,df.columns)
        new_df.index = df.index
        return new_df

    @staticmethod
    def format_e(n):
        a = '%E' % n
        return a.split('E')[0].rstrip('0').rstrip('.')[0:4] + 'E' + a.split('E')[1]

    @staticmethod
    def shuffle_dataframe(df):
        col = df.columns
        val = df.values
        shape = val.shape
        val_flat = val.flatten()
        np.random.shuffle(val_flat)
        return pd.DataFrame(val_flat.reshape(shape),columns=col)

    @staticmethod
    def recluster_matrix(matrix,**kwargs):
        '''
        given matrix, reclusters matrix >> rows and columns
        '''

        linkage_method = kwargs.get('linkage_method','average')
        metric = kwargs.get('metric','euclidean')

        row_linkage = hierarchy.linkage(distance.pdist(np.array(matrix)),
            method=linkage_method,metric=metric)
        Z1 = sch.dendrogram(row_linkage,orientation='left',no_plot=True)
        idx1 = Z1["leaves"]
        correlations_array = np.array(matrix)[idx1,:]
        idxList = list(matrix.index)
        proteinList = list()
        for i in idx1:
            proteinList.append(idxList[i])
        new_hmapData = pd.DataFrame(correlations_array)
        new_hmapData.columns = matrix.columns
        new_hmapData.index = proteinList

        col_linkage = hierarchy.linkage(distance.pdist(np.array(matrix.T)),
            method=linkage_method,metric=metric)
        Z1 = sch.dendrogram(col_linkage,orientation='top',no_plot=True)
        idx1 = Z1["leaves"]
        correlations_array = np.array(matrix)[:,idx1]
        idxList=list(matrix.columns)
        proteinList=list()
        for i in idx1:
            proteinList.append(idxList[i])
        new_hmapData = pd.DataFrame(correlations_array)
        new_hmapData.columns = proteinList
        new_hmapData.index = matrix.index
        return new_hmapData

    @staticmethod
    def quantileNormalize1(df_input):
        df = df_input.copy()
        #compute rank
        dic = {}
        for col in df:
            dic.update({col : sorted(df[col])})
        sorted_df = pd.DataFrame(dic)
        rank = sorted_df.mean(axis = 1).tolist()
        #sort
        for col in df:
            t = np.searchsorted(np.sort(df[col]), df[col])
            df[col] = [rank[i] for i in t]
        return df

    @staticmethod
    def frange(x, y, jump):
    	"""
    	generator function: makes range of floats possible
    	"""
    	while x < y:
    	    yield x
    	    x += jump

    @staticmethod
    def hasNumbers(inputString):
        return any(char.isdigit() for char in inputString)

    @staticmethod
    def fitNormalCurve(myArray):
        mu,std = scipy.stats.norm.fit(myArray)
        return mu,std

    @staticmethod
    def get_quantCols(data):
        quantCols=list()
        for col in data.columns:
            if col.startswith("quant_")==True:
                quantCols.append(col)
        return quantCols

    @staticmethod
    def quantileNormalize2(df_input):
        df = df_input.copy()
        #compute rank
        dic = {}
        for col in df:
            dic[col] = df[col].sort_values(na_position='first').values
        sorted_df = pd.DataFrame(dic)
        #rank = sorted_df.mean(axis = 1).tolist()
        rank = sorted_df.median(axis = 1).tolist()
        #sort
        for col in df:
            print(col)
            # compute percentile rank [0,1] for each score in column 
            t = df[col].rank( pct=True, method='max' ).values
            # replace percentile values in column with quantile normalized score
            # retrieve q_norm score using calling rank with percentile value
            df[col] = [ np.nanpercentile( rank, i*100 ) if ~np.isnan(i) else np.nan for i in t ]
        return df

    @staticmethod
    def flatten(xs):
        """
        Given a nested list, this function can disentangle the values and just give you the list of values.
        """
        result = []
        if isinstance(xs, (list, tuple)):
            for x in xs:
                result.extend(x)
        else:
            result.append(xs)
        return result

    @staticmethod
    def polyfit(x, y, degree):
        """
        gives you the parameters of the polyfit-curve fitted onto your data.
        """
        results =dict()
        coeffs = numpy.polyfit(x, y, degree)
        results['polynomial'] = coeffs.tolist()
        correlation = numpy.corrcoef(x, y)[0,1]
        results['correlation'] = correlation;results['determination'] = correlation**2
        return results

    @staticmethod
    def eformat(f, prec, exp_digits):
        """
        This function can display your number with e^10, etc.
        """
        s = "%.*e"%(prec, f)
        mantissa, exp = s.split('e')
        return "%se%+0*d"%(mantissa, exp_digits+1, int(exp))

    @staticmethod
    def getCombinations(lst):
        return list(itertools.combinations(lst, 2))

    @staticmethod
    def permutations(maxValue,minValue,lenArray):
        """
        1) array (with given length) is filled with random numbers from minValue to maxValue
        """
        permutedList=list()
        for s in xrange(0,1000):
            tempList=list()
            for i in xrange(0,lenArray):
                tempList.append(random.uniform(minValue,maxValue))
            permutedList.append(tempList)
        return permutedList

    @staticmethod
    def convertToStandardNormal(inputArray):
        """
        inputArray converted to standard normal (check on normality before?)
        """ 
        standardArray=list()
        mu=inputArray.mean()
        std=inputArray.std()
        for x in inputArray:
            standardArray.append((x-mu)/std)
        return numpy.asarray(standardArray)

    @staticmethod
    def getSampleZScore(inputArray,sample):
        """
        compute z-score for all values against the standard normal
        """ 
        mu=inputArray.mean()
        std=inputArray.std()
        zSample=(sample-mu)/std
        return zSample

    @staticmethod
    def get_empirical_fdr(df):
        '''
        Calculate an FDR
        :param df: p.val and label (1, 0) have to be defined
        :return:
        '''
        df = df.sort_values('p.val', ascending=True)
        df = df.reset_index(drop=True)
        fdr_by_score = {}
        tp = 0
        fp = 0
        fdrs = []
        for ri, r in df.iterrows():
            pval, label = r.values
            tp += 1 if int(label) == 1 else 0
            fp += 1 if int(label) == 0 else 0
            fdr = float(fp) / float(fp + tp)
            fdrs.append(fdr)

        df['FDR'] = fdrs
        return df

    @staticmethod
    def correct_pvalues(pvalues,**kwargs):
        """
        Correct p-values for multiple testing with statsmodels' multipletests.

        :param pvalues: iterable of p-values
        :param kwargs: method (default 'fdr_bh'), alpha (default 0.1)
        :return: the corrected p-values (second element of the
                 multipletests result)
        """
        fdr_method = kwargs.get("method","fdr_bh")
        alpha = kwargs.get("alpha",0.1)
        results = statsmodels.sandbox.stats.multicomp.multipletests(
            numpy.array(numpy.array(pvalues)), alpha=alpha, method=fdr_method)
        _, pValuesCorr, _, _ = results
        return pValuesCorr

    @staticmethod
    def trimmean(arr,**kwargs):
        """
        calculates the trimmed mean for a given array (percentage 25% -- kwargs: percent)
        """
        percent=kwargs.get("percent",25)
        n = len(arr)
        k = int(round(n*(float(percent)/100)/2))
        return numpy.median(sorted(arr[k+1:n-k]))

    @staticmethod
    def trimstd(arr, percent):
        """
        calculates the trimmed std for a given array (percentage 25% -- kwargs: percent)
        """    
        n = len(arr)
        k = int(round(n*(float(percent)/100)/2))
        arr=sorted(arr)
        return numpy.std(arr[k:n-k],ddof=1)  

    @staticmethod
    def filtering(input_list,keyword, **kwargs):
        condition = kwargs.get('condition','find')
        print('filtering', condition)
        if condition == 'find':
            s = filter(lambda a:str(a).find(keyword)!=-1, input_list)
        elif condition == 'notfind':
            s = filter(lambda a:str(a).find(keyword)==-1, input_list)
        elif condition == 'startswith':
            s = filter(lambda a:str(a).startswith(keyword)==True, input_list)
        elif condition == 'endswith':
            s = filter(lambda a:str(a).endswith(keyword)==True, input_list)
        elif condition == 'notstartswith':
            s = filter(lambda a:str(a).startswith(keyword)==False, input_list)
        elif condition == 'notendswith':
            s = filter(lambda a:str(a).endswith(keyword)==False, input_list)
        elif condition == 'equals':
            s = filter(lambda a:str(a)==keyword, input_list)
        elif condition == 'notequals':
            s = filter(lambda a:str(a)!=keyword, input_list)
        else:
            s = input_list
        return s

    @staticmethod
    def sort_multiple_lists(lists, **kwargs):
        reverse = kwargs.get('reverse',False)
        sorted_lists = sorted(itertools.izip(*lists),reverse=reverse, key=lambda x: x[0])
        new_lists = [[x[i] for x in sorted_lists] for i in range(len(lists))]
        return new_lists

    @staticmethod
    def get_data_list(data):
        data_list = list()
        for col in list(data.columns):
            data_list.append(filter(lambda a:str(a)!='nan',list(data[col])))
        return data_list

    @staticmethod
    def median(mylist):
        """
        calculates the median if you feel incapable to use numpy!
        """
        sorts = sorted(mylist)
        length = len(sorts)
        if not length % 2:
            return (sorts[length / 2] + sorts[length / 2 - 1]) / 2.0
        return sorts[length / 2] 

    @staticmethod
    def make_dictionary_from_dataframe(df, keyname, valuename):
        df_dict = {k: list(v) for k,v in df.groupby(keyname)[valuename]}
        return df_dict

    @staticmethod
    def get_cluster_classes(den, label='ivl'):
        cluster_idxs = defaultdict(list)
        for c, pi in zip(den['color_list'], den['icoord']):
            for leg in pi[1:3]:
                i = (leg - 5.0) / 10.0
                if abs(i - int(i)) < 0.0001:
                    cluster_idxs[c].append(int(i))
        cluster_classes = {}
        count=0
        for c, l in cluster_idxs.items():
            i_l = [den[label][i] for i in l]
            cluster_classes[count] = i_l
            count+=1
        return cluster_classes

    @staticmethod
    def some(x, n):
        """
        RANDOM ROW SELECTION IN PANDAS DATAFRAME
        for a given array, this function gives me a random sample of a certain length
        """
        return x.ix[random.sample(x.index, n)]

    @staticmethod
    def timestamp():
        return time.strftime("%Y%m%d")

    @staticmethod
    def zscore_to_pvalue(z_scores):
        p_values1 = scipy.stats.norm.sf(abs(z_scores)) #one-sided
        p_values2 = scipy.stats.norm.sf(abs(z_scores))*2 #twosided
        return p_values1,p_values2

    @staticmethod
    def finite(lst):
        return filter(lambda a:np.isfinite(a),lst)

    @staticmethod
    def drawRandomSample(x,n):
        return numpy.random.choice(x,n)

    @staticmethod
    def makePositive(listX):
        """
        This function renders a given list positive by performing the calculation: numpy.log(1-x)
        """
        return map(lambda x: numpy.log(1-x), listX)

    @staticmethod
    def get_correlation_values(data):
        triuData=np.triu(np.array(data))
        count=0
        tempList=list()
        for item in triuData:
            for i in item[(count+1):]:
                if i>-1 and i<1 and str(i)!="nan":
                    tempList.append(i)
            count+=1
        return tempList

    @staticmethod
    def get_triangular_values(data):
        triuData=np.triu(np.array(data))
        count=0
        tempList=list()
        for item in triuData:
            for i in item[(count+1):]:
                tempList.append(i)
            count+=1
        return tempList      

    @staticmethod
    def getMedianList(lists):
        """
        input: lists (nested lists)
        This function calculates the medianProfile out of several lists.
        """ 
        medianList=list()
        for i in xrange(len(lists[0])):
            tempList=list()
            for l in lists:
                if numpy.isfinite(l[i])==True:
                    tempList.append(l[i])
            medianList.append(numpy.median(tempList))
        return medianList

    @staticmethod
    def getMaxList(lists):
        """
        input: lists (nested lists)
        This function calculates the maxProfile out of several lists.
        """ 
        medianList=list()
        for i in xrange(len(lists[0])):
            tempList=list()
            for l in lists:
                if numpy.isfinite(l[i])==True:
                    tempList.append(l[i])
            medianList.append(0.75*max((tempList)))
        return medianList

    @staticmethod
    def getMinList(lists):
        """
        input: lists (nested lists)
        This function calculates the minProfile out of several lists.
        """ 
        medianList=list()
        for i in xrange(len(lists[0])):
            tempList=list()
            for l in lists:
                if numpy.isfinite(l[i])==True:
                    tempList.append(l[i])
            medianList.append(.75*min(tempList))
        return medianList

    @staticmethod
    def getTrimMedianList(lists):
        """
        input: lists (nested lists)
        This function calculates the medianProfile out of several lists.
        """ 
        medianList=list()
        for i in xrange(len(lists[0])):
            tempList=list()
            for l in lists:
                if numpy.isfinite(l[i])==True:
                    tempList.append(l[i])
            medianList.append(trimmean(tempList))
        return medianList

    @staticmethod
    def getTrimmedMeanList(lists,**kwargs):
        """
        input: lists (nested lists)
        This function calculates the trimmed medianProfile out of several lists.
        """ 
        threshold=kwargs.get("threshold",25)
        medianList=list()
        for i in xrange(len(lists[0])):
            tempList=list()
            for l in lists:
                if numpy.isfinite(l[i])==True:
                    tempList.append(l[i])
            medianList.append(trimmean(tempList,percent=threshold))
        return medianList

    @staticmethod
    def update_status(current,total):
        return (float(current)/float(total))*100

    @staticmethod
    def avoidSpecialSignsInName(sequence):###I THINK THERE IS A BETTER FUNCTION SOMEWHERE
        specialSigns=["(",")","/","*","\/"]
        correctedSequence=sequence
        for special in specialSigns:
            correctedSequence=correctedSequence.replace(special,"_")
        return correctedSequence

class colorFacade:
    """Colour conversion, palette and colormap helpers built on matplotlib."""

    @staticmethod
    def hex2color(c):
        """
        Take a hex string *c* and return the corresponding rgb 3-tuple
        Example: #efefef -> (0.93725, 0.93725, 0.93725)
        """
        # call through an instance so this works whether to_rgb is defined
        # as an instance method or as a static method
        return ColorConverter().to_rgb(c)

    @staticmethod
    def make_colormap(seq,name):
        """Return a LinearSegmentedColormap
        seq: a sequence of floats and RGB-tuples. The floats should be increasing
        and in the interval (0,1).
        name: name given to the colormap.
        """
        import matplotlib.colors as mcolors
        # pad with sentinels so every float has a colour on both sides
        seq = [(None,) * 3, 0.0] + list(seq) + [1.0, (None,) * 3]
        cdict = {'red': [], 'green': [], 'blue': []}
        for i, item in enumerate(seq):
            if isinstance(item, float):
                r1, g1, b1 = seq[i - 1]
                r2, g2, b2 = seq[i + 1]
                cdict['red'].append([item, r1, r2])
                cdict['green'].append([item, g1, g2])
                cdict['blue'].append([item, b1, b2])
        return mcolors.LinearSegmentedColormap(name, cdict)

    @staticmethod
    def hex_to_rgb(value):
        """
        This function converts Hex-Number into RGB-value!
        The string (with or without leading '#') is split into three equally
        long parts, each parsed as a hexadecimal integer.
        """
        value = value.lstrip('#')
        lv = len(value)
        return tuple(int(value[i:i + lv // 3], 16) for i in range(0, lv, lv // 3))

    @staticmethod
    def makeColorPalette(n_random=100000):
        """
        This function produces a list of colors: 13 fixed colors first,
        followed by n_random random '#RRGGBB' colors.

        :param n_random: number of random colors to append (default 100000)
        """
        palette = ["red","green","blue","orange","aqua","yellow","grey","brown","black","lime","#ec3b83","#b4eeb4","#ff7373"]

        def channel():
            return random.randint(0,255)

        for _ in range(n_random):
            palette.append('#%02X%02X%02X' % (channel(),channel(),channel()))
        return palette

    @staticmethod
    def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
        """
        Build a new colormap from the part of `cmap` between minval and
        maxval, sampled at n evenly spaced points.
        """
        new_cmap = colors.LinearSegmentedColormap.from_list(
            'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval),
            cmap(np.linspace(minval, maxval, n)))
        return new_cmap

    @staticmethod
    def get_specific_color_gradient(colormap,inputList,**kwargs):
        """
        Map the values of inputList onto a colormap.

        :param colormap: colormap name handed to plt.get_cmap
        :param inputList: list, or object providing .min() / .max()
        :param kwargs: vmin / vmax, limits of the normalisation; a limit that
                       is not given is taken from the data
        :return: tuple (ScalarMappable, RGBA colors of inputList)
        """
        vmin = kwargs.get('vmin','blaq')
        vmax = kwargs.get('vmax','blaq')
        cm = plt.get_cmap(colormap)
        is_list = isinstance(inputList, list)
        # 'blaq' is the "not given" sentinel; resolve each limit on its own so
        # that a single supplied limit is honoured
        if vmin=='blaq':
            vmin = min(inputList) if is_list else inputList.min()
        if vmax=='blaq':
            vmax = max(inputList) if is_list else inputList.max()
        cNorm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)
        scalarMap = mpl.cm.ScalarMappable(norm=cNorm, cmap=cm)
        scalarMap.set_array(inputList)
        colorList=scalarMap.to_rgba(inputList)
        return scalarMap,colorList

class DataFrameAnalyzer:
    """File-system helpers and pandas readers/writers (TSV, gzip, pickle, chunked reads)."""

    @staticmethod
    def getFile(fileFolder,fileName,**kwargs):
        """
        This function provides you with the pandas-dataframe extracted from the file.

        :param fileFolder: path prefix (concatenated as-is with fileName)
        :param fileName: file name
        :param kwargs: idx (index column, default 0; "" for none),
                       header ("" = let pandas decide, None = no header row),
                       delimiter (default tab)
        """
        idx = kwargs.get("idx",0)
        header = kwargs.get("header","")
        delimiter = kwargs.get("delimiter","\t")

        read_kwargs = {'sep': delimiter}
        if header == "":
            if idx != "":
                read_kwargs['index_col'] = idx
        elif idx != "" and header is None:
            read_kwargs['index_col'] = idx
            read_kwargs['header'] = None
        else:
            # every remaining combination is read without header and index
            read_kwargs['header'] = None
        return pd.read_csv(fileFolder+fileName, **read_kwargs)

    @staticmethod
    def get_dict(df, a, b):
        """
        Build a dict mapping column `a` to column `b` of df; passing None for
        either uses the index instead.
        """
        values = df[b].values if b is not None else df.index
        keys = df[a].values if a is not None else df.index
        return pd.Series(values, index=keys).to_dict()

    @staticmethod
    def checkPathway(fileFolder):
        """
        Create the directory part of fileFolder if it does not exist yet.
        A path without directory part is left alone.
        """
        directory = os.path.dirname(fileFolder)
        if directory and not os.path.exists(directory):
            os.makedirs(directory)

    @staticmethod
    def to_tsv_gz(df, path, index=None):
        """Write df as gzip-compressed, tab-separated file."""
        df.to_csv(path, sep='\t', index=index, compression='gzip')

    @staticmethod
    def to_pickle(df, path):
        """Pickle df to path."""
        import pickle
        with open(path, 'wb') as handle:
            pickle.dump(df, handle)

    @staticmethod
    def read_pickle(path):
        """Load a pickled object from path."""
        import pickle
        # NOTE: unpickling executes arbitrary code; only load trusted files.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    @staticmethod
    def to_tsv(df, path, **kwargs):
        """
        Write df as tab-separated file; the index is not written unless an
        `index` keyword is passed. Further kwargs go to DataFrame.to_csv.
        """
        kwargs.setdefault('index', None)
        df.to_csv(path, sep='\t', **kwargs)

    @staticmethod
    def read_tsv_gz(path, engine=None, header='infer', sep='\t', index_col=None,**kwargs):
        """Read a gzip-compressed, delimited file with pandas.read_csv."""
        return pd.read_csv(path, sep=sep, header=header,
                           compression='gzip', engine=engine, index_col=index_col,
                           **kwargs)

    @staticmethod
    def read_tsv(path, engine=None, header='infer', sep='\t', **kwargs):
        """Read a delimited file with pandas.read_csv, without index column."""
        return pd.read_csv(path, sep=sep, index_col=None, header=header,
                           engine=engine, **kwargs)

    @staticmethod
    def join(folder,name):
        """Concatenate folder and name (no separator is inserted)."""
        return folder+name

    @staticmethod
    def exists(file_name):
        """Return True if file_name is an existing regular file."""
        return os.path.isfile(file_name)

    @staticmethod
    def listdir(filename):
        """Return the entries of the directory `filename`."""
        return os.listdir(filename)

    @staticmethod
    def makedirs(folder):
        """Create `folder` including missing parent directories."""
        return os.makedirs(folder)

    @staticmethod
    def convert_to_gzip(folder,file_name):
        """
        Compress folder + file_name into folder + file_name + '.gz'.
        The source file is kept.
        """
        source = folder+file_name
        # read bytes: the gzip stream is binary
        with open(source, 'rb') as f_in, gzip.open(source+".gz", 'wb') as f_out:
            f_out.writelines(f_in)

    @staticmethod
    def write_to_gzip(data,folder,file_name):
        """
        Write `data` (including its index) as gzip-compressed, tab-separated
        file to folder + file_name.
        """
        data.to_csv(folder+file_name, sep='\t', index=True, compression='gzip')

    @staticmethod
    def export_file(exportText,fileFolder,fileName,**kwargs):
        """
        Write exportText to fileFolder + fileName, preceded by the optional
        kwarg headerText.
        """
        headerText = kwargs.get("headerText","")
        with open(fileFolder+fileName,"w") as outputFile:
            if headerText != "":
                outputFile.write(headerText)
            outputFile.write(exportText)

    @staticmethod
    def open_in_chunks(folder,fileName,**kwargs):
        """
        Read a delimited file chunk by chunk and return the concatenated
        DataFrame (first column as index).

        :param kwargs: delim (default tab), header (None = no header row,
                       anything else = pandas default), chunksize
                       (default 10**6), compression (only forwarded when
                       given), print_chunks ('yes' prints the chunk counter)
        """
        delim = kwargs.get("delim","\t")
        header = kwargs.get("header","yes")
        chunksize = kwargs.get("chunksize",10**6)
        compression = kwargs.get("compression",None)
        print_chunks = kwargs.get('print_chunks','no')

        read_kwargs = {'chunksize': chunksize, 'sep': delim, 'index_col': 0}
        if header is None:
            read_kwargs['header'] = None
        if compression is not None:
            read_kwargs['compression'] = compression

        chunkList = []
        for count, chunk in enumerate(pd.read_csv(folder+fileName, **read_kwargs)):
            if print_chunks == 'yes':
                print(count)
            chunkList.append(chunk)
        return pd.concat(chunkList)

class plottingFacade:
    @staticmethod
    def boxplot(ax, df, **kwargs):
        """
        Draw one box per column of ``df`` into ``ax``.

        Every column is passed through ``utilsFacade.finite`` before plotting
        and outlier markers are suppressed (``sym=""``).

        :param ax: matplotlib axes to draw into
        :param df: data frame; each column becomes one box, labelled with the
            column name
        :param vert: (keyword) forwarded to ``ax.boxplot``, default 1
        :param width: (keyword) width of each box, default 0.4
        :param pos_width: (keyword) gap added between box positions, default 0.2
        :param ylim: (keyword) ``[lower, upper]`` limits of the y axis; by
            default the smallest and largest plotted value are used
        :param color: (keyword) face colour of the boxes, default 'grey'
        :param alpha: (keyword) transparency of the boxes, default 0.6
        :param ecolor: (keyword) edge colour of the boxes, default 'black'
        :param ylabel: (keyword) y-axis label, default 'ylabel'
        :return: None
        """
        vert = kwargs.get('vert', 1)
        width = kwargs.get('width', 0.4)
        pos_width = kwargs.get('pos_width', 0.2)
        ylim = kwargs.get('ylim','notgiven')
        color = kwargs.get('color', 'grey')
        alpha = kwargs.get('alpha', 0.6)
        ecolor = kwargs.get('ecolor','black')
        ylabel = kwargs.get('ylabel','ylabel')

        data_list = list()
        positions = list()
        for c, column in enumerate(list(df.columns)):
            data_list.append(utilsFacade.finite(list(df[column])))
            positions.append(c*(width + pos_width))

        if ylim == "notgiven":
            ylim = [min(min(values) for values in data_list),
                    max(max(values) for values in data_list)]

        bp = ax.boxplot(data_list, notch=0, sym="", vert=vert, patch_artist=True,
                        widths=[width]*len(data_list), positions=positions)
        plt.setp(bp['medians'], color="black")
        plt.setp(bp['whiskers'], color="black", linestyle="--", alpha=1)
        for patch in bp['boxes']:
            # set face and edge separately: set_color() would overwrite the
            # edge colour and make ``ecolor`` ineffective
            patch.set_facecolor(color)
            patch.set_edgecolor(ecolor)
            patch.set_alpha(alpha)
        # honour a caller-supplied ylim (it used to be ignored)
        ax.set_ylim(ylim[0], ylim[1])
        ax.set_ylabel(ylabel)
        # tick the axes that was passed in, not whichever axes is current
        ax.set_xticks(positions)
        ax.set_xticklabels(list(df.columns), rotation = 90, fontsize = 11)

    @staticmethod
    def savefig(output_basename, dpi=400, **kwargs):
        """
        Save the current pyplot figure as PNG and, optionally, as PDF.

        File names are derived from ``output_basename``:

        * if it contains ".png", that text is replaced by the suffix of each
          written format and the PNG is always written;
        * otherwise the suffix is appended and the PNG is skipped when
          ``png`` is false.

        :param output_basename: path of the figure, with or without ".png"
        :param dpi: resolution forwarded to ``plt.savefig``, default 400
        :param pdf: (keyword) also write a PDF copy, default True; when false
            the absolute path of every written file is printed as well
        :param png: (keyword) write the PNG, default True (only honoured when
            ``output_basename`` does not contain ".png")
        :return: None
        """
        pdf = kwargs.get("pdf", True)
        png = kwargs.get("png", True)

        suffixes = [".png", ".pdf"] if pdf else [".png"]
        replace_suffix = '.png' in output_basename
        for suffix in suffixes:
            if replace_suffix:
                target = output_basename.replace(".png", suffix)
            elif suffix == ".png" and not png:
                continue
            else:
                target = output_basename + suffix
            # single-argument print() behaves the same on Python 2 and 3
            print('saving ' + target)
            plt.savefig(target, dpi=dpi)
            if not pdf:
                print('saved at')
                print(os.path.abspath(target))

    @staticmethod
    def sendpdf(path, dest):
        """
        Copy ``path`` to ``dest`` by running ``scp`` through the system shell.

        :param path: source given to scp
        :param dest: destination given to scp
        :return: None (the exit status of the command is not checked)
        """
        print('sending image...')
        # NOTE: the command line is interpreted by the shell, so both
        # arguments must come from a trusted source.
        cmd = "scp " + path + " " + dest
        print(cmd)
        os.system(cmd)
        print('sending done...')

    @staticmethod
    def subplots_adjust(top, bottom, left, right, hspace, wspace):
        """
        Forward the six layout values to ``plt.subplots_adjust`` by keyword.

        :param top, bottom, left, right: figure margins
        :param hspace, wspace: spacing between subplots
        :return: None
        """
        layout = dict(top=top, bottom=bottom, left=left, right=right)
        layout.update(hspace=hspace, wspace=wspace)
        plt.subplots_adjust(**layout)

    @staticmethod
    def despine_all():
        """
        Call seaborn's ``despine`` for all four sides (top, right, left,
        bottom) with ``offset=10`` and ``trim=True``.
        """
        all_sides = dict.fromkeys(("top", "right", "left", "bottom"), True)
        sns.despine(offset=10, trim=True, **all_sides)

    @staticmethod
    def set_plot_labels(**kwargs):
        """
        Set axis labels and title through pyplot.

        :param x: (keyword) x-axis label; left untouched when missing or None
        :param y: (keyword) y-axis label; left untouched when missing or None
        :param title: (keyword) title; left untouched when missing or None
        :return: None
        """
        setters = (("x", plt.xlabel), ("y", plt.ylabel), ("title", plt.title))
        for key, apply_text in setters:
            text = kwargs.get(key)
            if text is None:
                continue
            apply_text(text)

    @staticmethod
    def add_trendline(ax,x,y,**kwargs):
        '''
        Fit a straight line (first-degree polynomial) to the x/y data, draw it
        into ax and print its equation.
        :param ax: axes to draw into
        :param x: x values of the scatter data
        :param y: y values of the scatter data
        :param color: (keyword) line colour, default 'red'
        :param style: (keyword) line style, default '-'
        :param alpha: (keyword) line transparency, default 1
        :return: tuple (equation string, x, fitted values at x)
        '''
        color = kwargs.get('color','red')
        style = kwargs.get('style','-')
        alpha = kwargs.get('alpha',1)
        # least-squares fit; z[0] is the slope, z[1] the intercept
        z = numpy.polyfit(x, y, 1)
        fitted = numpy.poly1d(z)(x)
        ax.plot(x, fitted, color = color, linestyle = style, alpha = alpha)
        equation = "y=%.6fx+(%.6f)"%(z[0],z[1])
        # single-argument print() behaves the same on Python 2 and 3
        print(equation)
        return equation, x, fitted

    @staticmethod
    def func_vertical_plotDensities_border(ax,dataDensities,**kwargs):
        '''
        Draw the outline of a kernel density estimate, rotated so that the
        data values run along the y axis and the density along the x axis.
        :param ax: axes to draw into
        :param dataDensities: the values whose density is estimated; more than
            10000 values are reduced to 10000 with utilsFacade.drawRandomSample
        :param linewidth: (keyword) line width, default 3
        :param labelName: (keyword) legend label, default "backgroundData"
        :param facecolor: (keyword) line colour, default "grey"
        :param alpha: (keyword) line transparency, default 0.8
        :return: None
        '''

        linewidth = kwargs.get("linewidth",3)
        labelName = kwargs.get("labelName","backgroundData")
        faceColor = kwargs.get("facecolor","grey")
        alpha = kwargs.get("alpha",0.8)

        if len(dataDensities)>10000:
            dataDensities = utilsFacade.drawRandomSample(dataDensities,10000)
        print(len(dataDensities))
        xs1 = numpy.linspace(min(dataDensities),max(dataDensities),len(dataDensities))
        # a scalar bw_method is used directly as the bandwidth factor; this
        # replaces overriding covariance_factor and calling the private
        # _compute_covariance()
        densityReady = gaussian_kde(dataDensities, bw_method=0.25)
        ax.plot(densityReady(xs1),xs1, color=faceColor,
                alpha=alpha, label=labelName, linewidth=linewidth)

    @staticmethod
    def genVenn2(setList, **kwargs):
        '''
        Draw a two-set Venn diagram with matplotlib-venn and enlarge its labels.
        :param setList: the two collections to compare (first two entries are used)
        :param labels: (keyword) names of the two sets, default ['1','2']
        :param colors: (keyword) colours of the two circles, default ['green','grey']
        :return: the object returned by matplotlib_venn.venn2
        '''
        labels = kwargs.get('labels',['1','2'])
        colors = kwargs.get('colors',['green','grey'])

        setA = set(setList[0])
        setB = set(setList[1])

        ## venn2 subsets(Ab, aB, AB)
        subsets = (len(setA - setB), len(setB - setA), len(setA & setB))
        venn = matplotlib_venn.venn2(subsets = subsets,
               set_colors=colors, set_labels=labels)
        # Guard against missing labels: matplotlib-venn may hold None in place
        # of a text object (e.g. for a region without area).
        for text in list(venn.set_labels or []) + list(venn.subset_labels or []):
            if text is not None:
                text.set_fontsize(20)
        return venn
    
    @staticmethod
    def genVenn3(setList,**kwargs):
        '''
        Draw a three-set Venn diagram with matplotlib-venn.
        :param setList: the three collections to compare (first three entries are used)
        :param labels: (keyword) names of the three sets, default ['1','2','3']
        :param colors: (keyword) colours of the circles, default ['green','grey','orange']
        :return: the object returned by matplotlib_venn.venn3
        '''

        setNames = kwargs.get('labels',['1','2','3'])
        colors = kwargs.get('colors',['green','grey','orange'])

        setA = set(setList[0])
        setB = set(setList[1])
        setC = set(setList[2])

        ## venn3 subsets(Abc, aBc, ABc, abC, AbC, aBC, ABC)
        # upper-case letter: element is in that set; lower-case: it is not
        subsets = (len(setA - setB - setC),
                   len(setB - setA - setC),
                   len((setA & setB) - setC),
                   len(setC - setA - setB),
                   len((setA & setC) - setB),
                   len((setB & setC) - setA),
                   len(setA & setB & setC))
        venn = matplotlib_venn.venn3(subsets = subsets,
            set_labels = (setNames[0],setNames[1],setNames[2]), set_colors = colors)
        return venn

    @staticmethod
    def get_legendHandles(sizeList,quantAmplifier,**kwargs):
        '''
        Build legend entries for marker sizes: one empty scatter handle per
        size (drawn with area size*quantAmplifier) and the size rounded to an
        integer as its label text.
        :param sizeList: sizes to show in the legend
        :param quantAmplifier: factor applied to each size for the marker area
        :param shape: (keyword) marker, default 'o'
        :param alpha: (keyword) marker transparency, default 1
        :param color: (keyword) marker colour, default "white"
        :param edgecolor: (keyword) marker edge colour, default "black"
        :return: (list of handles, list of label strings)
        '''
        marker_style = dict(
            color = kwargs.get("color","white"),
            edgecolor = kwargs.get("edgecolor","black"),
            alpha = kwargs.get("alpha",1),
            marker = kwargs.get('shape','o'),
        )

        handles = []
        texts = []
        for size in sizeList:
            handle = plt.scatter([],[], s = size*quantAmplifier, **marker_style)
            handles.append(handle)
            rounded = int(round(float(size),0))
            texts.append(str(rounded))
        return handles,texts

    @staticmethod
    def autolabel_barplot(rects, ax=None):
        """
        Write the integer height of every bar slightly above the bar.

        :param rects: bar artists, e.g. the container returned by ``ax.bar``
        :param ax: axes to write into; when omitted the axes each bar is
            attached to is used, falling back to the current pyplot axes
            (the previous code referred to an undefined name ``ax``)
        :return: None
        """
        for rect in rects:
            target = ax
            if target is None:
                target = getattr(rect, 'axes', None)
            if target is None:
                target = plt.gca()
            height = rect.get_height()
            target.text(rect.get_x() + rect.get_width()/2.,
                        1.05*height,'%d' % int(height),ha='center',
                        va='bottom',fontsize=4)

    @staticmethod
    def func_plotHeatmap(ax_hmap, hmap_data, **kwargs):
        '''
        Draw hmap_data as a heat map with a colour bar; the rows are ordered
        by hierarchical clustering unless that is switched off.
        :param ax_hmap: axes to draw into
        :param hmap_data: data frame; the index labels the rows (y ticks) and
            the columns label the x ticks
        :param cmap: (keyword) colour map, default plt.cm.bwr
        :param title: (keyword) axes title, default "title"
        :param ylabel: (keyword) label of the colour bar, default "DATA INDEX"
        :param metric: (keyword) distance metric for the linkage, default "euclidean"
        :param linkageMethod: (keyword) linkage method, default "centroid"
        :param makeHierarchicalClustering: (keyword) reorder the rows by the
            dendrogram leaf order, default True
        :param savefig: (keyword) save the figure afterwards, default True
        :param output_location: (keyword) target path, required when savefig is True
        :return: None
        :raises Exception: savefig is True and no output_location was given

        The keywords xlabel and colorDict are accepted but not used.
        '''
        cmap = kwargs.get("cmap",plt.cm.bwr)
        title = kwargs.get("title","title")
        ylabel = kwargs.get("ylabel","DATA INDEX")
        metric = kwargs.get("metric","euclidean")
        linkageMethod = kwargs.get("linkageMethod","centroid")
        makeHierarchicalClustering = kwargs.get("makeHierarchicalClustering",True)
        savefig = kwargs.get('savefig',True)
        output_location = kwargs.get('output_location','')
        if savefig == True and output_location == '':
            raise Exception('Specify output-location for graph')

        # DataFrame.values is a property, not a method
        hmapArray = numpy.asarray(hmap_data.values)
        rowLabels = list(hmap_data.index)
        # x ticks run over the columns, so they are labelled with the column names
        columnLabels = list(hmap_data.columns)

        if makeHierarchicalClustering == True:
            Y = sch.linkage(hmapArray, method=linkageMethod,metric=metric)
            leafOrder = sch.dendrogram(Y, no_plot=True, orientation="left")["leaves"]
            hmapArray = hmapArray[leafOrder,:]
            rowLabels = [rowLabels[i] for i in leafOrder]
        # draw into the axes that was passed in rather than the current one
        heatmap = ax_hmap.pcolor(hmapArray, cmap=cmap)
        ax_hmap.set_ylim(0, hmapArray.shape[0])
        ax_hmap.set_title(title, fontsize=10, loc="left")
        # +0.5 centres each tick on its cell
        ax_hmap.set_yticks(numpy.arange(hmapArray.shape[0])+0.5, minor=False)
        ax_hmap.set_xticks(numpy.arange(hmapArray.shape[1])+0.5, minor=False)
        ax_hmap.set_yticklabels(rowLabels, minor=False,fontsize=10)
        ax_hmap.set_xticklabels(columnLabels, minor=False,fontsize=8)
        # booleans instead of the string "off", which newer matplotlib rejects
        ax_hmap.tick_params(axis="x",which="both",bottom=False,top=False)
        ax_hmap.tick_params(axis="y",which="both",left=False,right=False)
        cbar_hmap = plt.colorbar(heatmap, ax=ax_hmap)
        cbar_hmap.ax.get_yaxis().labelpad = 10
        cbar_hmap.ax.set_ylabel(ylabel, rotation=270,fontsize=9)
        if savefig == True:
            plt.savefig(output_location, bbox_inches = 'tight', dpi = 400)

    @staticmethod
    def prepare_legend_input(names,colors):
        '''
        Build the inputs for a colour legend: one 1x1 rectangle patch filled
        with each colour, plus the names as given.
        :param names: legend texts (returned unchanged)
        :param colors: fill colours, one rectangle is created per colour
        :return legendHandleList, legendTextList:
        '''
        handles = [plt.Rectangle((0,0),1,1,fc=fill) for fill in colors]
        return handles,names

    @staticmethod
    def make_legend(ax,legendHandleList,legendTextList,**kwargs):
        '''
        Draw a legend into ax from ready-made handles and texts.
        :param ax: axes receiving the legend
        :param legendHandleList: legend handles
        :param legendTextList: legend texts
        :param loc: (keyword) legend location, default "best"
        :param ncol: (keyword) number of columns, default 1
        :param fontsize: (keyword) font size, default 12
        :param frameon: (keyword) draw a frame, default False
        :return:
        '''
        legend_options = dict(
            loc = kwargs.get("loc","best"),
            fontsize = kwargs.get("fontsize",12),
            frameon = kwargs.get("frameon",False),
            ncol = kwargs.get('ncol',1),
        )
        ax.legend(legendHandleList,legendTextList,**legend_options)

    @staticmethod
    def make_full_legend(ax,names,colors,**kwargs):
        '''
        Build legend handles from names and colours with
        plottingFacade.prepare_legend_input and draw them into ax with
        plottingFacade.make_legend.
        :param ax: axes receiving the legend
        :param names: legend texts
        :param colors: colours of the legend patches
        :param loc, ncol, fontsize, frameon: (keywords) forwarded to make_legend;
            defaults "best", 1, 12, False
        :return legendHandleList, legendTextList:
        '''
        legend_options = {
            "loc": kwargs.get("loc","best"),
            "fontsize": kwargs.get("fontsize",12),
            "frameon": kwargs.get("frameon",False),
            "ncol": kwargs.get('ncol',1),
        }

        handles,texts = plottingFacade.prepare_legend_input(names,colors)
        plottingFacade.make_legend(ax,handles,texts,**legend_options)
        return handles,texts

    @staticmethod
    def func_plotDensities(ax, dataDensities,**kwargs):
        '''
        Draw a filled kernel density estimate of the given values.
        :param ax: axes to draw into
        :param dataDensities: the values whose density is estimated; more than
            20000 values are reduced to 10000 with utilsFacade.drawRandomSample
        :param labelName: (keyword) legend label, default "backgroundData"
        :param facecolor: (keyword) fill colour, default "grey"
        :param alpha: (keyword) fill transparency, default 0.3
        :return: None
        '''

        labelName = kwargs.get("labelName","backgroundData")
        faceColor = kwargs.get("facecolor","grey")
        alpha = kwargs.get("alpha",0.3)

        if len(dataDensities)>20000:
            dataDensities = utilsFacade.drawRandomSample(dataDensities,10000)
        print(len(dataDensities))
        xs1 = numpy.linspace(min(dataDensities),max(dataDensities),len(dataDensities))
        # a scalar bw_method is used directly as the bandwidth factor; this
        # replaces overriding covariance_factor and calling the private
        # _compute_covariance()
        densityReady = gaussian_kde(dataDensities, bw_method=0.25)
        ax.fill_between(xs1,densityReady(xs1), facecolor = faceColor,
                        alpha = alpha, label = labelName)

    @staticmethod
    def func_plotDensities_border(ax,dataDensities,**kwargs):
        '''
        Draw the outline of a kernel density estimate of the given values.
        :param ax: axes to draw into
        :param dataDensities: the values whose density is estimated; more than
            10000 values are reduced to 10000 with utilsFacade.drawRandomSample
        :param linewidth: (keyword) line width, default 3
        :param labelName: (keyword) legend label, default "backgroundData"
        :param facecolor: (keyword) line colour, default "grey"
        :param linestyle: (keyword) line style, default '-'
        :param alpha: (keyword) line transparency, default 0.8
        :return: None
        '''

        linewidth = kwargs.get("linewidth",3)
        labelName = kwargs.get("labelName","backgroundData")
        faceColor = kwargs.get("facecolor","grey")
        linestyle = kwargs.get('linestyle','-')
        alpha = kwargs.get("alpha",0.8)
        if len(dataDensities)>10000:
            dataDensities = utilsFacade.drawRandomSample(dataDensities,10000)
        print(len(dataDensities))
        xs1 = numpy.linspace(min(dataDensities),max(dataDensities),len(dataDensities))
        # a scalar bw_method is used directly as the bandwidth factor; this
        # replaces overriding covariance_factor and calling the private
        # _compute_covariance()
        densityReady = gaussian_kde(dataDensities, bw_method=0.25)
        ax.plot(xs1,densityReady(xs1), color=faceColor, alpha=alpha,
                label=labelName, linewidth=linewidth, linestyle = linestyle)

    @staticmethod
    def func_plotCorrelationMatrix(ax_matrix,proteinData, altProteinList, **kwargs):
        '''
        Draw the correlation matrix between the rows of proteinData
        (numpy.corrcoef) as an image with a colour bar, optionally reordered
        by hierarchical clustering.
        :param ax_matrix: axes to draw into
        :param proteinData: data frame with one row per entity to correlate
        :param altProteinList: labels used for the ticks when clustering is
            on (indexed by row position); without clustering the index of
            proteinData is used instead
        :param cmap: (keyword) colour map; defaults to plt.cm.YlGn, the map
            that was previously hard-coded
        :param title: (keyword) axes title, default "title"
        :param ylabel: (keyword) colour-bar label, default "R (pearson)"
        :param makeClustering: (keyword) reorder rows/columns by the
            dendrogram leaf order, default True
        :param linkageMethod: (keyword) linkage method, default 'average'
        :param metric: (keyword) distance metric for the linkage, default 'euclidean'
        :return: dict with "corrMatrix" (the plotted matrix) and
            "orderedProteins" (tick labels in plotted order)

        The keywords colorDict and mainProteinColors are accepted but not used.
        '''
        cmap = kwargs.get("cmap",plt.cm.YlGn)
        title = kwargs.get("title","title")
        ylabel = kwargs.get("ylabel","R (pearson)")
        makeClustering = kwargs.get("makeClustering",True)
        linkageMethod = kwargs.get('linkageMethod','average')
        metric = kwargs.get('metric','euclidean')

        tickLabels = list(proteinData.index)
        R = numpy.corrcoef(numpy.asarray(proteinData))

        ax_matrix.set_title(title, fontsize=20, loc="center")
        if makeClustering == True:
            Y = sch.linkage(R, method=linkageMethod,metric=metric)
            idx1 = sch.dendrogram(Y, orientation='left',no_plot=True)["leaves"]
            idx2 = sch.dendrogram(Y,no_plot=True)["leaves"]
            tickLabels = [altProteinList[i] for i in idx1]
            R = R[idx1,:]
            R = R[:,idx2]
        heatmap = ax_matrix.imshow(R, cmap=cmap, interpolation="none")
        cbar = plt.colorbar(heatmap, ax=ax_matrix,
               ticks=numpy.linspace(R.min(),R.max(),15,endpoint=True),
               pad=0.04,fraction=0.046)
        cbar.ax.get_yaxis().labelpad = 10
        cbar.ax.tick_params(labelsize=12)
        cbar.ax.set_ylabel(ylabel, rotation=270,fontsize=20)

        tickPositions = list(range(len(tickLabels)))
        ax_matrix.set_xlim(-0.5,len(tickLabels)-0.5)
        # ascending y limits put row 0 at the bottom of the image
        ax_matrix.set_ylim(-0.5,len(tickLabels)-0.5)
        # positions and labels are set separately and on ax_matrix itself;
        # passing the labels as second positional argument of set_xticks is
        # interpreted differently across matplotlib versions
        ax_matrix.set_xticks(tickPositions)
        ax_matrix.set_yticks(tickPositions)
        ax_matrix.set_xticklabels(tickLabels,rotation=90,fontsize=10)
        ax_matrix.set_yticklabels(tickLabels,fontsize=10)
        # booleans instead of the string "off", which newer matplotlib rejects
        ax_matrix.tick_params(axis="x",which="both",bottom=False,top=False)
        ax_matrix.tick_params(axis="y",which="both",left=False,right=False)
        return {"corrMatrix":R,"orderedProteins":tickLabels}

    @staticmethod
    def func_plotBarPlot(ax,nestedData,groupLabelList,**kwargs):
        """
        Draw the series contained in nestedData as bars into ax.

        nestedData[j][i] is the value of series i at x position j; the number
        of series is taken from the first entry of nestedData.

        :param ax: axes to draw into
        :param nestedData: nested sequence as described above
        :param groupLabelList: used both as x tick labels and, by series
            number, as legend texts
        :param widthBars: (keyword) bar width, default 0.35
        :param widthBetweenBars: (keyword) offset of the x ticks relative to
            the bar positions, default 0.15
        :param showLegend: (keyword) draw a legend, default True
        :param orientation: (keyword) forwarded to ``ax.bar``, default "vertical"
        :param colors: (keyword) one colour per series; defaults to
            ``colorFacade.makeColorPalette()``
        :param autolabeling: (keyword) label the bars of the last drawn series
            with ``plottingFacade.autolabel_barplot``, default True
        :return: the data regrouped per series (list of lists)
        """
        widthBars = kwargs.get("widthBars",0.35)
        widthBetweenBars = kwargs.get("widthBetweenBars",0.15)
        showLegend = kwargs.get("showLegend",True)
        orientation = kwargs.get("orientation","vertical")
        autolabeling = kwargs.get('autolabeling',True)
        # build the default palette only when the caller supplied none
        colorPalette = kwargs.get('colors')
        if colorPalette is None:
            colorPalette = colorFacade.makeColorPalette()

        ind = numpy.arange(len(nestedData)) # the x locations for the groups

        # regroup the data per series
        seriesCount = len(nestedData[0]) if len(nestedData) > 0 else 0
        overallGroupList = [[row[i] for row in nestedData]
                            for i in range(seriesCount)]

        rects = None
        legendHandleList = list()
        legendTextList = list()
        # NOTE(review): every series is drawn at the same x positions, so
        # later series cover earlier ones - confirm whether a per-series
        # offset was intended.
        for groupCount, group in enumerate(overallGroupList):
            rects = ax.bar(ind, group, widthBars,
                    orientation=orientation,
                    color=colorPalette[groupCount])
            legendTextList.append(groupLabelList[groupCount])
            legendHandleList.append(rects)
        ax.set_xticks(ind + widthBetweenBars)
        ax.set_xticklabels(groupLabelList,fontsize=5,rotation=90)
        if showLegend==True:
            # attach the legend to the axes that was passed in
            leg = ax.legend(legendHandleList,legendTextList, prop={"size":8}, loc="upper left")
            ax.add_artist(leg)
            # the frame setters return None, so they are called directly
            # instead of being wrapped in plt.setp()
            frame = leg.get_frame()
            frame.set_edgecolor('grey')
            frame.set_linewidth(1.0)
        if autolabeling==True and rects is not None:
            plottingFacade.autolabel_barplot(rects)
        plt.tight_layout()
        return overallGroupList

    @staticmethod
    def vennPlotOverlap3Exp(setList, **kwargs):
        '''
        Draw a three-set Venn diagram and save it to a file. The title
        reports how many distinct prefixes (the text before the first ':')
        occur among the elements shared by all three sets.
        :param setList: three sets of strings
        :param labels: (keyword) set names, default ['1','2','3']
        :param colors: (keyword) circle colours, default ['green','grey','orange']
        :param fileTitle: (keyword) text used in the title and the default file name
        :param outputFolder: (keyword) prefix of the default file name
        :param fileLocation: (keyword) full output path; defaults to
            outputFolder + '_VennDiagram_' + fileTitle + '.jpg'
        :return: None
        '''
        nameList =  kwargs.get('labels',['1','2','3'])
        colorList = kwargs.get('colors',['green','grey','orange'])
        fileTitle = kwargs.get('fileTitle','')
        outputFolder = kwargs.get("outputFolder","")
        # the key has to be the string 'fileLocation'; the bare name used
        # before was an unbound local and made every call fail
        fileLocation = kwargs.get('fileLocation',outputFolder + '_VennDiagram_' + fileTitle + '.jpg')

        set1 = setList[0]
        set2 = setList[1]
        set3 = setList[2]

        overlappingSet = set1.intersection(set2).intersection(set3)
        overlappingProteins = set(member.split(':')[0] for member in overlappingSet)
        numOverlappingProteins = len(overlappingProteins)

        plt.clf()
        plottingFacade.genVenn3(setList, labels = nameList, colors = colorList)
        plt.title('Overlap of ' + fileTitle + ':\n' + 'The overlap corresponds to ' + str(numOverlappingProteins) + ' Proteins')
        plt.tight_layout()
        plt.savefig(fileLocation, bbox_inches="tight")
        plt.clf()

    @staticmethod
    def plot_SampleBootstrapNormal(myList, **kwargs):
        """
        Bootstrap-resample the given values (1000 draws), fit a normal
        distribution to the resample and plot the densities of the resample
        and of the original values together with the fitted normal curve.

        :param myList: the values to resample
        :param save_file: (keyword) save the figure (default True) instead of
            showing it
        :param output: (keyword) path the figure is saved to
        :return: tuple (resampled values, fitted mean, fitted standard
            deviation, fitted normal density at 100 points across the x range)
        """
        save_file = kwargs.get('save_file',True)
        output_location = kwargs.get('output','/g/scb2/bork/romanov/wpc/bootstrap_output.pdf')

        resampledList = statsFacade.bootstrap_resample(numpy.asarray(myList),1000)
        mu,std = scipy.stats.norm.fit(resampledList)
        # func_plotDensities needs the target axes as first argument
        ax = plt.gca()
        plottingFacade.func_plotDensities(ax, resampledList)
        plottingFacade.func_plotDensities(ax, numpy.asarray(myList))
        xmin,xmax = plt.xlim()
        x = numpy.linspace(xmin,xmax,100)
        p = scipy.stats.norm.pdf(x,mu,std)
        plt.plot(x,p,"k",linewidth=2)
        if save_file==False:
            plt.show()
        else:
            plt.savefig(output_location, bbox_inches = 'tight')
        plt.clf()
        return resampledList,mu,std,p

class systemAnalyzer:
    """Small helpers around ``os`` and ``shutil`` for files and folders."""

    @staticmethod
    def delete_file(path):
        """Print a notice and remove the file at ``path`` (``os.remove``)."""
        print('deleting file: ' + path)
        os.remove(path)
    
    @staticmethod
    def delete_folder(path):
        """Print a notice and remove the folder at ``path`` with all its content (``shutil.rmtree``)."""
        print('delete folder: ' + path)
        shutil.rmtree(path)

    @staticmethod
    def make_folder(path, **kwargs):
        """
        Create ``path`` including missing parent folders.

        :param path: folder to create
        :param ignore_duplicate: (keyword) when True an already existing path
            is accepted silently; when False (default) it is an error
        :raises Exception: ``path`` exists and ``ignore_duplicate`` is not True
        """
        ignore_duplicate = kwargs.get('ignore_duplicate',False)

        print('make_folder: ' + path)
        if os.path.exists(path):
            # previously ignore_duplicate=True called os.makedirs() on the
            # existing path, which raised - the opposite of the flag's meaning
            if ignore_duplicate==True:
                return
            raise Exception('Folder already exists!')
        os.makedirs(path)

    @staticmethod
    def create_random_file(folder_path, **kwargs):
        """
        Create the file ``folder_path + fileName`` if it is missing and set
        its access/modification times to now; existing content is kept.

        :param folder_path: folder prefix; concatenated with the file name unchanged
        :param fileName: (keyword) file name, default 'random_fileName'
        """
        fileName = kwargs.get('fileName','random_fileName')

        print('create random file in:' + folder_path)
        target = folder_path + fileName
        # append mode creates the file without truncating an existing one
        with open(target, 'a'):
            # None means "use the current time"
            os.utime(target, None)

    @staticmethod
    def check_path_existence(path):
        """Print a notice and return whether ``path`` exists."""
        print('path existence: ' + path)
        return os.path.exists(path)

    @staticmethod
    def get_filesize(path):
        """Return ``os.path.getsize(path)``."""
        return os.path.getsize(path)

    @staticmethod
    def get_file_creationDate(path):
        """Return ``os.path.getctime(path)``."""
        return os.path.getctime(path)

    @staticmethod
    def get_file_modificationDate(path):
        """Return ``os.path.getmtime(path)``."""
        return os.path.getmtime(path)

class Mapper(object):
    """
    Looks up the given identifiers through mygene when it is constructed.

    Attributes set by the constructor:
    input_data / output_data / species -- the query settings,
    trans_df -- the "out" entry of the mygene query result,
    hitDict  -- ``trans_df.to_dict()``,
    fields   -- result of ``MyGeneInfo.get_fields()``.
    """

    def __init__(self,identifiers,**kwargs):
        """
        :param identifiers: identifiers handed to the mygene query
        :param input: (keyword) scopes to search, comma separated
        :param output: (keyword) fields to return, comma separated
        :param species: (keyword) species filter, default "human"
        """
        self.input_data = kwargs.get("input",'ensembl.gene,symbol,reporter,accession')
        self.output_data = kwargs.get("output",'ensembl.gene,entrezgene,uniprot,summary,ipi')
        self.species = kwargs.get("species","human")

        print("map_identifiers")
        mapping = self.map_identifiers(identifiers)
        self.hitDict,self.trans_df = mapping
        self.fields = mygene.MyGeneInfo().get_fields()

    def map_identifiers(self,identifiers):
        """
        Query mygene for the identifiers using the scopes, fields and species
        stored on the instance.

        :return: tuple (dict form of the hits, the hits as returned under "out")
        """
        client = mygene.MyGeneInfo()
        query_options = dict(scopes = self.input_data,
                             fields = self.output_data,
                             species = self.species,
                             as_dataframe = True, returnall = True)
        hits = client.querymany(identifiers, **query_options)["out"]
        return hits.to_dict(),hits

class statsFacade:
    @staticmethod
    def TransformData(data_vector):
        """
        Standardise the vector (subtract the mean, divide by the standard
        deviation) and return ``-2 * log(ECDF(s)(s))`` for the standardised
        values ``s``.

        :param data_vector: sequence of numbers
        :return: numpy array with one transformed value per input value
        """
        m = np.mean(data_vector)
        sd = np.std(data_vector)
        s = np.asarray([(d-m)/sd for d in data_vector])
        # build the empirical CDF once; it used to be rebuilt for every element
        ecdf = ECDF(s)
        return -2*np.log(ecdf(s))

    @staticmethod
    def CalculateCovariances(data_matrix):
        """
        Apply ``statsFacade.TransformData`` to every row of ``data_matrix``
        and return ``np.cov`` of the transformed rows.
        """
        transformed_rows = []
        for row in data_matrix:
            transformed_rows.append(statsFacade.TransformData(row))
        return np.cov(np.array(transformed_rows))
    
    @staticmethod 
    def CombinePValues(covar_matrix, p_values, extra_info = False):
        """
        Combine p-values into one, rescaling the chi-square statistic by a
        factor derived from the covariance matrix.

        :param covar_matrix: square matrix; the entries above the diagonal are summed
        :param p_values: the p-values to combine
        :param extra_info: when true return the tuple
            (p_brown, p_fisher, c, df_brown) instead of p_brown alone
        :return: the combined p-value, or the tuple described above
        """
        m = int(covar_matrix.shape[0])
        df_fisher = 2.0*m
        Expected = 2.0*m
        # sum of the strictly upper triangle, accumulated row by row
        cov_sum = sum(covar_matrix[i, j] for i in range(m) for j in range(i+1, m))

        Var = 4.0*m+2*cov_sum
        df_brown = 2.0*Expected**2/Var
        if df_brown > df_fisher:
            # cap the degrees of freedom at the uncorrected value; no rescaling then
            df_brown, c = df_fisher, 1.0
        else:
            c = Var/(2.0*Expected)

        x = 2.0*sum([-np.log(p) for p in p_values])
        p_brown = chi2_cdf(df_brown, 1.0*x/c)
        p_fisher = chi2_cdf(df_fisher, 1.0*x)

        if not extra_info:
            return p_brown
        return p_brown, p_fisher, c, df_brown

    @staticmethod
    def get_combined_pvalues(quant_array,pvalues, **kwargs):
        """
        Compute the covariance matrix of ``quant_array`` with
        ``statsFacade.CalculateCovariances`` and combine ``pvalues`` with
        ``statsFacade.CombinePValues``.

        :param extra_info: (keyword) forwarded to CombinePValues, default False
        :return: whatever CombinePValues returns
        """
        want_details = kwargs.get('extra_info', False)
        covariances = statsFacade.CalculateCovariances(quant_array)
        return statsFacade.CombinePValues(covariances, pvalues, extra_info = want_details)

    @staticmethod
    def kMedoids(D, k, tmax=100):
        """
        Cluster points around k medoids, given their pairwise distances.

        Starting from k randomly chosen medoids (``np.random``), points are
        assigned to the nearest medoid and each medoid is replaced by the
        cluster member with the smallest mean distance to the other members,
        until the medoids stop changing or ``tmax`` iterations were run.

        :param D: distance matrix (2-D numpy array)
        :param k: number of medoids
        :param tmax: maximum number of iterations, default 100
        :return: tuple (M, C) with M the sorted array of medoid indices and C
            a dict mapping cluster number to the array of member indices
        :raises Exception: k exceeds the number of columns of D
        """
        # determine dimensions of distance matrix D
        m, n = D.shape

        if k > n:
            raise Exception('too many medoids')
        # randomly initialize an array of k medoid indices
        M = np.arange(n)
        np.random.shuffle(M)
        M = np.sort(M[:k])

        # create a copy of the array of medoid indices
        Mnew = np.copy(M)

        # initialize a dictionary to represent clusters
        C = {}
        for t in range(tmax):
            # determine clusters, i. e. arrays of data indices
            J = np.argmin(D[:,M], axis=1)
            for kappa in range(k):
                C[kappa] = np.where(J==kappa)[0]
            # update cluster medoids
            for kappa in range(k):
                members = C[kappa]
                if len(members) == 0:
                    # an empty cluster (possible with tied distances) keeps
                    # its medoid; argmin of an empty array would raise
                    continue
                J = np.mean(D[np.ix_(members,members)],axis=1)
                Mnew[kappa] = members[np.argmin(J)]
            # sort in place: np.sort() returns a copy, which was discarded
            Mnew.sort()
            # check for convergence
            if np.array_equal(M, Mnew):
                break
            M = np.copy(Mnew)
        else:
            # final update of cluster memberships
            J = np.argmin(D[:,M], axis=1)
            for kappa in range(k):
                C[kappa] = np.where(J==kappa)[0]

        # return results
        return M, C

    @staticmethod
    def z_transform(quant_data):
        """
        Z-score every column of a data frame.

        Mean and standard deviation (``ddof=0``) of a column are computed from
        its finite values only; non-finite entries stay non-finite in the result.

        :param quant_data: numeric data frame
        :return: data frame of z-scores with the same index and columns
        """
        zscore_dat_list = list()
        for col in list(quant_data.columns):
            dat = np.array(quant_data[col])
            # boolean mask instead of filter(): filter() returns an iterator
            # on Python 3, which np.array() cannot turn into a numeric array
            fin_dat = dat[np.isfinite(dat)]
            zscore_dat_list.append((dat - fin_dat.mean())/fin_dat.std(ddof=0))
        zscore_dat = pd.DataFrame(zscore_dat_list).T
        zscore_dat.columns = quant_data.columns
        zscore_dat.index = quant_data.index
        return zscore_dat

    @staticmethod
    def get_iqr(x):
        """
        Return the interquartile range of ``x`` together with both quartiles.

        :return: tuple (iqr, 25th percentile, 75th percentile)
        """
        quartiles = np.percentile(x, [75 ,25])
        upper = quartiles[0]
        lower = quartiles[1]
        return upper - lower, lower, upper

    @staticmethod
    def cohen_d(x,y):
        '''
        Cohen's d effect size between two samples: the difference of the
        means divided by a pooled standard deviation (sample variances,
        ddof=1). Equal-sized samples use the plain average of the two
        variances, otherwise the variances are weighted by n-1.
        '''
        size_x, size_y = len(x), len(y)
        mean_gap = mean(x) - mean(y)
        if size_x == size_y:
            pooled_var = (std(x, ddof=1) ** 2 + std(y, ddof=1) ** 2) / 2.0
        else:
            weighted = (size_x-1)*std(x, ddof=1) ** 2 + (size_y-1)*std(y, ddof=1) ** 2
            pooled_var = weighted / (size_x + size_y - 2)
        return mean_gap / sqrt(pooled_var)

    @staticmethod
    def bootstrap_resample(X, n=None):
        """ Bootstrap resample an array_like
        Parameters
        ----------
        X : array_like
          data to resample (lists are converted to a numpy array)
        n : int, optional
          length of resampled array, equal to len(X) if n is None
        Results
        -------
        returns X_resamples, a numpy array of n values drawn from X with
        replacement using numpy.random
        """
        # index arrays only work on numpy arrays, so convert list-like input
        X = numpy.asarray(X)
        if n is None:
            n = len(X)
        resample_i = numpy.floor(numpy.random.rand(n)*len(X)).astype(int)
        X_resample = X[resample_i]
        return X_resample

    @staticmethod
    def rsquared(x, y):
        """Return the squared correlation coefficient of the linear
        regression of y on x (``scipy.stats.linregress``)."""
        regression = scipy.stats.linregress(x, y)
        # the third entry of the result is the correlation coefficient
        correlation = regression[2]
        return correlation**2

    @staticmethod
    def checkForBimodality(sample):
        """
        Fit a two-component Gaussian mixture to the sample and report the
        fitted components; the caller decides whether they indicate bimodality.

        :param sample: data handed to the mixture model's ``fit``
        :return: dict with "weights" and "means" of the two components, both
            rounded to two decimals
        """
        # mixture.GMM no longer exists in current scikit-learn releases; keep
        # using it where it is available (unchanged results) and fall back to
        # its replacement GaussianMixture otherwise.
        model_class = getattr(mixture, "GMM", None)
        if model_class is None:
            model_class = mixture.GaussianMixture
        g = model_class(n_components=2)
        g.fit(sample)
        weights = numpy.round(g.weights_, 2)
        means = numpy.round(g.means_, 2)
        return{"weights":weights,"means":means}

    @staticmethod
    def corrcoef(matrix, max_rows=1000):
        """
        Return the pairwise correlations between the rows of ``matrix`` as a
        flat array: the entries above the diagonal of the correlation matrix,
        row by row.

        :param matrix: 2-D data, one variable per row
        :param max_rows: only the first ``max_rows`` rows are used (default
            1000, the limit that used to be hard-coded); ``None`` uses all rows
        :return: 1-D numpy array of correlation values
        """
        r = numpy.corrcoef(matrix[0:max_rows])
        rf = r[numpy.triu_indices(r.shape[0], 1)]
        return rf

    @staticmethod
    def getSampleZScore(inputArray,sample):
        """
        Express ``sample`` in standard deviations from the mean of
        ``inputArray`` (both taken with the array's own ``mean()``/``std()``).
        """
        centre = inputArray.mean()
        spread = inputArray.std()
        return (sample-centre)/spread

    @staticmethod
    def get_empirical_fdr(df):
        '''
        Add a running false-discovery rate to a ranked list of scored labels.
        Rows are sorted by ascending 'p.val'; for every row the FDR is the
        share of label-0 rows among all label-0 and label-1 rows seen so far.
        :param df: data frame with exactly two columns, 'p.val' followed by
            the label (1 or 0)
        :return: the sorted data frame (fresh 0..n-1 index) with an added
            'FDR' column
        '''
        ranked = df.sort_values('p.val', ascending=True).reset_index(drop=True)
        true_hits = 0
        false_hits = 0
        running_fdr = []
        for _, row in ranked.iterrows():
            _pval, label = row.values
            flag = int(label)
            if flag == 1:
                true_hits += 1
            if flag == 0:
                false_hits += 1
            running_fdr.append(float(false_hits) / float(false_hits + true_hits))

        ranked['FDR'] = running_fdr
        return ranked

    @staticmethod
    def correct_nan_pvalues(pvalues,**kwargs):
        '''
        Multiple-testing correction for a vector of p-values that may contain
        entries rejected by utilsFacade.finite (e.g. nan). Only the accepted
        values are corrected; every other position is filled with nan.
        :param pvalues: the p-values to correct
        :param method: (keyword) correction method forwarded to
            statsFacade.correct_pvalues, default "fdr_bh"
        :param alpha: (keyword) forwarded to statsFacade.correct_pvalues,
            default 0.1
        :return: list of corrected p-values, aligned with the input
        '''
        fdr_method = kwargs.get("method","fdr_bh")
        alpha = kwargs.get("alpha",0.1)

        non_nan_pvalues = utilsFacade.finite(pvalues)
        if len(non_nan_pvalues) == 0:
            # nothing to correct
            return [np.nan for _ in pvalues]
        # forward the requested method and alpha (they used to be ignored)
        pvalCorrs = statsFacade.correct_pvalues(non_nan_pvalues,
                                                method=fdr_method, alpha=alpha)
        # map each raw p-value to its corrected value
        pval_dict = dict(zip(non_nan_pvalues, pvalCorrs))
        return [pval_dict.get(pval, np.nan) for pval in pvalues]

    @staticmethod
    def correct_pvalues(pvalues,**kwargs):
        """
        Multiple-testing correction of a vector of p-values.

        :param pvalues: array-like of p-values
        :param method: (keyword) correction method name for multipletests,
                       default "fdr_bh"
        :param alpha: (keyword) error rate for multipletests, default 0.1
        :return: the corrected p-values (second element of the multipletests result)
        """
        method_name = kwargs.get("method", "fdr_bh")
        error_rate = kwargs.get("alpha", 0.1)

        raw = numpy.array(pvalues)
        outcome = statsmodels.sandbox.stats.multicomp.multipletests(
            raw, alpha=error_rate, method=method_name)
        # outcome is (reject flags, corrected p-values, Sidak alpha, Bonferroni alpha)
        return outcome[1]

    @staticmethod
    def zscore_to_pvalue(z_scores):
        """
        Convert z-scores to p-values under the standard normal distribution.

        :param z_scores: scalar or array-like (list, tuple, numpy array, ...)
        :return: tuple (one_sided, two_sided); one_sided is the upper-tail
                 probability of |z|, two_sided is twice that value
        """
        # numpy.abs also accepts plain lists/tuples, which the built-in abs() rejects
        p_values1 = scipy.stats.norm.sf(numpy.abs(z_scores)) #one-sided
        p_values2 = p_values1 * 2 #twosided
        return p_values1,p_values2

    @staticmethod
    def performKolmogorovSmirnovTest(distribution1,distribution2,**kwargs):
        """
        Two-sample Kolmogorov-Smirnov test on two samples via stats.ks_2samp.

        :param distribution1: first sample (sequence of values)
        :param distribution2: second sample (sequence of values)
        :param considerSampling: (keyword, default False) when True, the longer
            sample is cut down with random.sample to the length of the shorter
            one before testing
        :return: whatever stats.ks_2samp returns for the two (possibly
                 subsampled) samples
        """
        first, second = distribution1, distribution2
        # explicit comparison kept on purpose: only values equal to True switch sampling on
        if kwargs.get("considerSampling", False) == True:
            if len(first) > len(second):
                first = random.sample(first, len(second))
            else:
                # also taken for equal lengths, where it only reorders the second sample
                second = random.sample(second, len(first))
        return stats.ks_2samp(first, second)

    @staticmethod
    def performUnpairedTest(distribution1,distribution2,**kwargs):
        """
        Unpaired (independent two-sample) t-test via stats.ttest_ind with
        equal_var=True.

        :param distribution1: first sample (sequence of values)
        :param distribution2: second sample (sequence of values)
        :param considerSampling: (keyword, default False) when True, the longer
            sample is cut down with random.sample to the length of the shorter
            one before testing
        :return: whatever stats.ttest_ind returns for the two (possibly
                 subsampled) samples
        """
        group_a, group_b = distribution1, distribution2
        # explicit comparison kept on purpose: only values equal to True switch sampling on
        if kwargs.get("considerSampling", False) == True:
            if len(group_a) > len(group_b):
                group_a = random.sample(group_a, len(group_b))
            else:
                # also taken for equal lengths, where it only reorders the second sample
                group_b = random.sample(group_b, len(group_a))
        return stats.ttest_ind(group_a, group_b, equal_var=True)

