import numpy as np
import pandas as pd
import pickle
import dill
from sklearn.metrics.pairwise import manhattan_distances, euclidean_distances, cosine_similarity
from scipy.spatial.distance import canberra

def upper_tri_values(df):
    """
    Return an array with the values of the upper triangle of the DataFrame.

    :param df: symmetric DataFrame
    :type df: :py:class:`pandas.DataFrame`
    :return: values above the main diagonal (diagonal excluded)
    :rtype: :py:class:`numpy.ndarray`
    """
    m = df.values
    return m[np.triu_indices(df.shape[0], k=1)]
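
# Usage sketch (illustrative only; the 3x3 demo frame below is an assumption,
# not part of the module):
# >>> demo = pd.DataFrame([[0, 1, 2], [1, 0, 3], [2, 3, 0]],
# ...                     index=list('abc'), columns=list('abc'))
# >>> upper_tri_values(demo)
# array([1, 2, 3])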

def znorm(df):
    """
    Z-transform (z-score normalization) within each column.

    :param df: input DataFrame
    :type df: :py:class:`pandas.DataFrame`
    :return: column-wise z-transformed DataFrame
    :rtype: :py:class:`pandas.DataFrame`
    """
    norm_df = pd.DataFrame(index=df.index, columns=df.columns)
    for c in df.columns:
        value = df[c]
        norm_df[c] = (value - value.mean()) / value.std()
    return norm_df
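
# Usage sketch (illustrative; the tiny frame is an assumption). Note that
# pandas uses the sample standard deviation (ddof=1):
# >>> znorm(pd.DataFrame({'x': [1.0, 2.0, 3.0]}))['x'].tolist()
# [-1.0, 0.0, 1.0]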

def cosine_similarity_scaled(df):
    """
    Calculate cosine similarity between each pair of rows in a DataFrame.

    Similarity is min-max scaled into [0, 1].

    :param df: input DataFrame; rows are compared pairwise
    :type df: :py:class:`pandas.DataFrame`
    :return: pairwise similarity matrix
    :rtype: :py:class:`pandas.DataFrame`
    """
    sim = cosine_similarity(df)
    # Shift so the minimum becomes 0, then scale by the new maximum
    shift = sim.min()
    sim -= shift
    scale = sim.max()
    sim /= scale
    return pd.DataFrame(sim, index=df.index.values, columns=df.index.values)
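
# Usage sketch (illustrative; the demo rows are assumptions). After shifting
# and scaling, the most anti-correlated pair maps to 0 and self-similarity
# maps to 1 (values shown up to floating-point rounding):
# >>> rows = pd.DataFrame([[1, -1], [-1, 1], [1, 1]], index=list('abc'))
# >>> sim = cosine_similarity_scaled(rows).round(2)
# >>> float(sim.loc['a', 'b']), float(sim.loc['a', 'c'])
# (0.0, 0.5)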

def manhattan_similarity(df):
    """
    Calculate Manhattan similarity between each pair of rows in a DataFrame.

    Similarity is scaled into [0, 1].

    :param df: input DataFrame; rows are compared pairwise
    :type df: :py:class:`pandas.DataFrame`
    :return: pairwise similarity matrix
    :rtype: :py:class:`pandas.DataFrame`
    """
    # Get Manhattan distance
    dist = manhattan_distances(df)
    # Convert distance to similarity by max-minus
    sim = dist.max() - dist
    # Scale into [0, 1]
    sim /= sim.max()
    return pd.DataFrame(sim, index=df.index.values, columns=df.index.values)
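
# Usage sketch (illustrative; the demo rows are assumptions). The max-minus
# conversion maps the most distant pair to similarity 0 and identical rows
# to 1:
# >>> rows = pd.DataFrame([[0, 0], [1, 1], [3, 3]], index=list('abc'))
# >>> sim = manhattan_similarity(rows)
# >>> float(sim.loc['a', 'c']), float(sim.loc['a', 'a'])
# (0.0, 1.0)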

def euclidean_similarity(df):
    """
    Calculate Euclidean similarity between each pair of rows in a DataFrame.

    Similarity is scaled into [0, 1].

    :param df: input DataFrame; rows are compared pairwise
    :type df: :py:class:`pandas.DataFrame`
    :return: pairwise similarity matrix
    :rtype: :py:class:`pandas.DataFrame`
    """
    # Get Euclidean distance
    dist = euclidean_distances(df)
    # Convert distance to similarity by max-minus
    sim = dist.max() - dist
    # Scale into [0, 1]
    sim /= sim.max()
    return pd.DataFrame(sim, index=df.index.values, columns=df.index.values)

def canberra_similarity(df):
    """
    Calculate Canberra similarity between each pair of rows in a DataFrame.

    Similarity is scaled into [0, 1].

    :param df: input DataFrame; rows are compared pairwise
    :type df: :py:class:`pandas.DataFrame`
    :return: pairwise similarity matrix
    :rtype: :py:class:`pandas.DataFrame`
    """
    index = df.index.values
    # Build a symmetric Canberra distance matrix pair by pair
    dist = pd.DataFrame(0, index=index, columns=index, dtype=float)
    for i in range(len(index) - 1):
        a = df.loc[index[i]].values
        for j in range(i + 1, len(index)):
            b = df.loc[index[j]].values
            d = canberra(a, b)
            dist.at[index[i], index[j]] = d
            dist.at[index[j], index[i]] = d
    dist = dist.values
    # Convert distance to similarity by max-minus
    sim = dist.max() - dist
    # Scale into [0, 1]
    sim /= sim.max()
    return pd.DataFrame(sim, index=index, columns=index)
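
# A vectorized alternative (a sketch intended to keep the same behaviour, not
# the module's implementation) could build the same matrix with scipy's
# pdist/squareform instead of the explicit double loop:
# >>> from scipy.spatial.distance import pdist, squareform
# >>> def canberra_similarity_pdist(df):
# ...     dist = squareform(pdist(df.values, metric='canberra'))
# ...     sim = dist.max() - dist
# ...     sim /= sim.max()
# ...     return pd.DataFrame(sim, index=df.index.values, columns=df.index.values)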

def pearson_scaled(df):
    """
    Calculate Pearson correlation between each pair of rows in a DataFrame.

    Correlation is min-max scaled into [0, 1].

    :param df: input DataFrame; rows are compared pairwise
    :type df: :py:class:`pandas.DataFrame`
    :return: scaled pairwise correlation matrix
    :rtype: :py:class:`pandas.DataFrame`
    """
    corr = df.T.corr(method='pearson')
    shift = corr.min().min()
    corr -= shift
    scale = corr.max().max()
    corr /= scale
    return corr
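
# Usage sketch (illustrative; the demo rows are assumptions). Correlation is
# computed between rows (hence the transpose), then min-max scaled so that a
# perfectly anti-correlated pair maps to 0 and a perfectly correlated pair
# maps to 1:
# >>> rows = pd.DataFrame([[1, 2, 3], [3, 2, 1], [1, 2, 3]], index=list('abc'))
# >>> scaled = pearson_scaled(rows)
# >>> float(scaled.loc['a', 'b']), float(scaled.loc['a', 'c'])
# (0.0, 1.0)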

def spearman_scaled(df):
    """
    Calculate Spearman correlation between each pair of rows in a DataFrame.

    Correlation is min-max scaled into [0, 1].

    :param df: input DataFrame; rows are compared pairwise
    :type df: :py:class:`pandas.DataFrame`
    :return: scaled pairwise correlation matrix
    :rtype: :py:class:`pandas.DataFrame`
    """
    corr = df.T.corr(method='spearman')
    shift = corr.min().min()
    corr -= shift
    scale = corr.max().max()
    corr /= scale
    return corr

def kendall_scaled(df):
    """
    Calculate Kendall correlation between each pair of rows in a DataFrame.

    Correlation is min-max scaled into [0, 1].

    :param df: input DataFrame; rows are compared pairwise
    :type df: :py:class:`pandas.DataFrame`
    :return: scaled pairwise correlation matrix
    :rtype: :py:class:`pandas.DataFrame`
    """
    corr = df.T.corr(method='kendall')
    shift = corr.min().min()
    corr -= shift
    scale = corr.max().max()
    corr /= scale
    return corr

def check_symmetric(a, rtol=1e-05, atol=1e-08):
    """
    Check whether the given numpy matrix is symmetric.

    :param a: square matrix
    :type a: :py:class:`numpy.ndarray`
    :param rtol: relative tolerance
    :param atol: absolute tolerance
    :return: True if the matrix is symmetric within the given tolerances
    :rtype: bool
    """
    return np.allclose(a, a.T, rtol=rtol, atol=atol)
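
# Usage sketch (illustrative): the similarity matrices built above should pass
# this check when converted to numpy arrays.
# >>> check_symmetric(np.array([[1.0, 0.5], [0.5, 1.0]]))
# True
# >>> check_symmetric(np.array([[1.0, 0.5], [0.2, 1.0]]))
# False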

def save_obj(obj, fname, method='pickle'):
    """
    Serialize an object to file.

    :param obj: object to be saved
    :param fname: path to the output file
    :type fname: str
    :param method: {pickle, dill} package used for serialization
    :type method: str
    :raises ValueError: if **method** is not set to ``pickle`` or ``dill``
    """
    with open(fname, 'wb') as f:
        if method == 'pickle':
            pickle.dump(obj, f)
        elif method == 'dill':
            dill.dump(obj, f)
        else:
            raise ValueError('Please select method from {pickle, dill}!')

def load_obj(fname, method='pickle'):
    """
    Load an object that was saved in pickle or dill format.

    :param fname: path to the file
    :type fname: str
    :param method: {pickle, dill} package used for serialization
    :type method: str
    :return: the deserialized object
    :raises ValueError: if **method** is not set to ``pickle`` or ``dill``
    """
    with open(fname, 'rb') as f:
        if method == 'pickle':
            return pickle.load(f)
        elif method == 'dill':
            return dill.load(f)
        else:
            raise ValueError('Please select method from {pickle, dill}!')
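
# Usage sketch (illustrative; the temporary path is an assumption) showing a
# save/load round trip:
# >>> import os, tempfile
# >>> path = os.path.join(tempfile.mkdtemp(), 'demo.pkl')
# >>> save_obj({'a': 1}, path)
# >>> load_obj(path)
# {'a': 1}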

def jaccard(setA, setB):
    """
    Calculate the Jaccard index of two sets.

    :param setA: first set
    :param setB: second set
    :return: size of the intersection divided by the size of the union
    :rtype: float
    """
    return len(setA.intersection(setB)) / len(setA.union(setB))
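
# Usage sketch (illustrative): two shared elements out of four distinct
# elements in total gives a Jaccard index of 0.5.
# >>> jaccard({'a', 'b', 'c'}, {'b', 'c', 'd'})
# 0.5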

def scaled_P_to_nm(scaled_P):
    """
    TODO: Add doc here

    :param scaled_P:
    :return:
    """
    power = -3.968 * scaled_P + 4.326
    return 10 ** power

def num_comb(x):
    """
    Number of unordered pairs (combinations of two) among ``x`` items.

    :param x: number of items
    :return: x * (x - 1) / 2
    """
    return x * (x - 1) / 2
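
# Usage sketch (illustrative): for an n x n symmetric DataFrame, num_comb(n)
# equals the number of values returned by upper_tri_values (note the float
# result from true division).
# >>> num_comb(4)
# 6.0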