"""Source code for BuildYourOwnEmbedding.functional"""

import numpy as np
import numpy.typing as npt
from typing import List, Union, Tuple
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

##! =====================================
##!          Similarity Metrics:
##! =====================================


def inverse_correlation(
    x1: npt.NDArray, x2: npt.NDArray, dtype: npt.DTypeLike = np.float64
) -> np.number:
    """Computes the inverse correlation between two numpy arrays.

    The result is one minus the Pearson correlation coefficient of `x1` and
    `x2`, cast to `dtype`. Identical inputs therefore yield 0, and perfectly
    anti-correlated inputs yield 2.

    Args:
        x1 (npt.NDArray): The first input array.
        x2 (npt.NDArray): The second input array.
        dtype (npt.DTypeLike, optional): The desired data type of the result.
            Defaults to np.float64.

    Returns:
        np.number: The inverse correlation coefficient between `x1` and `x2`,
            cast to the specified `dtype`.
    """
    # np.corrcoef returns the 2x2 correlation matrix; the off-diagonal
    # entry is the Pearson coefficient between the two inputs.
    pearson = np.corrcoef(x1, x2)[0, 1]
    return dtype(1 - pearson)
def manhattan_distance(
    x1: npt.NDArray, x2: npt.NDArray, dtype: npt.DTypeLike = np.float64
) -> np.number:
    """Computes the Manhattan distance between two numpy arrays.

    The Manhattan distance (L1 norm) is the sum of absolute element-wise
    differences between `x1` and `x2`, cast to the specified `dtype`.

    Args:
        x1 (npt.NDArray): The first input array.
        x2 (npt.NDArray): The second input array.
        dtype (npt.DTypeLike, optional): The desired data type of the result.
            Defaults to np.float64.

    Returns:
        np.number: The Manhattan distance between `x1` and `x2`, cast to the
            specified `dtype`.
    """
    absoluteDifferences = np.abs(x1 - x2)
    return dtype(absoluteDifferences.sum())
def euclidian_distance(
    x1: npt.NDArray, x2: npt.NDArray, dtype: npt.DTypeLike = np.float64
) -> np.number:
    """Computes the Euclidian distance between two numpy arrays.

    The Euclidian distance (L2 norm) of the element-wise difference between
    `x1` and `x2` is computed and cast to the specified `dtype`.

    Args:
        x1 (npt.NDArray): The first input array.
        x2 (npt.NDArray): The second input array.
        dtype (npt.DTypeLike, optional): The desired data type of the result.
            Defaults to np.float64.

    Returns:
        np.number: The Euclidian distance between `x1` and `x2`, cast to the
            specified `dtype`.
    """
    difference = np.subtract(x1, x2)
    return dtype(np.linalg.norm(difference))
def cosine_similarity(
    x1: npt.NDArray, x2: npt.NDArray, dtype: npt.DTypeLike = np.float64
) -> np.number:
    """Computes the cosine similarity between two numpy arrays.

    This function calculates the cosine similarity between two input arrays,
    `x1` and `x2`, and returns the result cast to the specified `dtype`. If
    the input arrays are not 1D, they are flattened before the cosine
    similarity is computed.

    Args:
        x1 (npt.NDArray): The first input array.
        x2 (npt.NDArray): The second input array.
        dtype (npt.DTypeLike, optional): The desired data type of the result.
            Defaults to np.float64.

    Returns:
        np.number: The cosine similarity between `x1` and `x2`, cast to the
            specified `dtype`.
    """
    # np.ravel truly flattens any dimensionality; the previous np.hstack
    # only flattened 1-D/2-D inputs and produced a 2-D array for ndim >= 3,
    # which made np.dot return a matrix and broke the scalar cast.
    x1Flat = np.ravel(x1)
    x2Flat = np.ravel(x2)
    return dtype(
        np.dot(x1Flat, x2Flat) / (np.linalg.norm(x1Flat) * np.linalg.norm(x2Flat))
    )
##! =====================================
##!       RDM-Related Functions:
##! =====================================
def plot_rdm(
    rdm: npt.NDArray,
    labels: Union[List[str], None] = None,
    cmap: str = "viridis",
    title: str = None,
    figsize: Tuple[int] = (7, 7),
    dissimilarityLabel: str = "Dissimilarity",
) -> None:
    """Plots a Representational Dissimilarity Matrix (RDM).

    Visualises a square RDM whose element `(i, j)` gives the dissimilarity
    between response `i` and response `j`, with configurable colormap, title,
    axis labels, and figure size.

    Args:
        rdm (npt.NDArray): A 2D numpy array representing the RDM.
        labels (Union[List[str], None], optional): String labels for the x
            and y axes, one per data point. If `None`, the axes keep their
            default numeric ticks. Defaults to `None`.
        cmap (str, optional): A valid Matplotlib colormap name. Defaults to
            "viridis".
        title (str, optional): The plot title. If `None`, the default title
            "RDM of Tuning Curves" is used.
        figsize (Tuple[int], optional): Figure size in inches as
            `(width, height)`. Defaults to `(7, 7)`.
        dissimilarityLabel (str, optional): Label for the colorbar. Defaults
            to "Dissimilarity".
    """
    if title is None:
        title = "RDM of Tuning Curves"
    plt.figure(figsize=figsize)
    plt.imshow(rdm, cmap=cmap)
    plt.colorbar(label=dissimilarityLabel)
    plt.title(title)
    if labels is not None:
        tickPositions = range(len(labels))
        plt.xticks(tickPositions, labels, rotation=75, ha="left")
        plt.yticks(tickPositions, labels)
    plt.show()
##! =====================================
##!       Dimensionality Reduction:
##! =====================================
def mds(dissimilarityMatrix: npt.NDArray, nComponents: int = 3) -> npt.NDArray:
    """Perform classical Multidimensional Scaling (MDS) on a dissimilarity matrix.

    Given a square dissimilarity matrix, computes a low-dimensional embedding
    of the data from the eigenvectors and eigenvalues of the double-centred
    matrix, yielding coordinates that best preserve the pairwise distances.

    Algorithm:
        1. Construct the centreing matrix `J` and the double-centred matrix `G`.
        2. Calculate the eigenvalues and eigenvectors of `G`.
        3. Sort the eigenvectors by eigenvalue in descending order.
        4. Select the top `nComponents` eigenvectors.
        5. Compute the coordinates in the reduced dimensionality space.

    Args:
        dissimilarityMatrix (npt.NDArray): A square matrix of shape `(n, n)`
            where element `(i, j)` is the dissimilarity between the `i-th`
            and `j-th` samples.
        nComponents (int, optional): The number of dimensions for the output
            embedding. Defaults to 3.

    Returns:
        npt.NDArray: An array of shape `(n, nComponents)` with the embedded
            coordinates.

    Example:
        >>> dissimilarity_matrix = np.array([[0.0, 0.5, 0.2], [0.5, 0.0, 0.8], [0.2, 0.8, 0.0]])
        >>> embedding = mds(dissimilarity_matrix, nComponents=2)
        >>> print(embedding)

    Raises:
        ValueError: If `dissimilarityMatrix` is not a square matrix.
    """
    # used notes from: https://www.sjsu.edu/faculty/guangliang.chen/Math253S20/lec9mds.pdf
    # Enforce the documented contract: the docstring promised this ValueError
    # but the original implementation never checked.
    if (
        dissimilarityMatrix.ndim != 2
        or dissimilarityMatrix.shape[0] != dissimilarityMatrix.shape[1]
    ):
        raise ValueError("dissimilarityMatrix must be a square matrix")
    numSamples: int = dissimilarityMatrix.shape[0]
    # compute double-centred G matrix:
    J = np.eye(numSamples) - np.ones((numSamples, numSamples)) / numSamples
    G = -0.5 * J @ (dissimilarityMatrix**2) @ J
    # find eigenvalues and eigenvectors (G is symmetric, so eigh applies):
    eigenValues, eigenVectors = np.linalg.eigh(G)
    # sort eigenvectors by eigenvalue in descending order:
    indices = np.argsort(eigenValues)[::-1]
    eigenValues = eigenValues[indices]
    eigenVectors = eigenVectors[:, indices]
    # get top nComponents:
    topNEigenValues = eigenValues[:nComponents]
    topNEigenVectors = eigenVectors[:, :nComponents]
    # compute coordinates of points:
    return topNEigenVectors * np.sqrt(topNEigenValues)
def pca_variance_explained(dissimilarityMatrix: npt.NDArray) -> npt.NDArray:
    """Computes PCA Variance Explained Ratios given a dissimilarity matrix.

    Args:
        dissimilarityMatrix (npt.NDArray): A square matrix of shape `(n, n)`
            where element `(i, j)` is the dissimilarity between the `i-th`
            and `j-th` samples.

    Returns:
        npt.NDArray: An array of shape `n` containing the PCA variance
            explained ratios of the samples in the input matrix.
    """
    # Fit a full PCA (all components) and report the variance ratios only.
    model = PCA()
    model.fit(dissimilarityMatrix)
    return model.explained_variance_ratio_
def pca(matrix: npt.NDArray, nComponents) -> npt.NDArray:
    """Computes the principal components of a given 2d matrix.

    Args:
        matrix (npt.NDArray): A 2D array of the responses.
        nComponents: The number of principal components to consider.

    Returns:
        npt.NDArray: A n x `nComponents` array, containing the values of the
            principal components.
    """
    model = PCA(n_components=nComponents)
    transformed = model.fit_transform(matrix)
    return transformed
##! =====================================
##!            Information:
##! =====================================
def fisher_information(curves: npt.NDArray) -> npt.NDArray:
    """Calculates the Fisher Information for a set of tuning curves.

    Supports both 1-D and arbitrary-dimensional tuning curves: for each curve
    the squared gradient (summed across gradient axes for multi-dimensional
    curves) is accumulated over all curves.

    Args:
        curves (npt.NDArray): A numpy array of shape
            (numCurves, dim1, dim2, ..., dimN) where `numCurves` is the number
            of curves and the remaining axes are the dimensions of each
            response.

    Raises:
        ValueError: If `curves` has fewer than two dimensions.

    Returns:
        npt.NDArray: The FI values with shape (dim1, dim2, ..., dimN).
    """
    if curves.ndim < 2:
        raise ValueError("curves must be at least two dimensions")
    fiValues = np.zeros(curves.shape[1:])
    for curve in curves:
        gradients = np.gradient(curve)
        if curve.ndim == 1:
            # np.gradient returns a single array for 1-D input.
            fiValues += gradients**2
        else:
            # np.gradient returns one array per axis for N-D input.
            fiValues += sum(g**2 for g in gradients)
    return fiValues
def mutual_information(
    response: npt.NDArray,
    stimulus: npt.NDArray,
) -> np.number:
    """Computes the mutual information between a single neural response and its stimulus.

    Empirical distributions are estimated by counting unique values (and
    unique value pairs) in the flattened arrays; the mutual information is
    then accumulated in nats via the standard sum over the joint distribution.

    Args:
        response (npt.NDArray): Neural response with shape (dim1, dim2, ..., dimN).
        stimulus (npt.NDArray): Corresponding stimulus with shape (dim1, dim2, ..., dimN).

    Raises:
        ValueError: If the stimulus and response have different shapes.

    Returns:
        np.number: The mutual information between the response and the stimulus.
    """
    if response.shape != stimulus.shape:
        raise ValueError("Response and Stimulus must have the same dimensions")
    responseValues = response.flatten()
    stimulusValues = stimulus.flatten()
    sampleCount = len(responseValues)
    # Joint distribution over (response, stimulus) value pairs:
    pairs = np.stack((responseValues, stimulusValues), axis=-1)
    uniquePairs, pairCounts = np.unique(pairs, axis=0, return_counts=True)
    jointProbs = pairCounts / sampleCount
    # Marginal distributions as value -> probability lookups:
    uniqueResp, respCounts = np.unique(responseValues, return_counts=True)
    uniqueStim, stimCounts = np.unique(stimulusValues, return_counts=True)
    respProbs = dict(zip(uniqueResp, respCounts / sampleCount))
    stimProbs = dict(zip(uniqueStim, stimCounts / len(stimulusValues)))
    mi = 0.0
    for jointProb, (respVal, stimVal) in zip(jointProbs, uniquePairs):
        if jointProb > 0 and respProbs[respVal] > 0 and stimProbs[stimVal] > 0:
            mi += jointProb * np.log(
                jointProb / (respProbs[respVal] * stimProbs[stimVal])
            )
    return mi