Source code for gridlib.fit

"""
Module for GRID and multi-exponential fitting.
"""

import math
from typing import Tuple, Dict, Union

import numpy as np
import scipy.optimize

import gridlib.calc as calc
import gridlib.data_utils as data_utils


def create_fixed_grid(
    k_min: float, k_max: float, n: int, scale: str
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Function creates the fixed grid based on the provided parameters.

    Parameters
    ----------
    k_min : float
        Minimum decay rate value.
    k_max : float
        Maximum decay rate value.
    n : int
        Number of positions in the grid.
    scale : {"log", "linear"}
        The scale to be applied for the creation of the grid. "log" will create a fixed
        grid with a logarithmic scale, while "linear" will create a fixed grid with a
        linear scale.

    Returns
    -------
    k: np.ndarray
        The fixed grid positions based on the given parameter values.

    Raises
    ------
    ValueError
        If scale is not one of the allowed options.
    """

    if scale not in {"log", "linear"}:
        raise ValueError("Scale should be either 'log' or 'linear'.")

    # Create the grid with fixed spacing between every position
    if scale == "linear":
        k = np.linspace(k_min, k_max, num=n, endpoint=True)
    elif scale == "log":
        k = np.geomspace(k_min, k_max, num=n, endpoint=True)

    return k


# Module for GRID fitting, multi-exponential fitting, and GRID resampling
def con_eq(x0: np.ndarray):
    """Constraint equality: makes sure all the amplitudes sum to 1."""
    s = x0[:-1]
    return np.sum(s) - 1


[docs]def fit_grid( parameters: Dict, data: Dict[str, Dict[str, np.ndarray]], disp: bool = True, ) -> Dict[str, Dict[str, Union[np.ndarray, float]]]: """ Functions performs the complete GRID fitting procedure on the provided survival time distribution data and returns the fit results. Parameters ---------- parameters : Dict Dictionary containing all the parameters needed to perform the GRID fitting. data : Dict[str, Dict[str, np.ndarray]] A dictionary mapping keys (time-lapse conditions) to the corresponding time and value arrays of the survival functions. For example:: { "0.05s": { "time": array([0.05, 0.1, 0.15, ...]), "value": array([1.000e+04, 8.464e+03, 7.396e+03, ...]), }, "1s": { "time": array([1., 2., 3., 4., ...]), "value": array([1.000e+04, 6.925e+03, 5.541e+03, 4.756e+03, ...]), }, } disp : bool, optional If True, then messages and final minimization results are printed out, otherwise the there are no messages printed, by default True. Returns ------- fit_results: Dict[str, Dict[str, Union[np.ndarray, float]]] A dictionary mapping keys (fitting procedure) to the corresponding fit results. For example:: { "grid": { "k": array([1.00000000e-03, 1.04737090e-03, ...]), "s": array([3.85818587e-17, 6.42847878e-18, ...]), "a": 0.010564217803906671, "loss": 0.004705659331508584, }, } Raises ------ ValueError If an incorrect parameter value is provided or a value is missing. Examples -------- Assume that the survival time distributions are stored in the variable ``data``, so:: data = {...} parameters = { "k_min": 10 ** (-3), "k_max": 10**1, "N": 200, "scale": "log", "reg_weight": 0.01, "fit_a": True, "a_fixed": None, } fit_results = fit_grid(parameters, data, disp=True) """ if disp: print("Start GRID fitting...") data_processed = data_utils.process_data(data) if not data_utils.isvalid_parameters_grid(parameters): raise ValueError("parameters does not contain a valid value") if "k" in parameters: k = parameters["k"] else: k = create_fixed_grid( parameters["k_min"], parameters["k_max"], parameters["N"], parameters["scale"], ) reg_weight = parameters["reg_weight"] x0 = np.ones(k.shape[0], dtype=np.float64) * (1 / k.shape[0]) # Update the x0 array and set the bounds for both the amplitudes and the # photobleaching number lbq = 0 # lower bound photobleaching ubq = 3 # upper bound photobleaching bnds = [(0, 1) for _ in range(x0.shape[0])] bnds.append((lbq, ubq)) # add the bounds for the photobleaching if parameters["fit_a"]: tau_min = min(data_utils.get_time_sec(t_tl) for t_tl in data.keys()) # The initial kb guess is 2 s^-1 as default a = 1.0 * tau_min # # default bleaching rate is 1 s^-1 x0 = np.concatenate((x0, np.array([a], dtype=np.float64))) elif not parameters["fit_a"]: a = parameters["a_fixed"] x0 = np.concatenate((x0, np.array([a], dtype=np.float64))) bnds[-1] = (a - 10e-6, a + 10e-6) # Fix the photobleaching number # print(x0.shape) # print(bnds) cons = [{"type": "eq", "fun": con_eq}] # TODO: change iprint, see source code options = {"maxiter": 600, "disp": disp, "iprint": 1, "ftol": 10 ** (-10)} res = scipy.optimize.minimize( calc.lsqobj_grid, x0, args=(data_processed, k, reg_weight), method="SLSQP", jac=True, # jac=True, means the gradient is given by the function bounds=bnds, constraints=cons, options=options, ) grid_results = { "k": k, "s": res.x[: k.shape[0]], "a": res.x[-1], "loss": res.fun, } fit_results = {"grid": grid_results} if disp: print("GRID fitting finished.") return fit_results
def _fit_n_exp( parameters: Dict, data: Dict[str, Dict[str, np.ndarray]], n: int = 2, disp: bool = True, ): """ Function to fit an n-exponential to the provided data and returns the fit results. Parameters ---------- parameters : Dict Dictionary containing all the parameters needed to perform the multi-exponential fitting. data : Dict[str, Dict[str, np.ndarray]] A dictionary mapping keys (time-lapse conditions) to the corresponding time and value arrays of the survival functions. For example:: { "0.05s": { "time": array([0.05, 0.1, 0.15, ...]), "value": array([1.000e+04, 8.464e+03, 7.396e+03, ...]), }, "1s": { "time": array([1., 2., 3., 4., ...]), "value": array([1.000e+04, 6.925e+03, 5.541e+03, 4.756e+03, ...]), }, } n : int, optional Sets the number of exponentials to fit to the data, by default 2. disp : bool, optional If True, then messages and final minimization results are printed out, otherwise the there are no messages printed, by default True. Returns ------- fit_results: Dict[str, Dict[str, Union[np.ndarray, float]]] A dictionary mapping keys (fitting procedure) to the corresponding fit results. For example:: { "2-exp": { "k": array([0.03715506, 1.7248619]), "s": array([0.17296989, 0.82703011]), "a": 0.011938572088673213, "loss": 0.2868809590425386 }, } Raises ------ ValueError If an incorrect parameter value is provided or a value is missing. """ if disp: print(f"Start {n}-exp fitting...") data_processed = data_utils.process_data(data) if not data_utils.isvalid_parameters_n_exp(parameters): raise ValueError("parameters does not contain a valid value") k_min = parameters["k_min"] k_max = parameters["k_max"] coordinates = [] for i in range(n): coordinates.append( np.geomspace(k_min, k_max, num=2, endpoint=True, dtype=np.float64) ) # Create the grid x0_k = np.meshgrid(*coordinates) x0_k = np.reshape(x0_k, (-1, n)) # Initial guesses x0_s = np.linspace(0.05, 0.7, num=n, endpoint=True, dtype=np.float64) # Create the arrays for the bounds of the decay rates, and amplitude values # +1 for the photobleaching bounds lb = np.zeros(x0_k.shape[1] + x0_s.shape[0] + 1) ub = np.ones(x0_k.shape[1] + x0_s.shape[0] + 1) # Set the bounds for the decay rates lb[: x0_k.shape[1]] = k_min ub[: x0_k.shape[1]] = k_max # Bounds for the amplitudes are already set, since they are the # default values, see initialization of the lb and ub array # # Set the bounds for the amplitudes # lb[x0_k.shape[1] : (x0_k.shape[1] + x0_s.shape[0])] = 0 # ub[x0_k.shape[1] : (x0_k.shape[1] + x0_s.shape[0])] = 1 # Set the initial photobleaching number and the photobleaching bounds if parameters["fit_a"]: tau_min = min(data_utils.get_time_sec(t_tl) for t_tl in data.keys()) # The initial kb guess is 2 s^-1 as default a = 1.0 * tau_min # # default bleaching rate is 1 s^-1 # Default values for photobleaching bounds lbq = 0 # lower bound photobleaching ubq = 3 # upper bound photobleaching lb[-1] = lbq ub[-1] = ubq elif not parameters["fit_a"]: a = parameters["a_fixed"] lb[-1] = a - 10e-6 ub[-1] = a + 10e-6 bnds = (lb, ub) if disp: print(f"Total fits: {x0_k.shape[0]}") fit_results_all = [] for i in range(x0_k.shape[0]): x0 = np.concatenate((x0_k[i, :], x0_s, np.array([a]))) res = scipy.optimize.least_squares( calc.global_multi_exp, x0, args=(data_processed, n), bounds=bnds, method="trf", ) fit_results_all.append(res) # Find the best fitted parameters cost_min = math.inf x_best = None for res in fit_results_all: if res.cost < cost_min: cost_min = res.cost x_best = res.x # Retrieve the best decay rates k_temp = x_best[:n] # Retrieve the best weight values s_temp = x_best[n : (2 * n)] # Renormalize s s_norm = s_temp / np.sum(s_temp) # The indices to sort the k_array from low to high idx = np.argsort(k_temp) # Store the best results in a dictionary multi_exp_results = { "k": k_temp[idx], "s": s_norm[idx], "a": x_best[-1], "loss": cost_min, } # Store the results in a dictionary as a value with the key indicating the # number of decay rates fit_results = {f"{n}-exp": multi_exp_results} if disp: print(f"Finished {n}-exp fitting.") return fit_results
[docs]def fit_multi_exp(parameters, data, disp: bool = True): """ Function fits one or more n-exponentials to the provided data and returns the fit results. Parameters ---------- parameters : Dict Dictionary containing all the parameters needed to perform the multi-exponential fitting. data : Dict[str, Dict[str, np.ndarray]] A dictionary mapping keys (time-lapse conditions) to the corresponding time and value arrays of the survival functions. For example:: { "0.05s": { "time": array([0.05, 0.1, 0.15, ...]), "value": array([1.000e+04, 8.464e+03, 7.396e+03, ...]), }, "1s": { "time": array([1., 2., 3., 4., ...]), "value": array([1.000e+04, 6.925e+03, 5.541e+03, 4.756e+03, ...]), }, } disp : bool, optional If True, then messages and final minimization results are printed out, otherwise the there are no messages printed, by default True. Returns ------- fit_results: Dict[str, Dict[str, Union[np.ndarray, float]]] A dictionary mapping keys (fitting procedure) to the corresponding fit results. For example, if `parameters["n_exp"] = [1, 2, 3]`:: { "1-exp": { "k": array([0.02563639]), "s": array([1.]), "a": 0.08514936433699753, "loss": 1.2825570522448484 }, "2-exp": { "k": array([0.03715506, 1.7248619]), "s": array([0.17296989, 0.82703011]), "a": 0.011938572088673213, "loss": 0.2868809590425386 }, "3-exp": { "k": array([0.0137423 , 0.27889073, 3.6560956]), "s": array([0.06850312, 0.23560175, 0.69589513]), "a": 0.011125323730424764, "loss": 0.0379697542735324 }, } Raises ------ ValueError If an incorrect parameter value is provided or a value is missing. """ if not data_utils.isvalid_parameters_n_exp(parameters): raise ValueError("parameters does not contain a valid value") n = parameters["n_exp"] fit_results = dict() if isinstance(n, int): fit_result_n_exp = _fit_n_exp(parameters, data, n=n, disp=disp) fit_results = {**fit_results, **fit_result_n_exp} else: for n_value in n: fit_result_n_exp = _fit_n_exp(parameters, data, n=n_value, disp=disp) fit_results = {**fit_results, **fit_result_n_exp} return fit_results
[docs]def fit_all(parameters, data, disp: bool = True): """ Function performs GRID fitting procedure and fits one or more n-exponentials to the provided data and returns the fit results. Parameters ---------- parameters : Dict Dictionary containing all the parameters needed to perform the GRID fitting procedure and perform the multi-exponential fitting. data : Dict[str, Dict[str, np.ndarray]] A dictionary mapping keys (time-lapse conditions) to the corresponding time and value arrays of the survival functions. For example:: { "0.05s": { "time": array([0.05, 0.1, 0.15, ...]), "value": array([1.000e+04, 8.464e+03, 7.396e+03, ...]), }, "1s": { "time": array([1., 2., 3., 4., ...]), "value": array([1.000e+04, 6.925e+03, 5.541e+03, 4.756e+03, ...]), }, } disp : bool, optional If True, then messages and final minimization results are printed out, otherwise the there are no messages printed, by default True. Returns ------- fit_results: Dict[str, Dict[str, Union[np.ndarray, float]]] A dictionary mapping keys (fitting procedure) to the corresponding fit results. For example:: { "grid": { "k": array([1.00000000e-03, 1.04737090e-03, ...]), "s": array([3.85818587e-17, 6.42847878e-18, ...]), "a": 0.010564217803906671, "loss": 0.004705659331508584, }, "1-exp": { "k": array([0.02563639]), "s": array([1.]), "a": 0.08514936433699753, "loss": 1.2825570522448484 }, "2-exp": { "k": array([0.03715506, 1.7248619]), "s": array([0.17296989, 0.82703011]), "a": 0.011938572088673213, "loss": 0.2868809590425386 }, "3-exp": { "k": array([0.0137423 , 0.27889073, 3.6560956]), "s": array([0.06850312, 0.23560175, 0.69589513]), "a": 0.011125323730424764, "loss": 0.0379697542735324 }, } Raises ------ ValueError If an incorrect parameter value is provided or a value is missing. """ parameters_grid, parameters_n_exp = data_utils.split_parameters(parameters) fit_results_grid = fit_grid(parameters_grid, data, disp=disp) fit_results_multi_exp = fit_multi_exp(parameters_n_exp, data, disp=disp) fit_results = {**fit_results_grid, **fit_results_multi_exp} return fit_results
def _fit_n_exp_fixed_k(parameters, data, n: int = 2): """Function to fit an n-exponential to the provided data and returns the fitted parameter results.""" data_processed = data_utils.process_data(data) if not data_utils.isvalid_parameters(parameters): raise ValueError("parameters does not contain a valid value") if "k" in parameters: x0_k = np.copy(parameters["k"]) x0_k = np.reshape(x0_k, (-1,)) n = x0_k.shape[0] else: k_min = parameters["k_min"] k_max = parameters["k_max"] coordinates = [] for i in range(n): coordinates.append( np.geomspace(k_min, k_max, num=2, endpoint=True, dtype=np.float64) ) # Create the grid x0_k = np.meshgrid(*coordinates) x0_k = np.reshape(x0_k, (-1, n)) # Create the arrays for the bounds of the decay rates, and amplitude values # +1 for the photobleaching bounds lb = np.zeros(x0_k.shape[1] + x0_s.shape[0] + 1) ub = np.ones(x0_k.shape[1] + x0_s.shape[0] + 1) # Initial guesses x0_s = np.linspace(0.05, 0.7, num=n, endpoint=True, dtype=np.float64) if "k" in parameters: # Set the bounds for the decay rates lb[: x0_k.shape[1]] = x0_k ub[: x0_k.shape[1]] = x0_k else: # Set the bounds for the decay rates lb[: x0_k.shape[1]] = k_min ub[: x0_k.shape[1]] = k_max # Bounds for the amplitudes are already set, since they are the # default values, see initialization of the lb and ub array # # Set the bounds for the amplitudes # lb[x0_k.shape[1] : (x0_k.shape[1] + x0_s.shape[0])] = 0 # ub[x0_k.shape[1] : (x0_k.shape[1] + x0_s.shape[0])] = 1 # Set the initial photobleaching number and the photobleaching bounds if parameters["fit_a"]: tau_min = min(data_utils.get_time_sec(t_tl) for t_tl in data.keys()) # The initial kb guess is 2 s^-1 as default a = 1.0 * tau_min # # default bleaching rate is 1 s^-1 # Default values for photobleaching bounds lbq = 0 # lower bound photobleaching ubq = 3 # upper bound photobleaching lb[-1] = lbq ub[-1] = ubq elif not parameters["fit_a"]: a = parameters["a_fixed"] lb[-1] = parameters["a_fixed"] ub[-1] = parameters["a_fixed"] bnds = (lb, ub) print(f"Total fits: {x0_k.shape[0]}") fit_results_all = [] for i in range(x0_k.shape[0]): x0 = np.concatenate((x0_k[i, :], x0_s, np.array([a]))) res = scipy.optimize.least_squares( calc.global_multi_exp, x0, args=(data_processed, n), bounds=bnds, method="trf", ) fit_results_all.append(res) # Find the best fitted parameters cost_min = math.inf x_best = None for res in fit_results_all: if res.cost < cost_min: cost_min = res.cost x_best = res.x # Retrieve the best decay rates k_temp = x_best[:n] # Retrieve the best weight values s_temp = x_best[n : (2 * n)] # Renormalize s s_norm = s_temp / np.sum(s_temp) # The indices to sort the k_array from low to high idx = np.argsort(k_temp) # Store the best results in a dictionary multi_exp_results = { "k": k_temp[idx], "s": s_norm[idx], "a": x_best[-1], "loss": cost_min, } # Store the results in a dictionary as a value with the key indicating the # number of decay rates fit_results = {f"{n}-exp": multi_exp_results} return fit_results