Source code for gridlib.simulate

"""
Module with function to sample from a provided GRID spectrum or multi-exponential.
"""
import math
from typing import Sequence, Union, Dict

import numpy as np

from . import data_utils


[docs]def tl_simulation_single(
    k: np.ndarray,
    s: np.ndarray,
    kb: float,
    t_int: float,
    t_tl: float,
    N: int = 10000,
) -> Dict[str, Dict[str, np.ndarray]]:
    """Function simulates the fluorescence survival time distributions of a molecule for
    one time-lapse conditions. Dissociation and photobleaching of molecules with
    user-defined parameters is simulated and the resulting fluorescence survival time
    distribution is calculated.

    Parameters
    ----------
    k: np.ndarray
        Decay rates with units per second.
    s: np.ndarray
        Amplitudes for the respective decay rates.
    kb: float
        Photobleaching rate per second (kb = a / t_int).
    t_int: float
        The integration time in seconds.
    t_tl: float
        The time-lapse time to simulate in seconds.
    N: int, optional
        Number of molecules to simulate and use for the survival time distribution
        calculation (default 10000).

    Returns
    -------
    Dict[str, Dict[str, np.ndarray]]
        A dictionary mapping keys (time-lapse conditions) to the corresponding time and
        value arrays of the survival functions. For example::

            {
                "0.05s": {
                    "time": array([0.05, 0.1, 0.15, ...])
                    "value": array([1.000e+04, 8.464e+03, 7.396e+03, ...])
                },
            }

    Raises
    ------
    ValueError
        If kb, t_int, t_tl or N is incorrectly set. They should all atleast be larger
        than 0.

    Examples
    --------
    >>> data_sim = tl_simulation_single(np.array([0.005, 0.48, 5.2]),
    ... np.array([0.05, 0.25, 0.7]), 0.03, 0.05, 0.5, N=10000)

    The above function call simulates fluorescence survival time distributions of a
    type of molecule with three different dissociation rates, and a photobleaching
    rate of 0.03 s^-1. The integration time is set to 50 ms and the time-lapse time
    is set to 500 ms. For the survival time distribution 10000 observed molecules are
    simulated.
    """

    if not math.isclose(1.0, np.sum(s)):
        raise ValueError(f"Sum of amplitudes should be equal to 1, but is {np.sum(s)}.")

    if kb <= 0:
        raise ValueError(
            "Photobleaching rate ('kb' argument) should be larger than zero"
        )
    if t_int <= 0:
        raise ValueError(
            "Integration time ('t_int' argument) should be larger than zero"
        )
    if t_tl <= 0:
        raise ValueError("Time-lapse time ('t_tl' argument) should be larger than zero")
    if N <= 0:
        raise ValueError(
            "Number of simulated molecules ('N' argument) should be larger than zero."
        )

    BUFFER_SIZE_DEFAULT = 1000

    p = s / np.sum(s)

    # Can also change kb * t_int to a
    keff = (kb * t_int) / t_tl + k

    time = np.linspace(0, 1500 * t_tl, num=1501, endpoint=True)

    binding = np.zeros(time.shape[0])
    binding_sum = 0

    # TODO: future - improve speed: multiprocessing, or use "chunks", so do like
    # 1000 molecules at the same time with arrays?
    rng = np.random.default_rng()

    # Simulate until enough points have been captured or the computation time is
    # exceeded
    buffer_size = min(BUFFER_SIZE_DEFAULT, N)

    count = 0
    max_count = 1000 * N
    while binding_sum < N and count < max_count:

        keff_state = rng.choice(keff, size=(buffer_size,), p=p)
        lifetime = rng.exponential(scale=1 / keff_state)

        idx = lifetime // t_tl
        idx = idx.astype(np.int32)

        idx_uniq, idx_count = np.unique(idx, return_counts=True)

        binding[idx_uniq] = binding[idx_uniq] + idx_count

        # Only count the molecules that are bound for at least one delta t
        binding_sum = np.sum(binding) - binding[0]

        # Keep track of the number of simulations ran
        count = count + buffer_size

        # Reset the buffer size if required
        buffer_size = min(BUFFER_SIZE_DEFAULT, int(round(N - binding_sum, 0)))

    if count >= max_count:
        print(
            "The simulation was finished pre-emptively because max_count",
            "(1000*N) value was reached.",
        )

    # Remove the the molecules that are bound for 0 frames
    binding = binding[1:]
    # Remove the first time point as well
    time = time[1:]

    # Calculate the survival function values with cumulative summing
    value = np.cumsum(binding[::-1])[::-1]

    # Delete the empty elements
    time = time[value > 0]
    value = value[value > 0]

    data = {f"{t_tl}s": {"time": time, "value": value}}

    # Make sure the data is formatted correctly
    data = data_utils.fmt_t_str_data(data)

    return data


[docs]def tl_simulation(
    k: np.ndarray,
    s: np.ndarray,
    kb: float,
    t_int: float,
    t_tl_all: Sequence[Union[float, str]],
    N: Union[int, Sequence[int]] = 10000,
) -> Dict[str, Dict[str, np.ndarray]]:
    """
    Function simulates the fluorescence survival time distributions of a molecule for
    different time-lapse conditions. Dissociation and photobleaching of molecules with
    user-defined parameters is simulated and the resulting fluorescence survival time
    distribution is calculated. This is done for all the different user-specified
    time-lapse conditions.

    Parameters
    ----------
    k : np.ndarray
        Decay rates with units per second.
    s : np.ndarray
        Amplitudes for the respective decay rates.
    kb : float
        Photobleaching rate per second (kb = a / t_int).
    t_int : float
        The integration time in seconds.
    t_tl_all : Sequence[float | str]
        The time-lapse times to simulate. If the time-lapse times are provided as
        floats then they are interpreted as seconds. However, the time-lapse times can
        also be provided as strings, eg. '100ms' or '1.5s'.
    N : int | Sequence[int], optional
        Number of molecules to simulate and use for the survival time distribution
        calculation (default 10000). If only an integer value is provided then this
        value will be used for all the simulations. In the case that the provided
        N object is an iterable, each element is used as value for each time-lapse
        condition.

    Returns
    -------
    Dict[str, Dict[str, np.ndarray]]
        A dictionary mapping keys (time-lapse conditions) to the corresponding time and
        value arrays of the survival functions. For example::

            {
                "0.05s": {
                    "time": array([0.05, 0.1, 0.15, ...]),
                    "value": array([1.000e+04, 8.464e+03, 7.396e+03, ...]),
                },
                "1s": {
                    "time": array([1., 2., 3., ...]),
                    "value": array([1.000e+04, 6.925e+03, 5.541e+03, ...]),
                },
            }

    Raises
    ------
    ValueError
        If N is a list and does not have the same length as t_tl_all list.

    Examples
    --------
    >>> data_sim = tl_simulation(np.array([0.005, 0.48, 5.2]),
    ... np.array([0.05, 0.25, 0.7]), 0.03, 0.05, [0.05, 0.2, 1.0, 5.0], N=10000)

    The above function call simulates fluorescence survival time distributions of a
    type of molecule with three different dissociation rates, and a photobleaching
    rate of 0.03 s^-1. The integration time is set to 50 ms and the time-lapse times
    are set to 50 ms, 200 ms, 1 s and 5 s. For each survival time distribution,
    10000 observed molecules are simulated.

    >>> data_sim = tl_simulation(np.array([0.005, 0.48, 5.2]),
    ... np.array([0.05, 0.25, 0.7]), 0.03, 0.05, [0.05, 0.2, 1.0, 5.0],
    ... N=[10000, 5000, 2500, 1500])

    Same as the first example, but now there is a different number of molecules
    simulated for every time-lapse condition.
    )
    """
    # Dictionary with all the simulated data points
    data = dict()

    if isinstance(N, Sequence):
        if len(N) != len(t_tl_all):
            raise ValueError(
                f"Number of `N` values provided ({len(N)}) is not equal to the number "
                f"of provided time-lapse times provided in `t_tl_all` ({len(t_tl_all)})."
            )
    else:
        N = [N for _ in range(len(t_tl_all))]

    # TODO: future - allow this to be done in a multiprocessed way
    for i, t_tl in enumerate(t_tl_all):
        if isinstance(t_tl, str):
            t_tl_s = data_utils.get_time_sec(t_tl)
        elif isinstance(t_tl, float):
            t_tl_s = t_tl

        data_single = tl_simulation_single(k, s, kb, t_int, t_tl_s, N[i])

        data = {**data, **data_single}

    return data