"""
Module with function to sample from a provided GRID spectrum or multi-exponential.
"""
import math
from typing import Sequence, Union, Dict
import numpy as np
from . import data_utils
[docs]def tl_simulation_single(
k: np.ndarray,
s: np.ndarray,
kb: float,
t_int: float,
t_tl: float,
N: int = 10000,
) -> Dict[str, Dict[str, np.ndarray]]:
"""Function simulates the fluorescence survival time distributions of a molecule for
one time-lapse conditions. Dissociation and photobleaching of molecules with
user-defined parameters is simulated and the resulting fluorescence survival time
distribution is calculated.
Parameters
----------
k: np.ndarray
Decay rates with units per second.
s: np.ndarray
Amplitudes for the respective decay rates.
kb: float
Photobleaching rate per second (kb = a / t_int).
t_int: float
The integration time in seconds.
t_tl: float
The time-lapse time to simulate in seconds.
N: int, optional
Number of molecules to simulate and use for the survival time distribution
calculation (default 10000).
Returns
-------
Dict[str, Dict[str, np.ndarray]]
A dictionary mapping keys (time-lapse conditions) to the corresponding time and
value arrays of the survival functions. For example::
{
"0.05s": {
"time": array([0.05, 0.1, 0.15, ...])
"value": array([1.000e+04, 8.464e+03, 7.396e+03, ...])
},
}
Raises
------
ValueError
If kb, t_int, t_tl or N is incorrectly set. They should all atleast be larger
than 0.
Examples
--------
>>> data_sim = tl_simulation_single(np.array([0.005, 0.48, 5.2]),
... np.array([0.05, 0.25, 0.7]), 0.03, 0.05, 0.5, N=10000)
The above function call simulates fluorescence survival time distributions of a
type of molecule with three different dissociation rates, and a photobleaching
rate of 0.03 s^-1. The integration time is set to 50 ms and the time-lapse time
is set to 500 ms. For the survival time distribution 10000 observed molecules are
simulated.
"""
if not math.isclose(1.0, np.sum(s)):
raise ValueError(f"Sum of amplitudes should be equal to 1, but is {np.sum(s)}.")
if kb <= 0:
raise ValueError(
"Photobleaching rate ('kb' argument) should be larger than zero"
)
if t_int <= 0:
raise ValueError(
"Integration time ('t_int' argument) should be larger than zero"
)
if t_tl <= 0:
raise ValueError("Time-lapse time ('t_tl' argument) should be larger than zero")
if N <= 0:
raise ValueError(
"Number of simulated molecules ('N' argument) should be larger than zero."
)
BUFFER_SIZE_DEFAULT = 1000
p = s / np.sum(s)
# Can also change kb * t_int to a
keff = (kb * t_int) / t_tl + k
time = np.linspace(0, 1500 * t_tl, num=1501, endpoint=True)
binding = np.zeros(time.shape[0])
binding_sum = 0
# TODO: future - improve speed: multiprocessing, or use "chunks", so do like
# 1000 molecules at the same time with arrays?
rng = np.random.default_rng()
# Simulate until enough points have been captured or the computation time is
# exceeded
buffer_size = min(BUFFER_SIZE_DEFAULT, N)
count = 0
max_count = 1000 * N
while binding_sum < N and count < max_count:
keff_state = rng.choice(keff, size=(buffer_size,), p=p)
lifetime = rng.exponential(scale=1 / keff_state)
idx = lifetime // t_tl
idx = idx.astype(np.int32)
idx_uniq, idx_count = np.unique(idx, return_counts=True)
binding[idx_uniq] = binding[idx_uniq] + idx_count
# Only count the molecules that are bound for at least one delta t
binding_sum = np.sum(binding) - binding[0]
# Keep track of the number of simulations ran
count = count + buffer_size
# Reset the buffer size if required
buffer_size = min(BUFFER_SIZE_DEFAULT, int(round(N - binding_sum, 0)))
if count >= max_count:
print(
"The simulation was finished pre-emptively because max_count",
"(1000*N) value was reached.",
)
# Remove the the molecules that are bound for 0 frames
binding = binding[1:]
# Remove the first time point as well
time = time[1:]
# Calculate the survival function values with cumulative summing
value = np.cumsum(binding[::-1])[::-1]
# Delete the empty elements
time = time[value > 0]
value = value[value > 0]
data = {f"{t_tl}s": {"time": time, "value": value}}
# Make sure the data is formatted correctly
data = data_utils.fmt_t_str_data(data)
return data
[docs]def tl_simulation(
k: np.ndarray,
s: np.ndarray,
kb: float,
t_int: float,
t_tl_all: Sequence[Union[float, str]],
N: Union[int, Sequence[int]] = 10000,
) -> Dict[str, Dict[str, np.ndarray]]:
"""
Function simulates the fluorescence survival time distributions of a molecule for
different time-lapse conditions. Dissociation and photobleaching of molecules with
user-defined parameters is simulated and the resulting fluorescence survival time
distribution is calculated. This is done for all the different user-specified
time-lapse conditions.
Parameters
----------
k : np.ndarray
Decay rates with units per second.
s : np.ndarray
Amplitudes for the respective decay rates.
kb : float
Photobleaching rate per second (kb = a / t_int).
t_int : float
The integration time in seconds.
t_tl_all : Sequence[float | str]
The time-lapse times to simulate. If the time-lapse times are provided as
floats then they are interpreted as seconds. However, the time-lapse times can
also be provided as strings, eg. '100ms' or '1.5s'.
N : int | Sequence[int], optional
Number of molecules to simulate and use for the survival time distribution
calculation (default 10000). If only an integer value is provided then this
value will be used for all the simulations. In the case that the provided
N object is an iterable, each element is used as value for each time-lapse
condition.
Returns
-------
Dict[str, Dict[str, np.ndarray]]
A dictionary mapping keys (time-lapse conditions) to the corresponding time and
value arrays of the survival functions. For example::
{
"0.05s": {
"time": array([0.05, 0.1, 0.15, ...]),
"value": array([1.000e+04, 8.464e+03, 7.396e+03, ...]),
},
"1s": {
"time": array([1., 2., 3., ...]),
"value": array([1.000e+04, 6.925e+03, 5.541e+03, ...]),
},
}
Raises
------
ValueError
If N is a list and does not have the same length as t_tl_all list.
Examples
--------
>>> data_sim = tl_simulation(np.array([0.005, 0.48, 5.2]),
... np.array([0.05, 0.25, 0.7]), 0.03, 0.05, [0.05, 0.2, 1.0, 5.0], N=10000)
The above function call simulates fluorescence survival time distributions of a
type of molecule with three different dissociation rates, and a photobleaching
rate of 0.03 s^-1. The integration time is set to 50 ms and the time-lapse times
are set to 50 ms, 200 ms, 1 s and 5 s. For each survival time distribution,
10000 observed molecules are simulated.
>>> data_sim = tl_simulation(np.array([0.005, 0.48, 5.2]),
... np.array([0.05, 0.25, 0.7]), 0.03, 0.05, [0.05, 0.2, 1.0, 5.0],
... N=[10000, 5000, 2500, 1500])
Same as the first example, but now there is a different number of molecules
simulated for every time-lapse condition.
)
"""
# Dictionary with all the simulated data points
data = dict()
if isinstance(N, Sequence):
if len(N) != len(t_tl_all):
raise ValueError(
f"Number of `N` values provided ({len(N)}) is not equal to the number "
f"of provided time-lapse times provided in `t_tl_all` ({len(t_tl_all)})."
)
else:
N = [N for _ in range(len(t_tl_all))]
# TODO: future - allow this to be done in a multiprocessed way
for i, t_tl in enumerate(t_tl_all):
if isinstance(t_tl, str):
t_tl_s = data_utils.get_time_sec(t_tl)
elif isinstance(t_tl, float):
t_tl_s = t_tl
data_single = tl_simulation_single(k, s, kb, t_int, t_tl_s, N[i])
data = {**data, **data_single}
return data