Finding the parameters of a normal distribution
In this tutorial we will use black-it to find the mean and variance of a normal distribution by fitting the 'model' to a dataset. Of course, the parameters of a normal distribution can be obtained far more efficiently (for instance by maximum likelihood), but this simple example is useful to understand how black-it works in practice.
# import standard libraries
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
We start by loading the dataset that we want to fit with our model.
# load a dataset
true_params = [1, 1] # in general these are not known!
real_data = np.atleast_2d(
np.loadtxt(f"data/gaussian_mean{true_params[0]}_var{true_params[1]}.txt")
).T
plt.plot(real_data[:, 0])
plt.xlabel("time step")
plt.ylabel("value")
Initialize a calibrator object
To set up a calibration, one first needs to define the following components:
- a model to be calibrated
- a loss function to measure the distance between the real time series and the simulated time series
- a set of samplers that iteratively suggest a set of parameter values to explore
- the parameter space that should be explored
1) Model simulator
# a normal distribution with unknown mean and variance
from models.simple_models import NormalMV
# when called with a set of parameter values, the model provides a simulated time series
NormalMV([1, 1], N=10, seed=0)
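As a quick illustrative check (assuming the model returns the simulated series as a 2-D NumPy array with one row per time step), we can store the output and inspect its shape:
# illustrative check: the simulated series is assumed to be a 2-D array
# with one row per time step and one column per variable
simulated_series = NormalMV([1, 1], N=10, seed=0)
print(simulated_series.shape)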
2) Loss function
# loss function based on the Method Of Moments
from black_it.loss_functions.msm import MethodOfMomentsLoss
loss = MethodOfMomentsLoss()
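To get a feeling for the loss before running the full calibration, and assuming the loss object exposes a compute_loss(sim_data_ensemble, real_data) method whose first argument is an ensemble of simulated series, one could evaluate it on a single simulation; this is only a sketch and is not part of the calibration itself.
# sketch: evaluate the loss of a single simulated series against the real data
# (assumes compute_loss takes an ensemble of simulated series as its first argument)
sim_series = NormalMV([1, 1], N=real_data.shape[0], seed=0)
print(loss.compute_loss(np.array([sim_series]), real_data))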
3) Samplers
# import some samplers
from black_it.samplers.best_batch import BestBatchSampler
from black_it.samplers.halton import HaltonSampler
from black_it.samplers.r_sequence import RSequenceSampler
from black_it.samplers.random_forest import RandomForestSampler
from black_it.samplers.random_uniform import RandomUniformSampler
# initialize the samplers with their specific 'batch_size', i.e. the number of points
# explored every time they are called
batch_size = 4
random_sampler = RandomUniformSampler(batch_size=batch_size)
rseq_sampler = RSequenceSampler(batch_size=batch_size)
halton_sampler = HaltonSampler(batch_size=batch_size)
best_batch_sampler = BestBatchSampler(batch_size=batch_size)
random_forest_sampler = RandomForestSampler(batch_size=batch_size)
samplers = [
random_sampler,
rseq_sampler,
halton_sampler,
random_forest_sampler,
best_batch_sampler,
]
4) Parameter space (bounds and precision)
# the full space of parameters is defined by the lower and upper bounds
# and by the precision of each parameter
bounds = [
    [0.00, 0.01],  # lower bounds of the mean and of the variance
    [2.00, 2.00],  # upper bounds of the mean and of the variance
]
precisions = [0.0001, 0.0001]
Finally, initialize a Calibrator object.
from black_it.calibrator import Calibrator
# initialize a Calibrator object
cal = Calibrator(
samplers=samplers,
real_data=real_data,
model=NormalMV,
parameters_bounds=bounds,
parameters_precision=precisions,
ensemble_size=5,
loss_function=loss,
saving_folder=None,
)
Calibration
params, losses = cal.calibrate(10)
# the sampled parameters are returned sorted by increasing loss,
# so the first element is the best parameter set found so far
params[0]
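Assuming the returned arrays are sorted consistently, the loss associated with the best parameter set is simply the first element of losses:
# loss associated with the best parameters found so far
losses[0]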
Plots
# index of the minimum loss
idxmin = np.argmin(cal.losses_samp)
param_min = cal.params_samp[idxmin]
# scatter plot of losses
plt.figure(figsize=(6, 5))
plt.scatter(
cal.params_samp[:, 0], cal.params_samp[:, 1], c=cal.losses_samp, edgecolor="k"
)
plt.scatter(param_min[0], param_min[1], marker="x", s=500, color="y")  # best parameters found
plt.scatter(true_params[0], true_params[1], marker="x", s=500, color="red")  # true parameters
plt.xlabel("mean")
plt.ylabel("variance")
plt.colorbar()
from black_it.utils.time_series import get_mom_ts
real_moments = get_mom_ts(cal.real_data)
simulated_moments = get_mom_ts(cal.series_samp[idxmin, 0, :, :])
# agreement of moments
plt.figure()
plt.plot(real_moments, "-o", label="real series")
plt.plot(simulated_moments, "-o", label="simulated series")
plt.xlabel("moment index")
plt.xticks(np.arange(18))
plt.ylabel("moments")
plt.legend()
# convergence
losses_per_batch = [
cal.losses_samp[cal.batch_num_samp == i]
for i in range(int(max(cal.batch_num_samp)) + 1)
]
mins_per_batch = np.array([np.min(l) for l in losses_per_batch])
cummin_per_batch = [
np.min(mins_per_batch[: i + 1]) for i in range(mins_per_batch.shape[0])
]
plt.figure()
plt.plot(mins_per_batch, "-o", label="batch minimum")
plt.plot(cummin_per_batch, "-o", label="cumulative minimum")
plt.ylabel("loss")
plt.xlabel("batch number")
plt.legend()