
Boltzmann wealth model

A simple model for wealth inequality

Here we explore and calibrate a simple model of agents exchanging wealth. The agents start with a Bernoulli-distributed wealth. At each step, every agent with at least one unit of wealth gives one unit to another, randomly encountered agent, unless the ratio of the two agents' wealths is above a certain threshold. This model is adapted from this source.

Note that this notebook requires the mesa library, which you can install with pip install mesa.
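
Before running the full model, it helps to spell out the exchange rule in code. The sketch below only illustrates the rule described above; it is not the actual BoltzmannWealthModel implementation, and the exact definition of the wealth ratio is our own assumption.

def sketch_exchange(giver_wealth, receiver_wealth, generosity_ratio):
    # Illustrative sketch of one exchange (not the mesa implementation):
    # an agent with at least one unit of wealth hands one unit to a randomly
    # encountered agent, unless the ratio of the two wealths exceeds the
    # generosity threshold.
    if giver_wealth < 1:
        return giver_wealth, receiver_wealth
    ratio = max(giver_wealth, receiver_wealth) / max(min(giver_wealth, receiver_wealth), 1)
    if ratio > generosity_ratio:
        return giver_wealth, receiver_wealth
    return giver_wealth - 1, receiver_wealth + 1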

import numpy as np
import matplotlib.pyplot as plt

from models.economics.boltzmann_wealth import BoltzmannWealthModel, compute_gini


model = BoltzmannWealthModel(
    num_agents=50, width=5, height=5, generosity_ratio=2.0, mean_init_wealth=10
)
model.run_model(1000)
agent_wealths = [agent.wealth for agent in model.schedule.agents]

plt.hist(agent_wealths)

plt.xlabel("wealth")
plt.ylabel("number of agents")
Text(0, 0.5, 'number of agents')
compute_gini(model)
0.18516634050880632
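The Gini index above can be reproduced directly from the agent wealths. The helper below is an illustrative numpy version of the standard formula based on sorted cumulative sums, not the library's compute_gini implementation; the same cumulative sums are reused for the Lorenz curve plotted next.

def gini_from_wealths(wealths):
    # Gini index from the sorted cumulative wealth sums
    # (equivalent to half the relative mean absolute difference)
    w = np.sort(np.asarray(wealths, dtype=float))
    n = w.size
    cum = np.cumsum(w)
    return (n + 1 - 2 * np.sum(cum) / cum[-1]) / n

gini_from_wealths(agent_wealths)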
plt.figure(figsize=(5, 5))

s_w = sorted(agent_wealths)

# cumulative share of total wealth held by the poorest fraction of agents
X_lorenz = np.cumsum(s_w) / np.sum(s_w)
X_lorenz = np.insert(X_lorenz, 0, 0)
# sanity check: the Lorenz curve starts at 0 and ends at 1
X_lorenz[0], X_lorenz[-1]

# Lorenz curve of the simulated wealth distribution
plt.plot(
    np.arange(X_lorenz.size) / (X_lorenz.size - 1),
    X_lorenz,
)
# line of perfect equality, for reference
plt.plot(
    np.arange(X_lorenz.size) / (X_lorenz.size - 1),
    np.linspace(0, 1, len(X_lorenz)),
    "k--",
)

plt.xlabel("fraction of (sorted) population")
plt.ylabel("fraction of wealth")
Text(0, 0.5, 'fraction of wealth')
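
The model's data collector records the Gini index at every step, so the full trajectory of inequality over the run can be plotted directly: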
plt.plot(model.datacollector.get_model_vars_dataframe())
[<matplotlib.lines.Line2D at 0x7fbd2c5c33d0>]

Calibrate the model on the Italian Gini Index

In 2017 the Italian Gini index was 0.36 (World Bank estimate)

# target series: the Italian Gini index, held constant over 100 time steps

italian_data = 0.36 * np.ones((100, 1))
# wrapper of the ABM exposing the (theta, N, seed) signature expected by the calibrator
def boltzmann_model(theta, N, seed=None):
    model = BoltzmannWealthModel(
        num_agents=100,
        width=10,
        height=10,
        generosity_ratio=theta[0],
        mean_init_wealth=5,
    )

    # run a burn-in phase of 200 steps followed by the N steps we keep
    model.run_model(200 + N)

    data = model.datacollector.get_model_vars_dataframe().to_numpy()

    # discard the initial observation and the 200 burn-in steps
    return data[201:]


boltzmann_model([1], N=5, seed=None)
array([[0.10088409],
       [0.1002947 ],
       [0.10170923],
       [0.10332024],
       [0.1046169 ]])
# Euclidean distance between series
from black_it.loss_functions.minkowski import MinkowskiLoss

loss = MinkowskiLoss(p=2)
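With p = 2 the Minkowski loss reduces to the Euclidean distance between series. The snippet below spells that quantity out in plain numpy for a single simulated series; it is only an illustration of the distance being minimised, not a call to the black-it API.

# Euclidean (p = 2) distance between one simulated Gini series and the target
sim_series = boltzmann_model([2.0], N=100)
np.sqrt(np.sum((sim_series - italian_data) ** 2))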
# choose samplers
from black_it.samplers.halton import HaltonSampler
from black_it.samplers.random_forest import RandomForestSampler

halton = HaltonSampler(batch_size=4)
forest = RandomForestSampler(batch_size=4)
# range of plausible values for the generosity_ratio parameter, and the precision of the search grid
parameters_bounds = np.array([[0.01, 100.0]]).T
parameters_precision = [0.01]
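With a single parameter bounded in [0.01, 100] and a precision of 0.01, the discretised search grid contains about ten thousand points, which matches the explorable param space size reported by the calibrator below. The arithmetic here is our own back-of-the-envelope check, not taken from the library code.

# approximate number of grid points at the chosen precision (about 10 000)
round((parameters_bounds[1, 0] - parameters_bounds[0, 0]) / parameters_precision[0])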
# initialize calibrator
from black_it.calibrator import Calibrator

cal = Calibrator(
    model=boltzmann_model,
    samplers=[halton, forest],
    loss_function=loss,
    parameters_bounds=parameters_bounds,
    parameters_precision=parameters_precision,
    real_data=italian_data,
    ensemble_size=3,
)

***
Number of free params:       1.
Explorable param space size: 10000.
***

Selecting 4 processes for the parallel evaluation of the model

# calibrate for 5 epochs
params, losses = cal.calibrate(5)

BATCH NUMBER:   1
PARAMS SAMPLED: 0

METHOD: HaltonSampler
----> sim exec elapsed time: 4.0s
---->   min loss new params: 1.27
---->   avg loss new params: 1.55
----> avg loss exist params: 1.55
---->         curr min loss: 1.2709260887871952
====>    total elapsed time: 4.0s

METHOD: RandomForestSampler
Warning: Repeated samples still found after 5 duplication passes. This is probably due to a small search space.
----> sim exec elapsed time: 2.8s
---->   min loss new params: 1.01
---->   avg loss new params: 1.37
----> avg loss exist params: 1.46
---->         curr min loss: 1.0059668811538809
====>    total elapsed time: 4.1s

BATCH NUMBER:   2
PARAMS SAMPLED: 8

METHOD: HaltonSampler
----> sim exec elapsed time: 3.3s
---->   min loss new params: 0.7
---->   avg loss new params: 1.42
----> avg loss exist params: 1.45
---->         curr min loss: 0.6952058780196919
====>    total elapsed time: 3.3s

METHOD: RandomForestSampler
Warning: Repeated samples still found after 5 duplication passes. This is probably due to a small search space.
----> sim exec elapsed time: 2.7s
---->   min loss new params: 0.09
---->   avg loss new params: 0.44
----> avg loss exist params: 1.2
---->         curr min loss: 0.09205704142039979
====>    total elapsed time: 4.1s

BATCH NUMBER:   3
PARAMS SAMPLED: 16

METHOD: HaltonSampler
----> sim exec elapsed time: 2.5s
---->   min loss new params: 1.27
---->   avg loss new params: 1.65
----> avg loss exist params: 1.29
---->         curr min loss: 0.09205704142039979
====>    total elapsed time: 2.5s

METHOD: RandomForestSampler
Warning: Repeated samples still found after 5 duplication passes. This is probably due to a small search space.
----> sim exec elapsed time: 2.7s
---->   min loss new params: 0.08
---->   avg loss new params: 0.16
----> avg loss exist params: 1.1
---->         curr min loss: 0.07538664969409653
====>    total elapsed time: 3.9s

BATCH NUMBER:   4
PARAMS SAMPLED: 24

METHOD: HaltonSampler
----> sim exec elapsed time: 2.7s
---->   min loss new params: 0.37
---->   avg loss new params: 1.22
----> avg loss exist params: 1.12
---->         curr min loss: 0.07538664969409653
====>    total elapsed time: 2.7s

METHOD: RandomForestSampler
Warning: Repeated samples still found after 5 duplication passes. This is probably due to a small search space.
----> sim exec elapsed time: 2.6s
---->   min loss new params: 0.08
---->   avg loss new params: 0.1
----> avg loss exist params: 0.99
---->         curr min loss: 0.07538664969409653
====>    total elapsed time: 3.7s

BATCH NUMBER:   5
PARAMS SAMPLED: 32

METHOD: HaltonSampler
----> sim exec elapsed time: 2.9s
---->   min loss new params: 1.53
---->   avg loss new params: 1.55
----> avg loss exist params: 1.05
---->         curr min loss: 0.07538664969409653
====>    total elapsed time: 2.9s

METHOD: RandomForestSampler
Warning: Repeated samples still found after 5 duplication passes. This is probably due to a small search space.
----> sim exec elapsed time: 2.9s
---->   min loss new params: 0.11
---->   avg loss new params: 0.13
----> avg loss exist params: 0.96
---->         curr min loss: 0.07538664969409653
====>    total elapsed time: 4.1s

# optimal parameter found
params[0]
array([3.7])
# indices of the best and worst parameters sampled during calibration
min_idx = np.argmin(cal.losses_samp)
print(min_idx)
max_idx = np.argmax(cal.losses_samp)
print(max_idx)
23
18

plt.figure()
# plot every sampled Gini series in the background, skipping the worst one
for i, simulated_series in enumerate(cal.series_samp[:, 0, :, 0]):
    if i == max_idx:
        continue
    plt.plot(simulated_series, color="orange", alpha=0.2)

plt.plot(
    cal.series_samp[min_idx, 0, :, 0],
    color="green",
    label="optimal simulated series",
    alpha=0.9,
    linewidth=3,
)
plt.plot(italian_data, "k--", label="reference value", alpha=0.9)

plt.xlabel("time step")
plt.ylabel("Gini index")
plt.legend()
plt.ylim(0, 0.6)
(0.0, 0.6)
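
As a last check, the calibrated generosity ratio can be plugged back into the model wrapper for a fresh simulation: if the calibration generalises across runs, the resulting Gini series should stay close to the 0.36 target.

# re-simulate at the best parameter found and inspect the average Gini index
best_series = boltzmann_model(params[0], N=100)
best_series.mean()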