-
Notifications
You must be signed in to change notification settings - Fork 54
Expand file tree
/
Copy pathpanel_data_example.py
More file actions
105 lines (93 loc) · 4.05 KB
/
panel_data_example.py
File metadata and controls
105 lines (93 loc) · 4.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# -*- coding: utf-8 -*-
"""
Example on how to use the GPBoost algorithm for panel data.

Demonstrates tree-boosting combined with grouped random effects and
Gaussian-process (AR(1)-style) models on the Grunfeld investment panel
(columns used below: 'firm', 'year', 'invest', 'value', 'capital').
@author: Fabio Sigrist
"""
import gpboost as gpb
from statsmodels.datasets import grunfeld
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('ggplot')
# Load the Grunfeld panel data set shipped with statsmodels as a pandas DataFrame
data = grunfeld.load_pandas().data
# Visualize response variable ('invest' is used as the label in all models below)
plt.hist(data['invest'], bins=50)
plt.title("Histogram of response variable")
"""
Boosting with two crossed firm and year grouped random effects
"""
# Define random effects model (assuming firm and year random effects)
gp_model = gpb.GPModel(group_data=data[['firm', 'year']])
# Create dataset for gpb.train
data_train = gpb.Dataset(data=data[['value', 'capital']], label=data['invest'])
# Specify boosting parameters as dict
# Note: no attempt has been done to optimaly choose tuning parameters
params = { 'objective': 'regression_l2',
'learning_rate': 1,
'max_depth': 6,
'min_data_in_leaf': 1,
'verbose': 0 }
# Train GPBoost model
bst = gpb.train(params=params,
train_set=data_train,
gp_model=gp_model,
num_boost_round=1800)
# Estimated random effects model (variances of random effects)
gp_model.summary()
# Cross-validation for determining number of boosting iterations
gp_model = gpb.GPModel(group_data=data[['firm', 'year']])
data_train = gpb.Dataset(data=data[['value', 'capital']], label=data['invest'])
cvbst = gpb.cv(params=params, train_set=data_train,
gp_model=gp_model, use_gp_model_for_validation=True,
num_boost_round=5000, early_stopping_rounds=5,
nfold=2, verbose_eval=True, show_stdv=False, seed=1)
print("Best number of iterations: " + str(np.argmin(cvbst['l2-mean'])))
"""
Linear mixed effecst model with two crossed firm and year grouped random effects
"""
lin_gp_model = gpb.GPModel(group_data=data[['firm', 'year']])
# Add interecept for linear model
X = data[['value', 'capital']]
X['intercept'] = 1
lin_gp_model.fit(y=data['invest'], X=X, params={"std_dev": True})
lin_gp_model.summary()
"""
Boosting with grouped firm random effects and one "global" AR(1) year random effect
I.e., all firms share the same temporal AR(1) effect.
"""
gp_model_ar1 = gpb.GPModel(group_data=data['firm'], gp_coords=data['year'], cov_function="exponential")
data_train = gpb.Dataset(data=data[['value', 'capital']], label=data['invest'])
# Train GPBoost model (takes a few seconds)
bst = gpb.train(params=params,
train_set=data_train,
gp_model=gp_model_ar1,
num_boost_round=1800)
# Estimated random effects model (variances of random effects and range parameters)
gp_model_ar1.summary()
cov_pars = gp_model_ar1.get_cov_pars()
phi_hat = np.exp(-1 / cov_pars['GP_range'][0])
sigma2_hat = cov_pars['GP_var'][0] * (1. - phi_hat ** 2)
print("Estimated innovation variance and AR(1) coefficient of year effect:")
print([sigma2_hat ,phi_hat])
"""
Boosting with grouped firm random effects and separate AR(1) year random effects per firm
I.e., every firms has its own temporal AR(1) effect. This can be done using
the 'cluster_ids' parameter.
"""
gp_model_ar1 = gpb.GPModel(group_data=data['firm'], gp_coords=data['year'],
cluster_ids=data['firm'], cov_function="exponential")
# Need to use the more robust option gradient_descent instead of fisher_scoring in this example
gp_model_ar1.set_optim_params(params={"optimizer_cov": "gradient_descent"})
data_train = gpb.Dataset(data=data[['value', 'capital']], label=data['invest'])
# Train GPBoost model (takes a few seconds)
bst = gpb.train(params=params,
train_set=data_train,
gp_model=gp_model_ar1,
num_boost_round=1800)
# Estimated random effects model (variances of random effects and range parameters)
gp_model_ar1.summary()
cov_pars = gp_model_ar1.get_cov_pars()
phi_hat = np.exp(-1 / cov_pars['GP_range'][0])
sigma2_hat = cov_pars['GP_var'][0] * (1. - phi_hat ** 2)
print("Estimated innovation variance and AR(1) coefficient of year effect:")
print([sigma2_hat ,phi_hat])