-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathml.py
More file actions
396 lines (351 loc) · 13.9 KB
/
ml.py
File metadata and controls
396 lines (351 loc) · 13.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
"""Maximum Likelihood estimation of bandit parameters.
See [1] for details.
[1] N. D. Daw, "Trial-by-trial data analysis using computational models,"
Decision making, affect, and learning: Attention and performance XXIII,
vol. 23, p. 1, 2011.
"""
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from collections import defaultdict
from utils import save_figs_as_pdf, softmax
from models import Bandit, BanditCard, BanditCardCues
from models import Agent, AgentCard, AgentCardCues
# Directory with per-subject behavioral pickles (produced by parse.py,
# see fit_behavioral_data below).
Data_Behavior_Dir = 'data_behavior'
# Output directory for generated figures (PDFs of likelihood surfaces).
Fig_Dir = 'figs'
# Directory for serialized DataFrames (fit results, HPS data, all_data.pkl).
DF_Dir = 'df'
class ML(object):
    """Maximum-likelihood estimation of bandit-learning parameters.

    Given a behavioral DataFrame of (cue, action, reward) trials, finds
    the parameter values (learning rate alpha, softmax inverse
    temperature beta, and -- for the policy models -- average reward
    r_bar) that maximize the likelihood of the observed actions.
    """

    def __init__(self, df, n_actions, cues=None, bounds=None,
                 model='constant_step_size',
                 param_value=None):
        """The DataFrame df must contain columns 'action' and 'reward'.

        If `len(cues) > 1`, then it also must include the 'cue' column.

        Parameters
        ----------
        df : pandas.DataFrame
            Behavioral data with 'action' and 'reward' columns (and
            'cue' when more than one cue is fitted).  When no 'cue'
            column exists and `cues` is None, a constant cue 0 column is
            added in place.
        n_actions : int
            Number of available actions (bandit arms).
        cues : int, tuple of int, or None
            Cue(s) whose trials are fitted.  A single int is promoted to
            a one-element tuple.
        bounds : sequence of (low, high) pairs, optional
            Parameter bounds; when given, ml_estimation uses L-BFGS-B.
        model : str
            'sample_average', 'constant_step_size', 'policy', or
            'policy_daw'.
        param_value : dict, optional
            Policy models only: mapping of 'alpha', 'beta', 'r_bar' to
            fixed values, with None marking parameters to estimate.
            Defaults to estimating all three.

        Raises
        ------
        ValueError
            Unknown `model`, or (policy models) no free parameter.
        TypeError
            `cues` is neither None, an int, nor a tuple.
        """
        valid_models = ('sample_average', 'constant_step_size', 'policy',
                        'policy_daw')
        if model not in valid_models:
            # BUG FIX: the old message named only two of the four
            # accepted models.
            raise ValueError('model must be one of {}'.format(
                ', '.join(repr(m) for m in valid_models)))
        # BUG FIX: avoid a mutable default argument; build per instance.
        if param_value is None:
            param_value = {'alpha': None, 'beta': None, 'r_bar': None}
        self.n_actions = n_actions
        self.model = model
        if cues is None:
            if 'cue' in df.columns:
                self.cues = (df['cue'].values[0],)
                print('Using {:d} for the cue.'.format(self.cues[0]))
            else:
                self.cues = (0,)
                df['cue'] = 0
        elif isinstance(cues, int):
            # BUG FIX: this int-to-tuple promotion was unreachable in the
            # original (the tuple type check raised first).
            self.cues = (cues,)
        elif isinstance(cues, tuple):
            self.cues = cues
        else:
            raise TypeError('cues must be a tuple')
        self.df = df
        self.bounds = bounds
        # Initial conditions passed to the optimization function.
        if self.model in ('sample_average', 'constant_step_size'):
            self.opt_ic = [0.1, 0.5]
        elif self.model in ('policy', 'policy_daw'):
            self.param_value = param_value
            n_unknown = list(param_value.values()).count(None)
            if n_unknown == 0:
                raise ValueError('At least one unknown parameter is needed')
            # Random small starting point, one entry per free parameter.
            self.opt_ic = np.random.uniform(0.1, 0.2, n_unknown)
        print(f'ML estimation model:{model} param:{param_value}\n')

    def neg_log_likelihood(self, params):
        """Compute the negative log likelihood of the parameters.

        The data consist of a sequence of (cue, action, reward)
        observations.

        Parameters
        ----------
        params : sequence of float
            Free parameters: (alpha, beta) for the value models, or the
            subset of (alpha, beta, r_bar) left as None in `param_value`
            for the policy models, in that order.

        Returns
        -------
        float
            Negative log likelihood of the observed action sequence.
        """
        df = self.df
        if self.model in ('sample_average', 'constant_step_size'):
            alpha, beta = params
        elif self.model in ('policy', 'policy_daw'):
            # Consume free parameters in fixed order (alpha, beta,
            # r_bar), falling back to the fixed values in param_value.
            # BUG FIX: use `is None`, not `== None`.
            if self.param_value['alpha'] is None:
                alpha, params = params[0], params[1:]
            else:
                alpha = self.param_value['alpha']
            if self.param_value['beta'] is None:
                beta, params = params[0], params[1:]
            else:
                beta = self.param_value['beta']
            if self.param_value['r_bar'] is None:
                r_bar, params = params[0], params[1:]
            else:
                r_bar = self.param_value['r_bar']
        df = self.df[self.df['cue'].isin(self.cues)]
        actions, rewards = df['action'].values, df['reward'].values
        cues = df['cue'].values
        prob_log = 0
        # One value (or preference) vector per cue.
        Q = {cue: np.zeros(self.n_actions) for cue in self.cues}
        k = 1
        for action, reward, cue in zip(actions, rewards, cues):
            if self.model == 'sample_average':
                # NOTE(review): k counts all trials, not per-(cue,
                # action) visits -- confirm this matches the intended
                # sample-average estimator.
                Q[cue][action] += alpha * (reward - Q[cue][action]) / k
                k += 1
            elif self.model == 'constant_step_size':
                Q[cue][action] += alpha * (reward - Q[cue][action])
            elif self.model == 'policy':
                # We are reusing Q, but the right name should be pi.
                probs = softmax(Q[cue], beta)
                for a in (0, 1):  # (0, 1) should be something like self.actions
                    indicator = 1 if a == action else 0
                    Q[cue][a] += alpha * (reward - r_bar) * (indicator - probs[a])
            elif self.model == 'policy_daw':  # Daw simplified rule
                # We are reusing Q, but the right name should be pi.
                Q[cue][action] += alpha * (reward - r_bar)
            # No else: __init__ already validated self.model.
            prob_log += np.log(softmax(Q[cue], beta)[action])
        return -prob_log

    def ml_estimation(self, method_name='BFGS'):
        """Minimize the negative log likelihood.

        Uses `method_name` when unbounded; switches to L-BFGS-B when
        bounds were supplied at construction time.

        Returns
        -------
        scipy.optimize.OptimizeResult
        """
        if self.bounds is None:
            r = minimize(self.neg_log_likelihood, self.opt_ic,
                         method=method_name)
        else:
            r = minimize(self.neg_log_likelihood, self.opt_ic,
                         method='L-BFGS-B',
                         bounds=self.bounds)
        return r

    def fit_model(self):
        """Fit with Nelder-Mead; retry with Powell if it did not converge."""
        r = self.ml_estimation('Nelder-Mead')
        if r.status != 0:
            print('trying with Powell')
            r = self.ml_estimation('Powell')
        return r

    def plot_ml(self, ax, alpha, beta, alpha_hat, beta_hat):
        """Draw the negative-log-likelihood surface over (alpha, beta).

        Marks the true parameters (red square) and/or the estimates
        (red circle) when they are not None; axis limits grow to
        include whichever values are supplied.
        """
        from itertools import product
        n = 30  # grid resolution per axis (was 50)
        alpha_max = 0.2
        beta_max = 1.3
        if alpha is not None:
            alpha_max = alpha_max if alpha < alpha_max else 1.1 * alpha
            beta_max = beta_max if beta < beta_max else 1.1 * beta
        if alpha_hat is not None:
            alpha_max = alpha_max if alpha_hat < alpha_max else 1.1 * alpha_hat
            beta_max = beta_max if beta_hat < beta_max else 1.1 * beta_hat
        alphas = np.linspace(0, alpha_max, n)
        betas = np.linspace(0, beta_max, n)
        Alpha, Beta = np.meshgrid(alphas, betas)
        Z = np.zeros(len(Alpha) * len(Beta))
        for i, (a, b) in enumerate(product(alphas, betas)):
            Z[i] = self.neg_log_likelihood((a, b))
        # product() iterates alpha-major, so reshape then transpose below.
        Z.resize((len(alphas), len(betas)))
        ax.contourf(Alpha, Beta, Z.T, 50, cmap='jet')
        if alpha is not None:
            ax.plot(alpha, beta, 'rs', ms=5)
        if alpha_hat is not None:
            ax.plot(alpha_hat, beta_hat, 'ro', ms=5)
        ax.set_xlabel(r'$\alpha$', fontsize=20)
        ax.set_ylabel(r'$\beta$', fontsize=20)
        return

    def plot_single_subject(self, ax, r, subject, cue):
        """Plot one subject's likelihood surface with the fitted point.

        The estimate is drawn only when the optimizer converged
        (r.status == 0).  The `cue` argument is ignored; the instance's
        cues are shown in the title instead.
        """
        alpha, beta = r.x
        # BUG FIX: ('yes', 'no')[r.status] raised IndexError for any
        # optimizer status other than 0 or 1.
        converged = 'yes' if r.status == 0 else 'no'
        cue = ''.join([str(c) for c in self.cues])
        title = 'Subject: {}, cue: {}, converged: {}'.format(subject, cue,
                                                             converged)
        if r.status == 0:
            self.plot_ml(ax, alpha, beta, None, None)
        else:
            self.plot_ml(ax, None, None, None, None)
        ax.set_title(title)
def simple_bandit_experiment():
    """Simulate a two-armed policy-gradient agent and recover its parameters.

    Generates 100 trials with known alpha/beta under the 'policy_daw'
    model, fits alpha and beta by maximum likelihood with r_bar fixed at
    0.5, prints the optimization result, and plots the likelihood
    surface with true and estimated parameters.

    NOTE: the trailing globals().update(locals()) exports every local to
    the module namespace (interactive debugging aid), so local names here
    are externally visible.
    """
    b = Bandit()
    alpha = 0.2
    beta = 0.3
    r_bar = 0.5
    model = 'policy_daw'
    # Optimizing only beta and:
    # - Seed 2 and trials 300 produces 'Desired error ...'.
    # - Same seed with more than 300 trials produces successful optimization.
    np.random.seed(2)
    trials = 100
    agent = Agent(b, alpha, beta, model=model)
    for _ in range(trials):
        agent.run()
    df = agent.get_df()
    # r_bar is held fixed at 0.5; only alpha and beta are estimated.
    ml = ML(df, 2, model=model,
            param_value={'alpha':None, 'beta':None, 'r_bar':0.5})
    r = ml.ml_estimation()
    print(r)
    plt.close('all')
    ###################### Plot when fitting only one parameter
    # betas = np.linspace(0, 1)
    # mlf = np.frompyfunc(lambda x:ml.neg_log_likelihood([x]), 1, 1)
    # nll = mlf(betas)
    # plt.plot(betas, nll)
    # plt.axvline(beta, c='r')
    # plt.axvline(r.x)
    # plt.show()
    ###################### Plot when fitting alpha and beta
    alpha_hat, beta_hat = r.x[:2]
    fig, ax = plt.subplots(1, 1)
    print('Plotting the results...')
    ml.plot_ml(ax, alpha, beta, alpha_hat, beta_hat)
    plt.show()
    # Debugging aid: expose all locals at module level.
    globals().update(locals())
def card_bandit_experiment():
    """Simulate a 4-choice card bandit agent and fit (alpha, beta).

    Runs 360 trials, saves the generated data to df/data.csv, prints the
    total reward, fits the default ('constant_step_size') model, and
    plots the likelihood surface with true and estimated parameters.

    NOTE: the trailing globals().update(locals()) exports every local to
    the module namespace (interactive debugging aid), so local names here
    are externally visible.
    """
    b = BanditCard()
    alpha = 0.2
    beta = 0.5
    print('alpha: {:.2f} beta: {:.2f}\n'.format(alpha, beta))
    agent = AgentCard(b, alpha, beta)
    trials = 360
    for _ in range(trials):
        agent.run()
    df = agent.get_df()
    df.to_csv(os.path.join(DF_Dir, 'data.csv'), index_label='trial')
    print('Total reward: {:d}\n'.format(df['reward'].sum()))
    ml = ML(df, 4)
    r = ml.ml_estimation()
    alpha_hat, beta_hat = r.x
    print(r)
    fig, ax = plt.subplots(1, 1)
    ml.plot_ml(ax, alpha, beta, alpha_hat, beta_hat)
    plt.show()
    # Debugging aid: expose all locals at module level.
    globals().update(locals())
def card_cue_bandit_experiment():
    """Simulate a cued 4-choice card bandit agent and fit (alpha, beta).

    Runs 1080 trials, saves the generated data to data.csv, fits the
    default model jointly over cues 0 and 1, and plots the likelihood
    surface with true and estimated parameters.

    NOTE: the trailing globals().update(locals()) exports every local to
    the module namespace (interactive debugging aid), so local names here
    are externally visible.
    """
    b = BanditCardCues()
    alpha = 0.2
    beta = 0.5
    print('alpha: {:.2f} beta: {:.2f}\n'.format(alpha, beta))
    agent = AgentCardCues(b, alpha, beta)
    trials = 360*3
    for _ in range(trials):
        agent.run()
    df = agent.get_df()
    df.to_csv('data.csv', index_label='trial')
    # Fit cues 0 and 1 together.
    ml = ML(df, 4, (0,1))
    r = ml.ml_estimation()
    print(r)
    alpha_hat, beta_hat = r.x
    fig, ax = plt.subplots(1, 1)
    ml.plot_ml(ax, alpha, beta, alpha_hat, beta_hat)
    plt.show()
    # Debugging aid: expose all locals at module level.
    globals().update(locals())
def fit_behavioral_data(bounds=None, cues=((0,),(1,)),
                        do_plot=False, model='sample_average'):
    """Fit a model for all subjects.

    The data has been previously parsed by parse.py.

    Parameters
    ----------
    bounds : sequence of (low, high) pairs, optional
        Parameter bounds forwarded to ML (enables L-BFGS-B).
    cues : tuple of tuples
        Cue groups; each group is fitted separately per subject.
    do_plot : bool
        When True, also save one likelihood-surface figure per
        subject/cue group as a single PDF.
    model : str
        Learning-model name accepted by ML.
        BUG FIX: the default used to be 'sample-average' (hyphen),
        which ML.__init__ rejects; the valid name uses an underscore.

    Side effects: writes fit_<model>_<cues>_<bounds>.xlsx/.pkl under
    DF_Dir (merged with the HPS data frame) and optionally a figure PDF
    under Fig_Dir.
    """
    pkls = sorted(os.listdir(Data_Behavior_Dir))
    data = defaultdict(list)
    figs = []
    # Human-readable label per cue group, e.g. (0, 1) -> '01'.
    cues_label = dict((cue, ''.join([str(c) for c in cue])) for cue in cues)
    for pkl in pkls:
        print(pkl)
        df = pd.read_pickle(os.path.join(Data_Behavior_Dir, pkl))
        for cue in cues:
            print('\tcue', cue)
            ml = ML(df, 4, cue, bounds, model)
            r = ml.fit_model()
            alpha, beta = r.x
            data[cues_label[cue] + '_alpha'].append(alpha)
            data[cues_label[cue] + '_beta'].append(beta)
            data[cues_label[cue] + '_status'].append(r.status)
            if do_plot:
                fig, ax = plt.subplots(1, 1)
                # File names start with the two-digit subject number.
                ml.plot_single_subject(ax, r, int(pkl[:2]), cue)
                figs.append(fig)
                plt.close()
        data['subject'].append(int(pkl[:2]))
    cols = ['subject']
    for cue in cues:
        col = '{c}_alpha {c}_beta {c}_status'.format(c=cues_label[cue]).split()
        cols.extend(col)
    df = pd.DataFrame(data, columns=cols)
    # Add HPS data to the data frame.
    hps = pd.read_pickle(os.path.join(DF_Dir, 'hps_df.pkl'))
    df = df.merge(hps, on='subject', how='left')
    cues_str = ''.join(str(cues_label[a]) for a in cues)
    bound_str = 'unbounded' if bounds is None else 'bounded'
    fn = os.path.join(DF_Dir,
                      'fit_{}_{}_{}'.format(model, cues_str, bound_str))
    df.to_excel(fn + '.xlsx', index=False)
    df.to_pickle(fn + '.pkl')
    print('File saved as', fn)
    if do_plot:
        fn = 'nllf_{}_{}_{}.pdf'.format(cues_str, bound_str, model)
        save_figs_as_pdf(figs, os.path.join(Fig_Dir, fn))
def fit_single_subject(subject_number, bounds=None, cues=(0,)):
    """Fit the learning model to one subject's data and plot the fit.

    Loads data_behavior/NN.pkl for the given subject, fits (alpha, beta)
    with ML.fit_model, and shows the negative-log-likelihood surface.
    Returns the scipy OptimizeResult, or None when no data file exists.
    """
    data_file = os.path.join(Data_Behavior_Dir,
                             '{:0>2d}.pkl'.format(subject_number))
    if not os.path.isfile(data_file):
        print('No data for subject', subject_number)
        return
    ml = ML(pd.read_pickle(data_file), 4, cues, bounds)
    result = ml.fit_model()
    plt.close('all')
    fig, ax = plt.subplots(1, 1)
    ml.plot_single_subject(ax, result, subject_number, cues)
    plt.show()
    return result
def get_learner_class(actions, opt_action):
    """Classify a choice sequence as learner vs. non-learner behavior.

    A subject counts as a learner when the optimal action was chosen
    more than 25 times within the final 40 trials.

    Parameters
    ----------
    actions: ndArray or Series
        List of actions selected by the subject
    opt_action: int
        Optimal action

    Returns
    -------
    learner: bool
        True if the subject is a learner
    n_optimals: list
        List with the number of optimal actions in each segment of 20 trials

    Notes
    -----
    See the following for details
    T. Schonberg, N. D. Daw, D. Joel, and J. P. O'Doherty, "Reinforcement
    Learning Signals in the Human Striatum Distinguish Learners from
    Nonlearners during Reward-Based Decision Making," J. Neurosci., vol. 27,
    no. 47, pp. 12860-12867, Nov. 2007.
    """
    BLOCK_SIZE = 20
    LAST_N = 40
    THRESHOLD = 25
    n_blocks = len(actions) // BLOCK_SIZE
    # Optimal-choice count within each consecutive 20-trial block.
    n_optimals = [(chunk == opt_action).sum()
                  for chunk in np.array_split(actions, n_blocks)]
    recent_hits = (actions[-LAST_N:] == opt_action).sum()
    return recent_hits > THRESHOLD, n_optimals
def make_learner_df():
    """Classify every subject as learner/non-learner for cue 1.

    Reads df/all_data.pkl (index includes a 'subject' level), applies
    get_learner_class to each subject's actions for cue 1 (optimal
    choice 23), and returns two DataFrames:

    Returns
    -------
    df_learners : pandas.DataFrame
        Indexed by subject, with a boolean 'learner' column.
    df_n_optimum : pandas.DataFrame
        One row per (subject, 20-trial block): columns 'subject',
        'block' (1-based), 'n_optimum', 'learner'.
    """
    df = pd.read_pickle(os.path.join(DF_Dir, 'all_data.pkl'))
    cue = 1
    opt_choice = 23
    subjects = df.index.get_level_values('subject').unique()
    learners = {}
    n_optimum = defaultdict(list)
    for subject in subjects:
        # BUG FIX: DataFrame.ix was removed from pandas; .loc performs
        # the same selection on the 'subject' index level.
        actions = df[df['cue'] == cue].loc[subject]['action']
        learner, n_per_block = get_learner_class(actions, opt_choice)
        learners[subject] = learner
        for i, n in enumerate(n_per_block):
            n_optimum['subject'].append(subject)
            n_optimum['block'].append(i + 1)
            n_optimum['n_optimum'].append(n)
            n_optimum['learner'].append(learner)
    df_learners = pd.DataFrame(pd.Series(learners), columns=['learner'])
    df_learners.index.set_names('subject', inplace=True)
    cols = ['subject', 'block', 'n_optimum', 'learner']
    df_n_optimum = pd.DataFrame(n_optimum, columns=cols)
    return df_learners, df_n_optimum
def fit_all():
    """Fit the bounded constant-step-size model for every subject."""
    fit_behavioral_data(bounds=((0, 1), (0, 2)),
                        model='constant_step_size',
                        do_plot=False)
if __name__ == '__main__':
    # Alternative entry points, kept for reference:
    # bounds = ((0,1), (0,2))
    # fit_single_subject(int(sys.argv[1]), bounds, cues=(0,1))
    #fit_all()
    simple_bandit_experiment()