diff --git a/docs/dot/forwardmodel.rst b/docs/dot/forwardmodel.rst index 3c7f8ba..55624a9 100644 --- a/docs/dot/forwardmodel.rst +++ b/docs/dot/forwardmodel.rst @@ -68,7 +68,7 @@ We can now call our model on the start dictionary, and plot it like so: import matplotlib.pyplot as plt - forward_model_start = m(start_params) + forward_model_start, var = m(start_params) plt.plot(m.lc.time[m.mask], m.lc.flux[m.mask], m.lc.flux_err[m.mask], color='k', fmt='.', ecolor='silver') @@ -114,7 +114,7 @@ We can now call our model on the start dictionary, and plot it like so: import matplotlib.pyplot as plt - forward_model_start = m(start_params) + forward_model_start, var = m(start_params) plt.plot(m.lc.time[m.mask][::m.skip_n_points], forward_model_start, color='DodgerBlue') @@ -191,7 +191,7 @@ rate. This time when we plot the result we'll see a more complicated model: import matplotlib.pyplot as plt - forward_model_two = m(two_spot_params) + forward_model_two, var = m(two_spot_params) plt.plot(m.lc.time[m.mask][::m.skip_n_points], forward_model_two, color='DodgerBlue') diff --git a/docs/dot/gettingstarted.rst b/docs/dot/gettingstarted.rst index 08efed5..4eae8b3 100644 --- a/docs/dot/gettingstarted.rst +++ b/docs/dot/gettingstarted.rst @@ -124,10 +124,12 @@ a posteriori solution: with m: map_soln = pm.find_MAP() + fit, var = m(map_soln) + plt.errorbar(m.lc.time[m.mask], m.lc.flux[m.mask], m.scale_errors * m.lc.flux_err[m.mask], color='k', ecolor='silver', fmt='.') - plt.plot(m.lc.time[m.mask][::m.skip_n_points], m(map_soln), + plt.plot(m.lc.time[m.mask][::m.skip_n_points], fit, color='DodgerBlue') plt.gca().set(xlabel='Time [d]', ylabel='Flux') plt.show() @@ -158,7 +160,7 @@ plots, not to give converged publication-ready results. Always make the ``draws`` parameter as large as you can tolerate! The ``init`` keyword argument is set to ``'jitter+adapt_full'``, and this is -very important. This uses `Daniel Foreman-Mackey's dense mass matrix setting +important. This uses `Daniel Foreman-Mackey's dense mass matrix setting `_ which is critical for getting fast results from highly degenerate model parameterizations (like this one). diff --git a/docs/dot/plots.rst b/docs/dot/plots.rst index fd1346a..140b3e5 100644 --- a/docs/dot/plots.rst +++ b/docs/dot/plots.rst @@ -16,7 +16,6 @@ the one below: m, trace_nuts, summary = load_results(results_dir) - posterior_shear(m, trace_nuts) movie(results_dir, m, trace_nuts, xsize=250) .. raw:: html diff --git a/dot/model.py b/dot/model.py index 9cab764..62f1aa6 100644 --- a/dot/model.py +++ b/dot/model.py @@ -2,27 +2,27 @@ import numpy as np import pymc3 as pm +import theano.tensor as tt +from celerite2.theano import terms, GaussianProcess __all__ = ['Model'] -class MeanModel(pm.gp.mean.Mean): +class MeanModel(object): """ Mean model for Gaussian process regression on photometry with starspots """ def __init__(self, light_curve, rotation_period, n_spots, contrast, t0, latitude_cutoff=10, partition_lon=True): - pm.gp.mean.Mean.__init__(self) if contrast is None: contrast = pm.TruncatedNormal("contrast", lower=0.01, upper=0.99, testval=0.4, mu=0.5, sigma=0.5) self.contrast = contrast - self.f0 = pm.TruncatedNormal("f0", mu=0, sigma=1, - testval=0, - lower=-1, upper=2) - + self.f0 = pm.TruncatedNormal("f0", mu=light_curve.flux.max(), sigma=1, + testval=light_curve.flux.max(), + lower=-2, upper=2) self.eq_period = pm.TruncatedNormal("P_eq", lower=0.8 * rotation_period, upper=1.2 * rotation_period, @@ -30,13 +30,14 @@ def __init__(self, light_curve, rotation_period, n_spots, contrast, t0, sigma=0.2 * rotation_period, testval=rotation_period) - self.ln_shear = pm.Uniform("ln_shear", lower=-5, upper=np.log(0.8), - testval=np.log(0.1)) + eps = 1e-5 # Small but non-zero number + BoundedHalfNormal = pm.Bound(pm.HalfNormal, lower=eps, upper=0.8) + self.shear = BoundedHalfNormal("shear", testval=0.2) - self.comp_inclination = pm.Uniform("comp_inc", - lower=0, - upper=np.pi/2, - testval=np.radians(1)) + self.comp_inclination = 0 #pm.Uniform("comp_inc", + # lower=0, + # upper=np.pi/2, + # testval=np.radians(1)) if partition_lon: lon_lims = 2 * np.pi * np.arange(n_spots + 1) / n_spots @@ -59,42 +60,40 @@ def __init__(self, light_curve, rotation_period, n_spots, contrast, t0, shape=(1, n_spots), testval=np.pi/2) - eps = 1e-5 # Small but non-zero number - BoundedHalfNormal = pm.Bound(pm.HalfNormal, lower=eps, upper=0.8) self.rspot = BoundedHalfNormal("R_spot", - sigma=0.2, + sigma=0.4, shape=(1, n_spots), testval=0.3) - self.spot_period = self.eq_period / (1 - pm.math.exp(self.ln_shear) * - pm.math.sin(self.lat - np.pi / 2) ** 2) - self.sin_lat = pm.math.sin(self.lat) - self.cos_lat = pm.math.cos(self.lat) - self.sin_c_inc = pm.math.sin(self.comp_inclination) - self.cos_c_inc = pm.math.cos(self.comp_inclination) + self.spot_period = self.eq_period / (1 - self.shear * + tt.sin(self.lat - np.pi / 2) ** 2) + self.sin_lat = tt.sin(self.lat) + self.cos_lat = tt.cos(self.lat) + self.sin_c_inc = tt.sin(self.comp_inclination) + self.cos_c_inc = tt.cos(self.comp_inclination) self.t0 = t0 def __call__(self, X): - phi = 2 * np.pi / self.spot_period * (X - self.t0) - self.lon + phi = 2 * np.pi / self.spot_period * (X[:, None] - self.t0) - self.lon - spot_position_x = (pm.math.cos(phi - np.pi / 2) * + spot_position_x = (tt.cos(phi - np.pi / 2) * self.sin_c_inc * self.sin_lat + self.cos_c_inc * self.cos_lat) - spot_position_y = -(pm.math.sin(phi - np.pi/2) * + spot_position_y = -(tt.sin(phi - np.pi/2) * self.sin_lat) spot_position_z = (self.cos_lat * self.sin_c_inc - - pm.math.sin(phi) * + tt.sin(phi) * self.cos_c_inc * self.sin_lat) rsq = spot_position_x ** 2 + spot_position_y ** 2 - spot_model = self.f0 - pm.math.sum(self.rspot ** 2 * (1 - self.contrast) * - pm.math.where(spot_position_z > 0, - pm.math.sqrt(1 - rsq), - 0), - axis=1) + spot_model = self.f0 - tt.sum(self.rspot ** 2 * + (1 - self.contrast) * + tt.where(spot_position_z > 0, + tt.sqrt(1 - rsq), 0), + axis=1) return spot_model @@ -104,7 +103,7 @@ class DisableLogger(object): Simple logger disabler to minimize info-level messages during PyMC3 integration """ - def __init__(self, verbose): + def __init__(self, verbose=False): self.verbose = verbose def __enter__(self): @@ -120,7 +119,7 @@ class Model(object): def __init__(self, light_curve, rotation_period, n_spots, scale_errors=1, skip_n_points=1, latitude_cutoff=10, rho_factor=250, verbose=False, min_time=None, max_time=None, contrast=0.7, - partition_lon=True): + partition_lon=True, use_gp=False, name=None): """ Construct a new instance of `~dot.Model`. @@ -166,15 +165,15 @@ def __init__(self, light_curve, rotation_period, n_spots, scale_errors=1, self.scale_errors = scale_errors self.pymc_model = None - self.pymc_gp = None - self.pymc_gp_white = None - self.pymc_gp_matern = None + self.gp = None self._initialize_model(latitude_cutoff=latitude_cutoff, rho_factor=rho_factor, - partition_lon=partition_lon) + partition_lon=partition_lon, + use_gp=use_gp, name=name) - def _initialize_model(self, latitude_cutoff, partition_lon, rho_factor): + def _initialize_model(self, latitude_cutoff, partition_lon, rho_factor, + use_gp=False, name=None): """ Construct a PyMC3 model instance for use with samplers. @@ -188,7 +187,7 @@ def _initialize_model(self, latitude_cutoff, partition_lon, rho_factor): Scale up the GP length scale by a factor `rho_factor` larger than the estimated `rotation_period` """ - with pm.Model(name='dot') as model: + with pm.Model(name=name) as model: mean_func = MeanModel( self.lc, self.rotation_period, @@ -199,28 +198,27 @@ def _initialize_model(self, latitude_cutoff, partition_lon, rho_factor): partition_lon=partition_lon ) - x = self.lc.time[self.mask][::self.skip_n_points] - y = self.lc.flux[self.mask][::self.skip_n_points] - yerr = self.scale_errors * self.lc.flux_err[self.mask][::self.skip_n_points] + x = np.ascontiguousarray(self.lc.time[self.mask][::self.skip_n_points], dtype='float64') + y = np.ascontiguousarray(self.lc.flux[self.mask][::self.skip_n_points], dtype='float64') + yerr = np.ascontiguousarray(self.scale_errors * self.lc.flux_err[self.mask][::self.skip_n_points], dtype='float64') ls = rho_factor * self.rotation_period mean_err = yerr.mean() - gp_white = pm.gp.Marginal(mean_func=mean_func, - cov_func=pm.gp.cov.WhiteNoise(mean_err)) - gp_matern = pm.gp.Marginal(cov_func=mean_err ** 2 * - pm.gp.cov.Matern32(1, ls=ls)) + sigma = pm.HalfNormal("sigma", sigma=mean_err) - gp = gp_white + gp_matern + if use_gp: + # Set up the kernel an GP + kernel = terms.Matern32Term(sigma=sigma, rho=ls) + gp = GaussianProcess(kernel, t=x, yerr=yerr, mean=mean_func) + gp.marginal("gp", observed=y) + else: + pm.Normal("obs", mu=mean_func(x), sigma=yerr, observed=y) - gp.marginal_likelihood("y", X=x[:, None], y=y, noise=yerr) + if use_gp: + self.gp = gp self.pymc_model = model - self.pymc_gp = gp - self.pymc_gp_white = gp_white - self.pymc_gp_matern = gp_matern - self.mean_model = mean_func(x[:, None]) - - return self.pymc_model + self.mean_model = mean_func(x) def __enter__(self): """ @@ -243,7 +241,7 @@ def _check_model(self): if self.pymc_model is None: raise ValueError('Must first call `Model._initialize_model` first.') - def __call__(self, point=None, **kwargs): + def __call__(self, point=None, use_gp=False, **kwargs): """ Evaluate the model with input parameters at ``point`` @@ -252,9 +250,16 @@ def __call__(self, point=None, **kwargs): from exoplanet import eval_in_model with self.pymc_model: - result = eval_in_model( - self.mean_model, - point=point, - **kwargs + if use_gp: + mu, var = eval_in_model( + self.gp.predict(self.lc.time[self.mask][::self.skip_n_points], + return_var=True), point + ) + + mean_eval = eval_in_model( + self.mean_model, point ) - return result + if use_gp: + return mu + mean_eval, var + else: + return mean_eval \ No newline at end of file diff --git a/dot/plots.py b/dot/plots.py index 13ec692..701ab56 100644 --- a/dot/plots.py +++ b/dot/plots.py @@ -5,6 +5,7 @@ from matplotlib import animation from corner import corner as dfm_corner import pymc3 as pm +from exoplanet import eval_in_model __all__ = ['corner', 'posterior_predictive', 'movie', 'gp_from_posterior'] @@ -21,7 +22,7 @@ def corner(trace, **kwargs): return dfm_corner(pm.trace_to_dataframe(trace), **kwargs) -def posterior_predictive(model, trace, samples=100, path=None, **kwargs): +def posterior_predictive(model, trace, samples=10, path=None, **kwargs): """ Take draws from the posterior predictive given a trace and a model. @@ -41,23 +42,29 @@ def posterior_predictive(model, trace, samples=100, path=None, **kwargs): fig, ax : `~matplotlib.figure.Figure`, `~matplotlib.axes.Axes` Resulting figure and axis """ - with model: - ppc = pm.sample_posterior_predictive(trace, samples=samples, - **kwargs) - fig, ax = plt.subplots(figsize=(10, 2.5)) - plt.errorbar(model.lc.time, - model.lc.flux, - model.lc.flux_err, - fmt='.', color='k', ecolor='silver') - - plt.plot(model.lc.time[model.mask][::model.skip_n_points], - ppc['dot_y'].T, - color='DodgerBlue', lw=2, alpha=10/samples) - - plt.gca().set(xlabel='Time [d]', ylabel='Flux', - xlim=[model.lc.time[model.mask].min(), - model.lc.time[model.mask].max()]) + ax.errorbar(model.lc.time, + model.lc.flux, + model.lc.flux_err, + fmt='.', color='k', ecolor='silver') + + x = model.lc.time[model.mask][::model.skip_n_points] + + for i in np.random.randint(0, len(trace), size=samples): + with model: + mu, var = eval_in_model( + model.gp.predict(x, return_var=True), trace[i] + ) + mean_eval = eval_in_model( + model.mean_model, trace[i] + ) + + ax.fill_between(x, mu + np.sqrt(var) + mean_eval, + mu - np.sqrt(var) + mean_eval, alpha=0.2) + + ax.set(xlabel='Time [d]', ylabel='Flux', + xlim=[model.lc.time[model.mask].min(), + model.lc.time[model.mask].max()]) if path is not None: fig.savefig(path, bbox_inches='tight') return fig, ax @@ -80,7 +87,7 @@ def posterior_shear(model, trace, path=None): Resulting figure and axis """ fig, ax = plt.subplots(figsize=(4, 3)) - ax.hist(np.exp(trace['dot_ln_shear']), + ax.hist(np.exp(trace['dot_shear']), bins=25, range=[0, 0.6], color='k') @@ -131,7 +138,7 @@ def movie(results_dir, model, trace, xsize=250, fps=10, """ # Get median parameter values for system setup: n_spots = model.n_spots - shear = np.exp(np.median(trace['dot_ln_shear'])) + shear = np.median(trace['dot_shear']) complement_to_inclination = np.median(trace['dot_comp_inc']) eq_period = np.median(trace['dot_P_eq']) @@ -281,50 +288,7 @@ def animate_func(ii): return fig, m -def last_step(model, trace, x=None): - """ - Plot the last step in the trace, including the GP prediction. - - Parameters - ---------- - model : `~dot.Model` - Model object - trace : `~pymc3.backends.base.MultiTrace` - Trace from SMC/NUTS - """ - if x is None: - x = model.lc.time[model.mask][::model.skip_n_points][:, None] - - if len(x.shape) < 2: - x = x[:, None] - - x_data = model.lc.time[model.mask][::model.skip_n_points] - y_data = model.lc.flux[model.mask][::model.skip_n_points] - yerr_data = model.scale_errors * model.lc.flux_err[model.mask][::model.skip_n_points] - - given = { - "gp": model.pymc_gp, - "X": x_data[:, None], - "y": y_data, - "noise": yerr_data - } - - mu, var = model.pymc_gp.predict(x, - point=trace[-1], - given=given, - diag=True, - ) - sd = np.sqrt(var) - plt.fill_between(x, mu+sd, mu-sd, color='DodgerBlue', alpha=0.2) - plt.plot(x, mu, color='DodgerBlue') - - plt.errorbar(x_data, y_data, yerr_data, - fmt='.', color='k', ecolor='silver', zorder=10) - - return plt.gca() - - -def gp_from_posterior(model, trace_nuts, xnew, path): +def gp_from_posterior(model, trace_nuts, path=None): """ Plot a GP regression with the mean model defined in ``model``, drawn from the posterior distribution in ``trace_nuts``, at times ``xnew``. @@ -340,35 +304,23 @@ def gp_from_posterior(model, trace_nuts, xnew, path): path : None or str Save the resulting plot to ``path`` """ - x_data = model.lc.time[model.mask][::model.skip_n_points] - y_data = model.lc.flux[model.mask][::model.skip_n_points] - yerr_data = model.scale_errors * model.lc.flux_err[model.mask][::model.skip_n_points] - - given = { - "gp": model.pymc_gp, - "X": x_data[:, None], - "y": y_data, - "noise": yerr_data - } - - mu, var = model.pymc_gp.predict(xnew[:, None], - point=trace_nuts[-1], - given=given, - diag=True - ) - sd = np.sqrt(var) - - plt.fill_between(xnew, 1 + mu + sd, 1 + mu - sd, - color='DodgerBlue', alpha=0.5) - - plt.errorbar(x_data, 1 + y_data, yerr_data, - fmt='.', color='k', ecolor='silver', zorder=10) - - residuals = y_data - np.interp(x_data, xnew, mu) - plt.errorbar(x_data, - 1 + residuals + 1.25 * y_data.min(), - yerr_data, - fmt='.', color='k', ecolor='silver') + x_data = np.ascontiguousarray(model.lc.time[model.mask][::model.skip_n_points], dtype='float64') + + plt.errorbar(model.lc.time, model.lc.flux, + model.scale_errors * model.lc.flux_err, + fmt='.', color='k') + for i in np.random.randint(0, len(trace_nuts), size=10): + with model: + mu, var = eval_in_model( + model.gp.predict(x_data, return_var=True), trace_nuts[i] + ) + mean_eval = eval_in_model( + model.mean_model, trace_nuts[i] + ) + + plt.fill_between(x_data, mu + np.sqrt(var) + mean_eval, + mu - np.sqrt(var) + mean_eval, color='DodgerBlue', + alpha=0.2) plt.xlabel('Time [d]') plt.ylabel('Flux') diff --git a/dot/tests/test_model.py b/dot/tests/test_model.py index 36471d3..78c2aa8 100644 --- a/dot/tests/test_model.py +++ b/dot/tests/test_model.py @@ -19,7 +19,7 @@ # This decorator at the top allows us to iterate over the various pairs # of spot lons, lats, rads, and stellar inclinations. We compare the result -# of the chi^2 computation with the 1e-9 and make sure it's smaller, i.e. +# of the chi^2 computation with the 1e-6 and make sure it's smaller, i.e. # we test for excellent agreement between fleck and dot. @pytest.mark.parametrize("test_input,expected", [((lons, lats, rads, inc), -6) @@ -61,7 +61,7 @@ def test_against_fleck(test_input, expected): "dot_lat": np.array([np.pi / 2 - lats]), "dot_lon": np.array([lons]), "dot_comp_inc": np.radians(90 - inc), - "dot_ln_shear": np.log(1e-2), + "dot_shear": 1e-4, "dot_P_eq": 2 * np.pi, "dot_f0": m.lc.flux.max() } @@ -70,7 +70,7 @@ def test_against_fleck(test_input, expected): pm.util.update_start_vals(start, m.pymc_model.test_point, m.pymc_model) # the fit is not normalized to its median like the input light curve is - fit = m(start) + fit, var = m(start) # ...so we normalize it before we compare: fit -= np.median(fit) diff --git a/example.py b/example.py index 9e76940..65cbe7c 100644 --- a/example.py +++ b/example.py @@ -4,21 +4,22 @@ import os import numpy as np import matplotlib.pyplot as plt -from dot import Model, save_results, load_results, ab_dor_example_lc +from dot import Model, ab_dor_example_lc from dot.plots import gp_from_posterior +import pymc3 as pm +import multiprocessing as mp # dot parameters rotation_period = 0.5 # Rotation period in units of light curve time interval n_spots = 4 # Number of starspots results_dir = 'test-go' # Save results here -draws_smc = 100 # Number of draws from the NUTS draws_nuts = 100 # Number of draws from the NUTS tune = 100 # Tuning steps (default: >1000) -cores = 4 # This controls the NUTS, for SMC we recommend cores=4. +cores = 2 # This controls the NUTS, for SMC we recommend cores=4. skip_n_points = 5 # skip every n photometric measurements -rho_factor = 0.2 # length scale of GP in units of the rotation period -scale_errors = 15 # scale up the uncertainties by this factor -contrast = None # If `None` allow to float, else fix it +rho_factor = 0.5 # length scale of GP in units of the rotation period +scale_errors = 3 # scale up the uncertainties by this factor +contrast = 0.4 # If `None` allow to float, else fix it partition_lon = True # Bound the longitudes explored by each spot verbose = True # Allow PyMC3 logger to print to stdout @@ -48,25 +49,16 @@ partition_lon=partition_lon, verbose=verbose ) - print('Running SMC...') - trace_smc = m.sample_smc(draws=draws_smc, parallel=True, cores=4) - print('Running NUTS...') - trace_nuts, summary = m.sample_nuts(trace_smc, draws=draws_nuts, - cores=cores, tune=tune) - - os.mkdir(results_dir) - save_results(results_dir, m, trace_nuts, summary) + with m: + map_soln = pm.find_MAP() - # Otherwise load the previously computed model, results: - else: - m, trace_nuts, summary = load_results(results_dir) - - # Times at which to compute the mean model + GP - xnew = np.linspace(min_time, max_time, 1000) + print('Running NUTS...') + with m: + trace_nuts = pm.sample(start=map_soln, draws=draws_nuts, + cores=cores, tune=tune, + init='jitter+adapt_full', + mp_ctx=mp.get_context("fork")) - # Plot the resulting fit to the light curve with a GP: - plt.figure(figsize=(20, 5)) - gp_from_posterior(m, trace_nuts, xnew, - path=os.path.join(results_dir, 'gp.png')) - plt.show() + gp_from_posterior(m, trace_nuts) + plt.show() \ No newline at end of file