diff --git a/Dockerfile b/Dockerfile index 411c63f..bb25b56 100644 --- a/Dockerfile +++ b/Dockerfile @@ -43,7 +43,7 @@ RUN curl -sLo ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-late && rm ~/miniconda.sh # Install Conda Environment -RUN conda env update -f environment.yml -n base --prune +RUN conda env update -f environment_gcp.yml -n base --prune # Check environments and versions ENV PYTHONPATH="/app" diff --git a/README.rst b/README.rst index bf8e9e3..2903d62 100644 --- a/README.rst +++ b/README.rst @@ -12,9 +12,8 @@ dot :target: http://www.astropy.org :alt: Powered by Astropy Badge -.. raw:: html - - +.. image:: https://vectr.com/bmmorris/a1PT00ATbi.png?width=300&height=300&select=a1PT00ATbipage0 + :alt: dot logo Dot is a forward model for starspot rotational modulation in Python. It is similar to `fleck `_ diff --git a/docs/conf.py b/docs/conf.py index 763a22f..9c94e4c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -76,8 +76,10 @@ # |version| and |release|, also used in various other places throughout the # built documents. -import_module(setup_cfg['name']) -package = sys.modules[setup_cfg['name']] +# Note: For dot, the package name is different from the project name! +package_name = 'dot' +import_module(package_name) +package = sys.modules[package_name] # The short X.Y version. version = package.__version__.split('-', 1)[0] diff --git a/docs/dot/dev.rst b/docs/dot/dev.rst new file mode 100644 index 0000000..0ad75c4 --- /dev/null +++ b/docs/dot/dev.rst @@ -0,0 +1,72 @@ +************** +For developers +************** + +Contributing +------------ + +``dot`` is open source and built on open source, and we'd love to have your +contributions to the software. + +To make a code contribution for the first time, please follow these +`delightfully detailed instructions from astropy +`_. + +For coding style guidelines, we also point you to the +`astropy style guide `_. 
+ +Building the docs +^^^^^^^^^^^^^^^^^ + +You can check that your documentation builds successfully by building the docs +locally. Run:: + + pip install tox + tox -e build_docs + +Testing +^^^^^^^ + +You can check that your new code doesn't break the old code by running the tests +locally. Run:: + + tox -e test + + +Releasing dot +^^^^^^^^^^^^^ + +Here are some quick notes on releasing dot. + +The astropy package template that dot is built on requires the following +steps to prepare a release. First you need to clean up the repo before you +release it. + +.. warning:: + + This step will delete everything in the repository that isn't already + tracked by git. This is not reversible. Be careful! + +To clean up the repo (double warning: this deletes everything not already +tracked by git), run:: + + git clean -dfx # warning: this deletes everything not tracked by git + +Next we use PEP517 to build the source distribution:: + + pip install pep517 + python -m pep517.build --source --out-dir dist . + +There should now be a ``.tar.gz`` file in the ``dist`` directory which +contains the package as it will appear on PyPI. Unpack it, and check that it +looks the way you expect it to. + +To validate the package and upload to PyPI, run:: + + pip install twine + twine check dist/* + twine upload dist/spotdot*.tar.gz + +For more on package releases, check out `the OpenAstronomy packaging guide +`_. + diff --git a/docs/dot/forwardmodel.rst b/docs/dot/forwardmodel.rst new file mode 100644 index 0000000..55624a9 --- /dev/null +++ b/docs/dot/forwardmodel.rst @@ -0,0 +1,203 @@ +.. _forward-model: + +************* +Forward Model +************* + +Single Spot +----------- + +Perhaps the best way to learn how ``dot`` works is to experiment with it, which +we will do in this tutorial. Let's suppose first we have a star with a single +spot, defined its latitude, longitude, and radius. We'll place the star on the +prime meridian (zero point in longitude) and the equator. 
We'll give the star +a rotation period of 0.5 days, and the spot will have contrast 0.3. To do this, +we'll first need to generate an instance of the `~dot.Model` object: + +.. code-block:: python + + import pymc3 as pm + import numpy as np + from lightkurve import LightCurve + + from dot import Model + + times = np.linspace(-1, 1, 1000) + fluxes = np.zeros_like(times) + errors = np.ones_like(times) + + rotation_period = 0.5 # days + stellar_inclination = 90 # deg + + m = Model( + light_curve=LightCurve(times, fluxes, errors), + rotation_period=rotation_period, + n_spots=1, + contrast=0.3 + ) + +Now that our `~dot.Model` is specified and the time points are defined in the +`~lightkurve.lightcurve.LightCurve` object, we can specify the spot properties: + +.. code-block:: python + + # Create a starting point for the dot model + start_params = { + "dot_R_spot": np.array([[0.1]]), + "dot_lat": np.array([[np.pi/2]]), + "dot_lon": np.array([[0]]), + "dot_comp_inc": np.radians(90 - stellar_inclination), + "dot_ln_shear": -3, + "dot_P_eq": rotation_period, + "dot_f0": 1 + } + + # Need to call this to validate ``start_params`` + pm.util.update_start_vals(start_params, m.pymc_model.test_point, m.pymc_model) + +We specify spot properties with a dictionary which contains the spot radii, +latitudes, longitudes, the complementary angle to the stellar inclination, the +natural log of the shear rate, the equatorial rotation period and the +baseline flux of the star. We then call `~pymc3.util.update_start_vals` on the +dictionary with our `~dot.Model` object, which translates the user-facing, +human-friendly coordinates into the optimizer-friendly transformed coordinates. + +We can now call our model on the ``start_params`` dictionary, and plot it like so: + +.. 
code-block:: python + + import matplotlib.pyplot as plt + + forward_model_start, var = m(start_params) + + plt.errorbar(m.lc.time[m.mask], m.lc.flux[m.mask], m.lc.flux_err[m.mask], + color='k', fmt='.', ecolor='silver') + plt.plot(m.lc.time[m.mask][::m.skip_n_points], forward_model_start, + color='DodgerBlue') + plt.show() + +.. plot:: + + import pymc3 as pm + import numpy as np + from lightkurve import LightCurve + + from dot import Model + + times = np.linspace(-1, 1, 1000) + fluxes = np.zeros_like(times) + errors = np.ones_like(times) + + rotation_period = 0.5 # days + stellar_inclination = 90 # deg + + m = Model( + light_curve=LightCurve(times, fluxes, errors), + rotation_period=rotation_period, + n_spots=1, + contrast=0.3 + ) + + # Create a starting point for the dot model + start_params = { + "dot_R_spot": np.array([[0.1]]), + "dot_lat": np.array([[np.pi/2]]), + "dot_lon": np.array([[0]]), + "dot_comp_inc": np.radians(90 - stellar_inclination), + "dot_ln_shear": -3, + "dot_P_eq": rotation_period, + "dot_f0": 1 + } + + # Need to call this to validate ``start_params`` + pm.util.update_start_vals(start_params, m.pymc_model.test_point, m.pymc_model) + + import matplotlib.pyplot as plt + + forward_model_start, var = m(start_params) + + plt.plot(m.lc.time[m.mask][::m.skip_n_points], forward_model_start, + color='DodgerBlue') + plt.gca().set(xlabel='Time [d]', ylabel='Flux') + plt.show() + +In the above plot, we see the forward model for the spot modulation of a single +spot on a rotating star. + +Differentially rotating spots +----------------------------- + +The syntax is similar for multiple spots; we just add extra elements to the +numpy arrays which determine the spot parameters, like so: + +.. 
code-block:: python + + m = Model( + light_curve=LightCurve(times, fluxes, errors), + rotation_period=rotation_period, + n_spots=2, + contrast=0.3 + ) + + # Create a starting point for the dot model + two_spot_params = { + "dot_R_spot": np.array([[0.1, 0.05]]), + "dot_lat": np.array([[np.pi/2, np.pi/4]]), + "dot_lon": np.array([[0, np.pi]]), + "dot_comp_inc": np.radians(90 - stellar_inclination), + "dot_ln_shear": np.log(0.2), + "dot_P_eq": rotation_period, + "dot_f0": 1 + } + +Note this time that we've set the shear rate to 0.2, which is the solar shear +rate. This time when we plot the result we'll see a more complicated model: + +.. plot:: + + import pymc3 as pm + import numpy as np + from lightkurve import LightCurve + + from dot import Model + + times = np.linspace(-1, 1, 1000) + fluxes = np.zeros_like(times) + errors = np.ones_like(times) + + rotation_period = 0.5 # days + stellar_inclination = 90 # deg + + m = Model( + light_curve=LightCurve(times, fluxes, errors), + rotation_period=rotation_period, + n_spots=2, + contrast=0.3 + ) + + # Create a starting point for the dot model + two_spot_params = { + "dot_R_spot": np.array([[0.1, 0.05]]), + "dot_lat": np.array([[np.pi/2, np.pi/4]]), + "dot_lon": np.array([[0, np.pi]]), + "dot_comp_inc": np.radians(90 - stellar_inclination), + "dot_ln_shear": np.log(0.2), + "dot_P_eq": rotation_period, + "dot_f0": 1 + } + + # Need to call this to validate ``two_spot_params`` + pm.util.update_start_vals(two_spot_params, m.pymc_model.test_point, m.pymc_model) + + import matplotlib.pyplot as plt + + forward_model_two, var = m(two_spot_params) + + plt.plot(m.lc.time[m.mask][::m.skip_n_points], forward_model_two, + color='DodgerBlue') + plt.gca().set(xlabel='Time [d]', ylabel='Flux') + plt.show() + +Now you can see the effect of differential rotation on the light curve -- the +smaller, higher latitude spot is rotating around the stellar surface with a +different rate than the large, equatorial spot. 
diff --git a/docs/dot/gettingstarted.rst b/docs/dot/gettingstarted.rst index a5f1bb9..4eae8b3 100644 --- a/docs/dot/gettingstarted.rst +++ b/docs/dot/gettingstarted.rst @@ -84,16 +84,22 @@ In real observations, you should make ``skip_n_points`` closer to unity. The first thing we should do is check if our model can approximate the data. Here's a quick sanity check that our model is defined on the correct bounds, -our errorbar scaling is appropriate, and the number of spots is a good guess: +our errorbar scaling is appropriate, and the number of spots is a good guess, +which we get from running `~pymc3.tuning.find_MAP` which finds the maximum +a posteriori solution: .. code-block:: python - m.optimize(plot=True) + import pymc3 as pm + + with m: + map_soln = pm.find_MAP() .. plot:: import numpy as np from dot import ab_dor_example_lc, Model + import pymc3 as pm min_time = 0 max_time = 2 @@ -112,10 +118,21 @@ our errorbar scaling is appropriate, and the number of spots is a good guess: skip_n_points=10, min_time=min_time, max_time=max_time, - scale_errors=10 + scale_errors=3 ) - map_soln = m.optimize(plot=True) + with m: + map_soln = pm.find_MAP() + + fit, var = m(map_soln) + + plt.errorbar(m.lc.time[m.mask], m.lc.flux[m.mask], + m.scale_errors * m.lc.flux_err[m.mask], color='k', + ecolor='silver', fmt='.') + plt.plot(m.lc.time[m.mask][::m.skip_n_points], fit, + color='DodgerBlue') + plt.gca().set(xlabel='Time [d]', ylabel='Flux') + plt.show() That fit looks pretty good for an initial guess with no manual-tuning and only two spots! It looks to me like the model probably has sufficient but not @@ -125,28 +142,27 @@ distributions for the stellar and spot parameters. Sampling -------- -Next, we can sample the posterior distributions using the -`Sequential Monte Carlo (SMC) `_ -sampler in `pymc3 `_ with: +We'll sample the posterior distributions using the +`No U-Turn Sampler (NUTS) `_ implemented by +`pymc3 `_ by using the normal syntax for pymc3: .. 
code-block:: python - trace_smc = m.sample_smc(draws=100) - -This will give us a quick fit to the light curve while exploring parameter -degeneracies that were ignored in the first optimization step. - -Next we'll sample the posterior distributions using the -`No U-Turn Sampler (NUTS) `_ implemented by -pymc3: + import pymc3 as pm -.. code-block:: python + with m: + trace_nuts = pm.sample(start=map_soln, draws=1000, cores=2, + init='jitter+adapt_full') - trace_nuts, summary = m.sample_nuts(trace_smc, draws=1000, cores=2) +where we use `~pymc3.sampling.sample` to draw samples from the posterior +distribution. The value for ``draws`` used above are chosen to produce quick +plots, not to give converged publication-ready results. Always make the +``draws`` parameter as large as you can tolerate! -The values for ``draws`` and ``tune`` used above are chosen to produce quick -plots, not to give converged publication-ready results. Always make these -parameters as large as you can tolerate! +The ``init`` keyword argument is set to ``'jitter+adapt_full'``, and this is +important. This uses `Daniel Foreman-Mackey's dense mass matrix setting +`_ which is critical for getting fast +results from highly degenerate model parameterizations (like this one). Finally, let's plot our results: @@ -160,9 +176,11 @@ Finally, let's plot our results: .. 
plot:: import numpy as np + import matplotlib.pyplot as plt + import pymc3 as pm + from dot import ab_dor_example_lc, Model from dot.plots import posterior_predictive - import matplotlib.pyplot as plt min_time = 0 max_time = 2 @@ -181,12 +199,17 @@ Finally, let's plot our results: skip_n_points=10, min_time=min_time, max_time=max_time, - scale_errors=10 + scale_errors=3 ) - trace_smc = m.sample_smc(draws=50) - trace_nuts, summary = m.sample_nuts(trace_smc, draws=10, - cores=2, tune=10) + + with m: + map_soln = pm.find_MAP() + + with m: + trace_nuts = pm.sample(start=map_soln, draws=100, tune=100, cores=2, + init='jitter+adapt_full') + fig, ax = posterior_predictive(m, trace_nuts, samples=10) ax.set_xlim([min_time, max_time]) fig.tight_layout() diff --git a/docs/dot/install.rst b/docs/dot/install.rst new file mode 100644 index 0000000..48c79f0 --- /dev/null +++ b/docs/dot/install.rst @@ -0,0 +1,16 @@ +Installation +============ + +To install the latest version of ``dot`` via pip, type:: + + pip install spotdot + +You can also get the latest version hot off the github via:: + + pip install git+https://github.com/bmorris3/dot.git + +To build ``dot`` from source, clone the git repository and install with:: + + git clone https://github.com/bmorris3/dot.git + cd dot + python setup.py install diff --git a/docs/dot/plots.rst b/docs/dot/plots.rst index fd1346a..140b3e5 100644 --- a/docs/dot/plots.rst +++ b/docs/dot/plots.rst @@ -16,7 +16,6 @@ the one below: m, trace_nuts, summary = load_results(results_dir) - posterior_shear(m, trace_nuts) movie(results_dir, m, trace_nuts, xsize=250) .. raw:: html diff --git a/docs/index.rst b/docs/index.rst index ad68e38..01a9d62 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -14,9 +14,12 @@ is `available on GitHub `_. .. 
toctree:: :maxdepth: 2 + dot/install.rst dot/gettingstarted.rst + dot/forwardmodel.rst dot/plots.rst dot/index.rst + dot/dev.rst Acknowledgements ---------------- diff --git a/dot/io.py b/dot/io.py index d2c1ae8..e179199 100644 --- a/dot/io.py +++ b/dot/io.py @@ -5,16 +5,23 @@ import pandas as pd from lightkurve import LightCurve, search_lightcurvefile import h5py +import dot +from pymc3.backends.base import MultiTrace -__all__ = ['save_results', 'load_results', 'ab_dor_example_lc', - 'load_light_curve', 'load_rotation_period'] +__all__ = [ + "save_results", + "load_results", + "ab_dor_example_lc", + "load_light_curve", + "load_rotation_period", +] -hdf5_archive_disk = '/110k_pdcsap/' -hdf5_index_path = '110k_rotation_mcquillan_pdcsap_smooth_index_0724.csv' +hdf5_archive_disk = "/110k_pdcsap/" +hdf5_index_path = "110k_rotation_mcquillan_pdcsap_smooth_index_0724.csv" -def save_results(name, model, trace, summary): +def save_results(name, model, trace, summary=None): """ Save a trace to a pickle file. 
@@ -29,13 +36,15 @@ def save_results(name, model, trace, summary): summary : `~pandas.DataFrame` Dataframe containing summary statistics """ - with open(os.path.join(name, 'model.pkl'), 'wb') as buff: + with open(os.path.join(name, "model.pkl"), "wb") as buff: pickle.dump(model, buff) - with open(os.path.join(name, 'trace_nuts.pkl'), 'wb') as buff: + with open(os.path.join(name, "trace.pkl"), "wb") as buff: pickle.dump(trace, buff) - summary.to_pickle(os.path.join(name, 'summary.pkl')) + if summary is not None: + summary.to_pickle(os.path.join(name, "summary.pkl")) + def load_results(name): @@ -56,13 +64,13 @@ def load_results(name): summary : `~pandas.DataFrame` Dataframe containing summary statistics """ - with open(os.path.join(name, 'model.pkl'), 'rb') as buff: + with open(os.path.join(name, "model.pkl"), "rb") as buff: model = pickle.load(buff) - with open(os.path.join(name, 'trace_nuts.pkl'), 'rb') as buff: + with open(os.path.join(name, "trace.pkl"), "rb") as buff: trace_nuts = pickle.load(buff) - summary = pd.read_pickle(os.path.join(name, 'summary.pkl')) + summary = pd.read_pickle(os.path.join(name, "summary.pkl")) return model, trace_nuts, summary @@ -83,8 +91,7 @@ def ab_dor_example_lc(path=None): Light curve of AB Doradus """ if path is None: - path = os.path.join(os.path.dirname(__file__), 'data', - 'abdor_lc_example.npy') + path = os.path.join(os.path.dirname(__file__), "data", "abdor_lc_example.npy") return LightCurve(*np.load(path)) @@ -102,8 +109,7 @@ def load_light_curve(kic): lc : `~lightkurve.lightcurve.LightCurve` PDCSAP light curve object """ - on_gcp = os.path.exists(os.path.join(hdf5_archive_disk, - hdf5_index_path)) + on_gcp = os.path.exists(os.path.join(hdf5_archive_disk, hdf5_index_path)) # If on Google Cloud if on_gcp: try: @@ -134,7 +140,7 @@ def load_from_hdf5(kic, data_path=None, index_file=None): star_path_list = stars_index.loc[stars_index["KIC"] == kic]["filepath"].values if len(star_path_list) == 0: - raise ValueError(f'Target KIC {kic} not in 
database.') + raise ValueError(f"Target KIC {kic} not in database.") star_path = star_path_list[0] with h5py.File(os.path.join(data_path, star_path), "r") as f: @@ -142,9 +148,7 @@ def load_from_hdf5(kic, data_path=None, index_file=None): flux = np.array(f[str(kic)].get("PDC_SAP_flux")) flux_err = np.array(f[str(kic)].get("PDC_SAP_flux_err")) - pdcsap = LightCurve( - time=time, flux=flux, flux_err=flux_err, targetid=kic - ) + pdcsap = LightCurve(time=time, flux=flux, flux_err=flux_err, targetid=kic) return pdcsap @@ -153,10 +157,11 @@ def download_from_lightkurve(kic): """ Download a light curve from lightkurve """ - lc = search_lightcurvefile( - target=f"KIC {kic}", - mission='Kepler' - ).download_all().PDCSAP_FLUX.stitch() + lc = ( + search_lightcurvefile(target=f"KIC {kic}", mission="Kepler") + .download_all() + .PDCSAP_FLUX.stitch() + ) return lc @@ -192,8 +197,8 @@ def load_rotation_period(kic, data_path=None, index_file=None): stars_index = pd.read_csv(index_path) star_prot_list = stars_index.loc[stars_index["KIC"] == kic]["PRot"].values if not math.isfinite(star_prot_list[0]): - raise ValueError(f'Target KIC {kic} does not have a McQuillan period.') + raise ValueError(f"Target KIC {kic} does not have a McQuillan period.") else: - print(f'Using McQuillan period for KIC {kic}.') + print(f"Using McQuillan period for KIC {kic}.") star_prot = star_prot_list[0] return star_prot diff --git a/dot/model.py b/dot/model.py index 0bc6c31..0d86d38 100644 --- a/dot/model.py +++ b/dot/model.py @@ -2,26 +2,26 @@ import numpy as np import pymc3 as pm -from pymc3.smc import sample_smc +from exoplanet.gp import terms, GP __all__ = ['Model'] -class MeanModel(pm.gp.mean.Mean): +class MeanModel(object): """ Mean model for Gaussian process regression on photometry with starspots """ def __init__(self, light_curve, rotation_period, n_spots, contrast, t0, latitude_cutoff=10, partition_lon=True): - pm.gp.mean.Mean.__init__(self) if contrast is None: contrast = 
pm.TruncatedNormal("contrast", lower=0.01, upper=0.99, testval=0.4, mu=0.5, sigma=0.5) + self.contrast = contrast self.f0 = pm.TruncatedNormal("f0", mu=0, sigma=1, - testval=np.percentile(light_curve.flux, 80), + testval=0, lower=-1, upper=2) self.eq_period = pm.TruncatedNormal("P_eq", @@ -33,11 +33,11 @@ def __init__(self, light_curve, rotation_period, n_spots, contrast, t0, eps = 1e-5 # Small but non-zero number BoundedHalfNormal = pm.Bound(pm.HalfNormal, lower=eps, upper=0.8) - self.shear = BoundedHalfNormal("shear", sigma=0.2, testval=0.01) + self.shear = BoundedHalfNormal("shear", testval=0.1) self.comp_inclination = pm.Uniform("comp_inc", - lower=np.radians(eps), - upper=np.radians(90-eps), + lower=0, + upper=np.pi/2, testval=np.radians(1)) if partition_lon: @@ -65,15 +65,9 @@ def __init__(self, light_curve, rotation_period, n_spots, contrast, t0, sigma=0.2, shape=(1, n_spots), testval=0.3) - self.contrast = contrast - # Need to wrap this equation with a where statement so that there isn't - # a divide by zero in the tensor math (even though these parameters are - # bounded to prevent this from happening during sampling) - self.spot_period = pm.math.where(self.shear < 1, - self.eq_period / (1 - self.shear * - pm.math.sin(self.lat - np.pi / 2) ** 2), - self.eq_period) + self.spot_period = self.eq_period / (1 - self.shear * + pm.math.sin(self.lat - np.pi / 2) ** 2) self.sin_lat = pm.math.sin(self.lat) self.cos_lat = pm.math.cos(self.lat) self.sin_c_inc = pm.math.sin(self.comp_inclination) @@ -81,7 +75,7 @@ def __init__(self, light_curve, rotation_period, n_spots, contrast, t0, self.t0 = t0 def __call__(self, X): - phi = 2 * np.pi / self.spot_period * (X - self.t0) - self.lon + phi = 2 * np.pi / self.spot_period * (X[:, None] - self.t0) - self.lon spot_position_x = (pm.math.cos(phi - np.pi / 2) * self.sin_c_inc * @@ -105,7 +99,7 @@ def __call__(self, X): return spot_model -class DisableLogger(): +class DisableLogger(object): """ Simple logger disabler to 
minimize info-level messages during PyMC3 integration @@ -126,7 +120,7 @@ class Model(object): def __init__(self, light_curve, rotation_period, n_spots, scale_errors=1, skip_n_points=1, latitude_cutoff=10, rho_factor=250, verbose=False, min_time=None, max_time=None, contrast=0.7, - partition_lon=False): + partition_lon=True): """ Construct a new instance of `~dot.Model`. @@ -172,9 +166,7 @@ def __init__(self, light_curve, rotation_period, n_spots, scale_errors=1, self.scale_errors = scale_errors self.pymc_model = None - self.pymc_gp = None - self.pymc_gp_white = None - self.pymc_gp_matern = None + self.gp = None self._initialize_model(latitude_cutoff=latitude_cutoff, rho_factor=rho_factor, @@ -211,22 +203,16 @@ def _initialize_model(self, latitude_cutoff, partition_lon, rho_factor): ls = rho_factor * self.rotation_period mean_err = yerr.mean() - gp_white = pm.gp.Marginal(mean_func=mean_func, - cov_func=pm.gp.cov.WhiteNoise(mean_err)) - gp_matern = pm.gp.Marginal(cov_func=mean_err ** 2 * - pm.gp.cov.Matern32(1, ls=ls)) - gp = gp_white + gp_matern + # Set up the kernel an GP + kernel = terms.Matern32Term(sigma=mean_err, rho=ls) + gp = GP(kernel, x, yerr ** 2) - gp.marginal_likelihood("y", X=x[:, None], y=y, noise=yerr) + gp.marginal("gp", observed=y - mean_func(x)) self.pymc_model = model - self.pymc_gp = gp - self.pymc_gp_white = gp_white - self.pymc_gp_matern = gp_matern - self.mean_model = mean_func(x[:, None]) - - return self.pymc_model + self.gp = gp + self.mean_model = mean_func(x) def __enter__(self): """ @@ -249,91 +235,6 @@ def _check_model(self): if self.pymc_model is None: raise ValueError('Must first call `Model._initialize_model` first.') - def sample_smc(self, draws, random_seed=42, **kwargs): - """ - Sample the posterior distribution of the model given the data using - Sequential Monte Carlo. 
- - Parameters - ---------- - draws : int - Draws for the SMC sampler - random_seed : int - Random seed - parallel : bool - If True, run in parallel - cores : int - If `parallel`, run on this many cores - - Returns - ------- - trace : `~pymc3.backends.base.MultiTrace` - """ - self._check_model() - with DisableLogger(self.verbose): - with self.pymc_model: - trace = sample_smc(draws, random_seed=random_seed, **kwargs) - return trace - - def sample_nuts(self, trace_smc, draws, cores=96, - target_accept=0.99, **kwargs): - """ - Sample the posterior distribution of the model given the data using - the No U-Turn Sampler. - - Parameters - ---------- - trace_smc : `~pymc3.backends.base.MultiTrace` - Results from the SMC sampler - draws : int - Draws for the SMC sampler - cores : int - Run on this many cores - target_accept : float - Increase this number up to unity to decrease divergences - - Returns - ------- - trace : `~pymc3.backends.base.MultiTrace` - Results of the NUTS sampler - """ - self._check_model() - with DisableLogger(self.verbose): - with self.pymc_model: - trace = pm.sample(draws, - start=trace_smc.point(-1), cores=cores, - target_accept=target_accept, **kwargs) - summary = pm.summary(trace) - - return trace, summary - - def optimize(self, start=None, plot=False, **kwargs): - """ - Optimize the free parameters in `Model` using - `~scipy.optimize.minimize` via `~exoplanet.optimize` - - Thanks x1000 to Daniel Foreman-Mackey for making this possible. 
- """ - from exoplanet import optimize - - with self.pymc_model: - map_soln = optimize(start=start, **kwargs) - - if plot: - best_fit = self(map_soln) - - import matplotlib.pyplot as plt - ax = plt.gca() - ax.errorbar(self.lc.time[self.mask][::self.skip_n_points], - self.lc.flux[self.mask][::self.skip_n_points], - self.lc.flux_err[self.mask][::self.skip_n_points], - fmt='.', color='k', ecolor='silver', label='obs') - ax.plot(self.lc.time[self.mask][::self.skip_n_points], - best_fit, label='dot') - ax.set(xlabel='Time', ylabel='Flux') - ax.legend(loc='lower left') - return map_soln - def __call__(self, point=None, **kwargs): """ Evaluate the model with input parameters at ``point`` @@ -343,9 +244,13 @@ def __call__(self, point=None, **kwargs): from exoplanet import eval_in_model with self.pymc_model: - result = eval_in_model( - self.mean_model, - point=point, - **kwargs + mu, var = eval_in_model( + self.gp.predict(self.lc.time[self.mask][::self.skip_n_points], + return_var=True), point ) - return result + + mean_eval = eval_in_model( + self.mean_model, point + ) + + return mu + mean_eval, var diff --git a/dot/plots.py b/dot/plots.py index c4440a1..ace1a44 100644 --- a/dot/plots.py +++ b/dot/plots.py @@ -5,6 +5,7 @@ from matplotlib import animation from corner import corner as dfm_corner import pymc3 as pm +from exoplanet import eval_in_model __all__ = ['corner', 'posterior_predictive', 'movie', 'gp_from_posterior'] @@ -21,7 +22,7 @@ def corner(trace, **kwargs): return dfm_corner(pm.trace_to_dataframe(trace), **kwargs) -def posterior_predictive(model, trace, samples=100, path=None, **kwargs): +def posterior_predictive(model, trace, samples=10, path=None, **kwargs): """ Take draws from the posterior predictive given a trace and a model. 
@@ -41,23 +42,29 @@ def posterior_predictive(model, trace, samples=100, path=None, **kwargs): fig, ax : `~matplotlib.figure.Figure`, `~matplotlib.axes.Axes` Resulting figure and axis """ - with model: - ppc = pm.sample_posterior_predictive(trace, samples=samples, - **kwargs) - fig, ax = plt.subplots(figsize=(10, 2.5)) - plt.errorbar(model.lc.time, - model.lc.flux, - model.lc.flux_err, - fmt='.', color='k', ecolor='silver') - - plt.plot(model.lc.time[model.mask][::model.skip_n_points], - ppc['dot_y'].T, - color='DodgerBlue', lw=2, alpha=10/samples) - - plt.gca().set(xlabel='Time [d]', ylabel='Flux', - xlim=[model.lc.time[model.mask].min(), - model.lc.time[model.mask].max()]) + ax.errorbar(model.lc.time, + model.lc.flux, + model.lc.flux_err, + fmt='.', color='k', ecolor='silver') + + x = model.lc.time[model.mask][::model.skip_n_points] + + for i in np.random.randint(0, len(trace), size=samples): + with model: + mu, var = eval_in_model( + model.gp.predict(x, return_var=True), trace[i] + ) + mean_eval = eval_in_model( + model.mean_model, trace[i] + ) + + ax.fill_between(x, mu + np.sqrt(var) + mean_eval, + mu - np.sqrt(var) + mean_eval, alpha=0.2) + + ax.set(xlabel='Time [d]', ylabel='Flux', + xlim=[model.lc.time[model.mask].min(), + model.lc.time[model.mask].max()]) if path is not None: fig.savefig(path, bbox_inches='tight') return fig, ax @@ -80,7 +87,7 @@ def posterior_shear(model, trace, path=None): Resulting figure and axis """ fig, ax = plt.subplots(figsize=(4, 3)) - ax.hist(trace['dot_shear'], + ax.hist(np.exp(trace['dot_shear']), bins=25, range=[0, 0.6], color='k') @@ -281,50 +288,7 @@ def animate_func(ii): return fig, m -def last_step(model, trace, x=None): - """ - Plot the last step in the trace, including the GP prediction. 
- - Parameters - ---------- - model : `~dot.Model` - Model object - trace : `~pymc3.backends.base.MultiTrace` - Trace from SMC/NUTS - """ - if x is None: - x = model.lc.time[model.mask][::model.skip_n_points][:, None] - - if len(x.shape) < 2: - x = x[:, None] - - x_data = model.lc.time[model.mask][::model.skip_n_points] - y_data = model.lc.flux[model.mask][::model.skip_n_points] - yerr_data = model.scale_errors * model.lc.flux_err[model.mask][::model.skip_n_points] - - given = { - "gp": model.pymc_gp, - "X": x_data[:, None], - "y": y_data, - "noise": yerr_data - } - - mu, var = model.pymc_gp.predict(x, - point=trace[-1], - given=given, - diag=True, - ) - sd = np.sqrt(var) - plt.fill_between(x, mu+sd, mu-sd, color='DodgerBlue', alpha=0.2) - plt.plot(x, mu, color='DodgerBlue') - - plt.errorbar(x_data, y_data, yerr_data, - fmt='.', color='k', ecolor='silver', zorder=10) - - return plt.gca() - - -def gp_from_posterior(model, trace_nuts, xnew, path): +def gp_from_posterior(model, trace_nuts, path=None): """ Plot a GP regression with the mean model defined in ``model``, drawn from the posterior distribution in ``trace_nuts``, at times ``xnew``. 
@@ -341,34 +305,22 @@ def gp_from_posterior(model, trace_nuts, xnew, path): Save the resulting plot to ``path`` """ x_data = model.lc.time[model.mask][::model.skip_n_points] - y_data = model.lc.flux[model.mask][::model.skip_n_points] - yerr_data = model.scale_errors * model.lc.flux_err[model.mask][::model.skip_n_points] - - given = { - "gp": model.pymc_gp, - "X": x_data[:, None], - "y": y_data, - "noise": yerr_data - } - - mu, var = model.pymc_gp.predict(xnew[:, None], - point=trace_nuts[-1], - given=given, - diag=True - ) - sd = np.sqrt(var) - - plt.fill_between(xnew, 1 + mu + sd, 1 + mu - sd, - color='DodgerBlue', alpha=0.5) - - plt.errorbar(x_data, 1 + y_data, yerr_data, - fmt='.', color='k', ecolor='silver', zorder=10) - - residuals = y_data - np.interp(x_data, xnew, mu) - plt.errorbar(x_data, - 1 + residuals + 1.25 * y_data.min(), - yerr_data, - fmt='.', color='k', ecolor='silver') + + plt.errorbar(model.lc.time, model.lc.flux, + model.scale_errors * model.lc.flux_err, + fmt='.', color='k') + for i in np.random.randint(0, len(trace_nuts), size=10): + with model: + mu, var = eval_in_model( + model.gp.predict(x_data, return_var=True), trace_nuts[i] + ) + mean_eval = eval_in_model( + model.mean_model, trace_nuts[i] + ) + + plt.fill_between(x_data, mu + np.sqrt(var) + mean_eval, + mu - np.sqrt(var) + mean_eval, color='DodgerBlue', + alpha=0.2) plt.xlabel('Time [d]') plt.ylabel('Flux') diff --git a/dot/tests/test_model.py b/dot/tests/test_model.py index 63aafec..78c2aa8 100644 --- a/dot/tests/test_model.py +++ b/dot/tests/test_model.py @@ -19,7 +19,7 @@ # This decorator at the top allows us to iterate over the various pairs # of spot lons, lats, rads, and stellar inclinations. We compare the result -# of the chi^2 computation with the 1e-9 and make sure it's smaller, i.e. +# of the chi^2 computation with the 1e-6 and make sure it's smaller, i.e. # we test for excellent agreement between fleck and dot. 
@pytest.mark.parametrize("test_input,expected", [((lons, lats, rads, inc), -6) @@ -61,7 +61,7 @@ def test_against_fleck(test_input, expected): "dot_lat": np.array([np.pi / 2 - lats]), "dot_lon": np.array([lons]), "dot_comp_inc": np.radians(90 - inc), - "dot_shear": 1e-2, + "dot_shear": 1e-4, "dot_P_eq": 2 * np.pi, "dot_f0": m.lc.flux.max() } @@ -70,7 +70,7 @@ def test_against_fleck(test_input, expected): pm.util.update_start_vals(start, m.pymc_model.test_point, m.pymc_model) # the fit is not normalized to its median like the input light curve is - fit = m(start) + fit, var = m(start) # ...so we normalize it before we compare: fit -= np.median(fit) diff --git a/dot/utils/gcp.py b/dot/utils/gcp.py new file mode 100644 index 0000000..2083941 --- /dev/null +++ b/dot/utils/gcp.py @@ -0,0 +1,113 @@ +from pathlib import Path +from google.cloud import storage +import pickle + + +def get_home_dir(): + return str(Path.home()) + + +def save_pkl(data, file) -> None: + with open(f"{file}.pkl", "wb") as buff: + pickle.dump(data, buff) + return None + + +# def save_summary(summary: pd.DataFrame, file_path: str, name: str = "summary") -> None: + +# summary.to_pickle(os.path.join(file_path, f"{name}.pkl")) +# return None + + +def create_folder(directory: str) -> None: + """Creates directory if doesn't exist""" + try: + Path(directory).mkdir(parents=True, exist_ok=False) + except FileExistsError: + print(f"Folder '{directory}' Is Already There.") + else: + print(f"Folder '{directory}' is created.") + + +def save_file_from_bucket(bucket_id: str, file_name: str, destination_file_path: str): + """Saves a file from a bucket + + Parameters + ---------- + bucket_id : str + the name of the bucket + file_name : str + the name of the file in bucket (include the directory) + destination_file_path : str + the directory of where you want to save the + data locally (not including the filename) + + Examples + -------- + + >>> bucket_id = ... 
+    >>> file_name = 'path/to/file/and/file.csv'
+    >>> dest = 'path/in/bucket/'
+    >>> save_file_from_bucket(
+        bucket_id=bucket_id,
+        file_name=file_name,
+        destination_file_path=dest
+        )
+    """
+    client = storage.Client()
+
+    bucket = client.get_bucket(bucket_id)
+    # get blob
+    blob = bucket.get_blob(file_name)
+
+    # create directory if needed
+    create_folder(destination_file_path)
+
+    # get full path
+    destination_file_name = Path(destination_file_path).joinpath(
+        file_name.split("/")[-1]
+    )
+
+    # download data
+    blob.download_to_filename(str(destination_file_name))
+
+    return None
+
+
+def save_file_to_bucket(bucket_id: str, local_save_file: str, full_bucket_name: str):
+    """Saves a file to a bucket
+
+    Parameters
+    ----------
+    bucket_id : str
+        the name of the bucket
+    local_save_file : str
+        the name of the file to save (include the directory)
+    full_bucket_name : str
+        the destination path within the bucket (including
+        the filename) where you want to save.
+
+    Examples
+    --------
+
+    >>> bucket_id = ...
+    >>> local_save_file = 'path/to/file/and/file.csv'
+    >>> full_bucket_name = 'path/in/bucket/file.csv'
+    >>> save_file_to_bucket(
+        bucket_id=bucket_id,
+        local_save_file=local_save_file,
+        full_bucket_name=full_bucket_name
+        )
+    """
+    client = storage.Client()
+
+    bucket = client.get_bucket(bucket_id)
+
+    # get blob
+    blob = bucket.blob(full_bucket_name)
+
+    # upload data
+    blob.upload_from_filename(local_save_file)
+
+    return None
+
diff --git a/environment_gcp.yml b/environment_gcp.yml
new file mode 100644
index 0000000..f336fa6
--- /dev/null
+++ b/environment_gcp.yml
@@ -0,0 +1,22 @@
+name: dotenv_gcp
+channels:
+  - anaconda
+dependencies:
+  - python=3.8
+  - numpy>=1.19
+  - pandas>=1.1
+  - astropy>3.2.1
+  - conda-forge::lightkurve
+  - h5py>=2.10
+  - conda-forge::pymc3
+  - conda-forge::celerite
+  - matplotlib>=3.3
+  - seaborn>=0.11
+  - pip
+  - pip:
+    - wandb
+    - tqdm
+    - mypy
+    - corner
+    - exoplanet==0.3.3
+    - google-cloud-storage>1.16
\ No newline at end of file
diff --git a/example.py b/example.py
index 9e76940..65cbe7c 100644
--- a/example.py
+++ b/example.py
@@ -4,21 +4,22 @@
 import os
 import numpy as np
 import matplotlib.pyplot as plt
-from dot import Model, save_results, load_results, ab_dor_example_lc
+from dot import Model, ab_dor_example_lc
 from dot.plots import gp_from_posterior
+import pymc3 as pm
+import multiprocessing as mp
 
 # dot parameters
 rotation_period = 0.5  # Rotation period in units of light curve time interval
 n_spots = 4  # Number of starspots
 results_dir = 'test-go'  # Save results here
-draws_smc = 100  # Number of draws from the NUTS
 draws_nuts = 100  # Number of draws from the NUTS
 tune = 100  # Tuning steps (default: >1000)
-cores = 4  # This controls the NUTS, for SMC we recommend cores=4.
+cores = 2  # This controls the NUTS, for SMC we recommend cores=4.
skip_n_points = 5 # skip every n photometric measurements -rho_factor = 0.2 # length scale of GP in units of the rotation period -scale_errors = 15 # scale up the uncertainties by this factor -contrast = None # If `None` allow to float, else fix it +rho_factor = 0.5 # length scale of GP in units of the rotation period +scale_errors = 3 # scale up the uncertainties by this factor +contrast = 0.4 # If `None` allow to float, else fix it partition_lon = True # Bound the longitudes explored by each spot verbose = True # Allow PyMC3 logger to print to stdout @@ -48,25 +49,16 @@ partition_lon=partition_lon, verbose=verbose ) - print('Running SMC...') - trace_smc = m.sample_smc(draws=draws_smc, parallel=True, cores=4) - print('Running NUTS...') - trace_nuts, summary = m.sample_nuts(trace_smc, draws=draws_nuts, - cores=cores, tune=tune) - - os.mkdir(results_dir) - save_results(results_dir, m, trace_nuts, summary) + with m: + map_soln = pm.find_MAP() - # Otherwise load the previously computed model, results: - else: - m, trace_nuts, summary = load_results(results_dir) - - # Times at which to compute the mean model + GP - xnew = np.linspace(min_time, max_time, 1000) + print('Running NUTS...') + with m: + trace_nuts = pm.sample(start=map_soln, draws=draws_nuts, + cores=cores, tune=tune, + init='jitter+adapt_full', + mp_ctx=mp.get_context("fork")) - # Plot the resulting fit to the light curve with a GP: - plt.figure(figsize=(20, 5)) - gp_from_posterior(m, trace_nuts, xnew, - path=os.path.join(results_dir, 'gp.png')) - plt.show() + gp_from_posterior(m, trace_nuts) + plt.show() \ No newline at end of file diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000..554e9f9 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,24 @@ +# Docker Guide + + +## Docker Files + + +1. Build the Docker File + +```bash +docker build -t dot_docker . +``` + +2. 
Push the updated Docker file to the gcp
+
+```bash
+docker push gcr.io/fdl-us-star-spots/dot_docker:latest
+```
+
+## Demo Scripts
+
+
+
+
+## Multiple Scripts
\ No newline at end of file
diff --git a/scripts/demo.py b/scripts/demo.py
new file mode 100644
index 0000000..e032fcb
--- /dev/null
+++ b/scripts/demo.py
@@ -0,0 +1,132 @@
+from argparse import ArgumentParser
+from lightkurve import search_lightcurvefile
+import numpy as np
+from pathlib import Path
+
+home = str(Path.home())
+
+
+# MATPLOTLIB Settings
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+from dot import Model, ab_dor_example_lc
+from dot.plots import gp_from_posterior
+from dot.utils.gcp import create_folder, save_pkl, save_file_to_bucket
+import pymc3 as pm
+import multiprocessing as mp
+from dot import save_results
+
+# # SEABORN SETTINGS
+# import seaborn as sns
+
+# sns.set_context(context="talk", font_scale=0.7)
+
+
+def main(args):
+
+    # dot parameters
+    rotation_period = 0.5  # Rotation period in units of light curve time interval
+    n_spots = 4  # Number of starspots
+    results_dir = "test-go"  # Save results here
+    draws_nuts = 100  # Number of draws from the NUTS
+    tune = 100  # Tuning steps (default: >1000)
+    cores = 2  # This controls the NUTS, for SMC we recommend cores=4.
+ skip_n_points = 5 # skip every n photometric measurements + rho_factor = 0.5 # length scale of GP in units of the rotation period + scale_errors = 3 # scale up the uncertainties by this factor + contrast = 0.4 # If `None` allow to float, else fix it + partition_lon = True # Bound the longitudes explored by each spot + verbose = True # Allow PyMC3 logger to print to stdout + + # Fetch example light curve from the package: + lcf = search_lightcurvefile( + args.target, mission=args.mission, sector=args.sector + ).download_all() + lc = lcf.PDCSAP_FLUX.stitch() + lc.time -= lc.time.mean() # Remove the mean from the time and median from + lc.flux -= np.median(lc.flux) # for efficient/stable GP regression + min_time = lc.time.min() # Minimum time to fit + max_time = lc.time.max() # Maximum time to fit + + # ========================================== + # BUILD MODEL + # ========================================== + print("Constructing model...") + # Construct an instance of `Model` (this is surprisingly expensive) + m = Model( + light_curve=lc, + rotation_period=rotation_period, + n_spots=n_spots, + contrast=contrast, + skip_n_points=skip_n_points, + min_time=min_time, + max_time=max_time, + rho_factor=rho_factor, + scale_errors=scale_errors, + partition_lon=partition_lon, + verbose=verbose, + ) + + print("Running MAP...") + with m: + map_soln = pm.find_MAP() + + # =================================== + # SAVE RESULTS + # =================================== + results_dir = str(Path(args.data_path).joinpath("example")) + bucket_id = "spotdot_results" + target = args.target.replace(" ", "_") + bucketpath = Path(f"{args.mission}/{target}_{args.seed}") + + create_folder(results_dir) + + # save model + + save_pkl(m, str(Path(results_dir).joinpath("map_model"))) + print("Uploading Model to bucket...") + save_file_to_bucket( + bucket_id, + str(Path(results_dir).joinpath("map_model.pkl")), + str(bucketpath.joinpath("map_model.pkl")), + ) + + # save trace + print("Saving Trace...") 
+    save_pkl(map_soln, str(Path(results_dir).joinpath("map_trace")))
+
+    # upload to bucket
+    print("Uploading Trace to bucket...")
+    save_file_to_bucket(
+        bucket_id,
+        str(Path(results_dir).joinpath("map_trace.pkl")),
+        str(bucketpath.joinpath("map_trace.pkl")),
+    )
+
+
+if __name__ == "__main__":
+
+    parser = ArgumentParser(add_help=False)
+
+    # ======================
+    # Data parameters
+    # ======================
+    parser.add_argument(
+        "--data-path", type=str, default=f"{home}/", help="path to load light curves.",
+    )
+    parser.add_argument(
+        "--target", type=str, default="HD 222259 A", help="target name to query.",
+    )
+    parser.add_argument(
+        "--mission", type=str, default="TESS", help="mission to query (e.g. TESS).",
+    )
+    parser.add_argument(
+        "--sector", type=int, default=27, help="observing sector to download.",
+    )
+    parser.add_argument(
+        "--seed", type=int, default=1, help="random seed for this run.",
+    )
+
+    args = parser.parse_args()
+
+    main(args)
diff --git a/setup.cfg b/setup.cfg
index 953cec7..c41c87e 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [metadata]
-name = dot
+name = spotdot
 author = Brett M. Morris
 author_email = morrisbrettm@gmail.com
 license = BSD 3-Clause