diff --git a/stan/math/fwd/meta/is_fvar.hpp b/stan/math/fwd/meta/is_fvar.hpp index c5d67de8894..e208d08bc71 100644 --- a/stan/math/fwd/meta/is_fvar.hpp +++ b/stan/math/fwd/meta/is_fvar.hpp @@ -21,8 +21,5 @@ struct is_fvar>::value>> : std::true_type {}; -template -inline constexpr bool is_fvar_v = is_fvar::value; - } // namespace stan #endif diff --git a/stan/math/prim/fun/log_gamma_q_dgamma.hpp b/stan/math/prim/fun/log_gamma_q_dgamma.hpp new file mode 100644 index 00000000000..742b23784ba --- /dev/null +++ b/stan/math/prim/fun/log_gamma_q_dgamma.hpp @@ -0,0 +1,134 @@ +#ifndef STAN_MATH_PRIM_FUN_LOG_GAMMA_Q_DGAMMA_HPP +#define STAN_MATH_PRIM_FUN_LOG_GAMMA_Q_DGAMMA_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace stan { +namespace math { + +namespace internal { + +/** + * Default convergence threshold for the continued fraction in log_q_gamma_cf. + * + * NOTE(review): 1.49012e-12 is not sqrt(machine epsilon) for double, which is + * about 1.49e-8 -- confirm which value is intended. + */ +constexpr double LOG_Q_GAMMA_CF_PRECISION = 1.49012e-12; + +/** + * Compute log(Q(a,z)) via the continued fraction expansion of the regularized + * upper incomplete gamma function. + * + * Terms whose magnitude falls below EPSILON are replaced by EPSILON before + * they are used as divisors. Iteration stops once a factor differs from 1 by + * less than `precision`, or after `max_steps` steps; reaching the step limit + * is not reported to the caller. + * + * @tparam T_a Type of shape parameter a (double or fvar types) + * @tparam T_z Type of value parameter z (double or fvar types) + * @param a Shape parameter + * @param z Value at which to evaluate + * @param precision Convergence threshold, default 1.49012e-12 + * @param max_steps Maximum number of continued fraction iterations + * @return log(Q(a,z)) with the return type of T_a and T_z + */ +template +inline return_type_t log_q_gamma_cf(const T_a& a, const T_z& z, + double precision + = LOG_Q_GAMMA_CF_PRECISION, + int max_steps = 250) { + using T_return = return_type_t; + const T_return log_prefactor = a * log(z) - z - lgamma(a); + + T_return b_init = z + 1.0 - a; + T_return C = (fabs(value_of_rec(b_init)) >= EPSILON) + ? 
b_init + : std::decay_t(EPSILON); + T_return D = 0.0; + T_return f = C; + for (int i = 1; i <= max_steps; ++i) { + T_a an = -i * (i - a); + const T_return b = b_init + 2.0 * i; + D = b + an * D; + D = (fabs(value_of_rec(D)) >= EPSILON) ? D + : std::decay_t(EPSILON); + C = b + an / C; + C = (fabs(value_of_rec(C)) >= EPSILON) ? C + : std::decay_t(EPSILON); + D = inv(D); + const T_return delta = C * D; + f *= delta; + const double delta_m1 = fabs(value_of_rec(delta) - 1.0); + if (delta_m1 < precision) { + break; + } + } + return log_prefactor - log(f); +} + +} // namespace internal + +/** + * Compute log(Q(a,z)) and its gradient with respect to a, where + * Q(a,z) = Gamma(a,z) / Gamma(a) is the regularized upper incomplete gamma + * function. + * + * Both arguments are first reduced to double with value_of. For z > a + 1 the + * value comes from the continued fraction (internal::log_q_gamma_cf); + * otherwise it is log1m(gamma_p(a, z)). The gradient is (dQ/da) / Q with + * dQ/da taken from grad_reg_inc_gamma; in the z <= a + 1 branch, + * log(z) - digamma(a) is used instead when exp(log Q) is not positive. + * + * NOTE(review): the z > a + 1 branch divides by exp(log Q) without the Q > 0 + * check used in the other branch -- confirm the result when exp underflows. + * + * @tparam T_a type of the shape parameter + * @tparam T_z type of the value parameter + * @param a shape parameter (must be positive) + * @param z value parameter (must be non-negative) + * @param precision convergence threshold forwarded to the continued + * fraction, default 1.49012e-12 + * @param max_steps maximum iterations for continued fraction + * @return pair holding log(Q(a,z)) (first) and d/da log(Q(a,z)) (second) + */ +template +inline std::pair, return_type_t> +log_gamma_q_dgamma(const T_a& a, const T_z& z, + double precision = internal::LOG_Q_GAMMA_CF_PRECISION, + int max_steps = 250) { + using T_return = return_type_t; + const double a_val = value_of(a); + const double z_val = value_of(z); + // For z > a + 1, use continued fraction for better numerical stability + if (z_val > a_val + 1.0) { + std::pair result{ + internal::log_q_gamma_cf(a_val, z_val, precision, max_steps), 0.0}; + // For gradient, use: d/da log(Q) = (1/Q) * dQ/da + // grad_reg_inc_gamma computes dQ/da + const T_return Q_val = exp(result.first); + const 
double dQ_da + = grad_reg_inc_gamma(a_val, z_val, tgamma(a_val), digamma(a_val)); + result.second = dQ_da / Q_val; + return result; + } else { + // For z <= a + 1, use log1m(P(a,z)) for better numerical accuracy + const double P_val = gamma_p(a_val, z_val); + std::pair result{log1m(P_val), 0.0}; + // Gradient: d/da log(Q) = (1/Q) * dQ/da + // grad_reg_inc_gamma computes dQ/da + const T_return Q_val = exp(result.first); + if (Q_val > 0) { + const double dQ_da + = grad_reg_inc_gamma(a_val, z_val, tgamma(a_val), digamma(a_val)); + result.second = dQ_da / Q_val; + } else { + // Fallback if Q rounds to zero - use asymptotic approximation + result.second = log(z_val) - digamma(a_val); + } + return result; + } +} + +} // namespace math +} // namespace stan + +#endif diff --git a/stan/math/prim/meta/is_fvar.hpp b/stan/math/prim/meta/is_fvar.hpp index 0a26f93c1b5..a6b0e540494 100644 --- a/stan/math/prim/meta/is_fvar.hpp +++ b/stan/math/prim/meta/is_fvar.hpp @@ -14,6 +14,9 @@ namespace stan { template struct is_fvar : std::false_type {}; +/** \ingroup type_trait + * Shorthand for the `value` member of is_fvar. + */ +template +inline constexpr bool is_fvar_v = is_fvar::value; + /** \ingroup type_trait * Specialization for pointers returns the underlying value the pointer is * pointing to. diff --git a/stan/math/prim/prob/gamma_lccdf.hpp b/stan/math/prim/prob/gamma_lccdf.hpp index a670cefcecf..c5538fa5966 100644 --- a/stan/math/prim/prob/gamma_lccdf.hpp +++ b/stan/math/prim/prob/gamma_lccdf.hpp @@ -6,28 +6,102 @@ #include #include #include -#include +#include +#include #include +#include +#include #include +#include #include #include #include #include #include -#include +#include +#include #include #include +#include namespace stan { namespace math { +namespace internal { + +/** + * Computes log Q and d(log Q) / d(alpha) using continued fraction. + * + * Returns std::nullopt when the computed log Q is not finite. + * + * NOTE(review): when `!any_fvar` and the `is_autodiff_v` condition in the + * body both hold, the work is delegated to log_gamma_q_dgamma, which itself + * evaluates log1m(gamma_p(a, z)) rather than the continued fraction for + * z <= a + 1.
+ */ +template +inline std::optional, return_type_t>> +eval_q_cf(const T1& alpha, const T2& beta_y) { + using scalar_t = return_type_t; + using ret_t = std::pair; + if constexpr (!any_fvar && is_autodiff_v) { + std::pair log_q_result + = log_gamma_q_dgamma(value_of(alpha), value_of(beta_y)); + if (likely(std::isfinite(log_q_result.first))) { + return std::optional{log_q_result}; + } else { + return std::optional{std::nullopt}; + } + } else { + ret_t out{internal::log_q_gamma_cf(alpha, beta_y), 0.0}; + if (unlikely(!std::isfinite(value_of_rec(out.first)))) { + return std::optional{std::nullopt}; + } + if constexpr (is_autodiff_v) { + if constexpr (!partials_fvar) { + out.second + = grad_reg_inc_gamma(alpha, beta_y, tgamma(alpha), digamma(alpha)) + / exp(out.first); + } else { + auto alpha_unit = alpha; + alpha_unit.d_ = 1; + auto beta_y_unit = beta_y; + beta_y_unit.d_ = 0; + auto log_Q_fvar = internal::log_q_gamma_cf(alpha_unit, beta_y_unit); + out.second = log_Q_fvar.d_; + } + } + return std::optional{out}; + } +} + +/** + * Computes log Q and d(log Q) / d(alpha) as log1m(gamma_p(alpha, beta_y)). + * + * Returns std::nullopt when the computed log Q is not finite. The derivative + * is filled in only under the body's `is_autodiff_v` check: with + * `partials_fvar` it is the `d_` member of log1m(gamma_p(...)) evaluated on + * copies seeded with alpha.d_ = 1 and beta_y.d_ = 0; otherwise it is + * -grad_reg_lower_inc_gamma(alpha, beta_y) / exp(log Q).
+ */ +template +inline std::optional, return_type_t>> +eval_q_log1m(const T1& alpha, const T2& beta_y) { + using scalar_t = return_type_t; + using ret_t = std::pair; + ret_t out{log1m(gamma_p(alpha, beta_y)), 0.0}; + if (unlikely(!std::isfinite(value_of_rec(out.first)))) { + return std::optional{std::nullopt}; + } + if constexpr (is_autodiff_v) { + if constexpr (partials_fvar) { + auto alpha_unit = alpha; + alpha_unit.d_ = 1; + auto beta_unit = beta_y; + beta_unit.d_ = 0; + auto log_Q_fvar = log1m(gamma_p(alpha_unit, beta_unit)); + out.second = log_Q_fvar.d_; + } else { + out.second = -grad_reg_lower_inc_gamma(alpha, beta_y) / exp(out.first); + } + } + return std::optional{out}; +} +} // namespace internal template inline return_type_t gamma_lccdf( const T_y& y, const T_shape& alpha, const T_inv_scale& beta) { - using T_partials_return = partials_return_t; using std::exp; using std::log; - using std::pow; + using T_partials_return = partials_return_t; using T_y_ref = ref_type_t; using T_alpha_ref = ref_type_t; using T_beta_ref = ref_type_t; @@ -51,61 +125,70 @@ inline return_type_t gamma_lccdf( scalar_seq_view y_vec(y_ref); scalar_seq_view alpha_vec(alpha_ref); scalar_seq_view beta_vec(beta_ref); - size_t N = max_size(y, alpha, beta); - - // Explicit return for extreme values - // The gradients are technically ill-defined, but treated as zero - for (size_t i = 0; i < stan::math::size(y); i++) { - if (y_vec.val(i) == 0) { - // LCCDF(0) = log(P(Y > 0)) = log(1) = 0 - return ops_partials.build(0.0); - } - } + const size_t N = max_size(y, alpha, beta); + + constexpr bool is_y_fvar = is_fvar_v>; + constexpr bool is_shape_fvar = is_fvar_v>; + constexpr bool is_beta_fvar = is_fvar_v>; + constexpr bool any_fvar = is_y_fvar || is_shape_fvar || is_beta_fvar; + constexpr bool partials_fvar = is_fvar_v; for (size_t n = 0; n < N; n++) { // Explicit results for extreme values // The gradients are technically ill-defined, but treated as zero - if (y_vec.val(n) == INFTY) { - // 
LCCDF(∞) = log(P(Y > ∞)) = log(0) = -∞ + const T_partials_return y_val = y_vec.val(n); + if (y_val == 0.0) { + continue; + } + if (y_val == INFTY) { return ops_partials.build(negative_infinity()); } - const T_partials_return y_dbl = y_vec.val(n); - const T_partials_return alpha_dbl = alpha_vec.val(n); - const T_partials_return beta_dbl = beta_vec.val(n); - const T_partials_return beta_y_dbl = beta_dbl * y_dbl; + const T_partials_return alpha_val = alpha_vec.val(n); + const T_partials_return beta_val = beta_vec.val(n); - // Qn = 1 - Pn - const T_partials_return Qn = gamma_q(alpha_dbl, beta_y_dbl); - const T_partials_return log_Qn = log(Qn); + const T_partials_return beta_y = beta_val * y_val; + if (beta_y == INFTY) { + return ops_partials.build(negative_infinity()); + } + std::optional> result; + if (beta_y > alpha_val + 1.0) { + result = internal::eval_q_cf(alpha_val, + beta_y); + } else { + result + = internal::eval_q_log1m(alpha_val, beta_y); + if (!result && beta_y > 0.0) { + // Fallback to continued fraction if log1m fails + // NOTE(review): eval_q_cf can delegate to log_gamma_q_dgamma, which + // picks log1m(gamma_p) again for z <= a + 1 -- confirm the continued + // fraction is actually reached on this path. + result = internal::eval_q_cf( + alpha_val, beta_y); + } + } + if (unlikely(!result)) { + return ops_partials.build(negative_infinity()); + } - P += log_Qn; + P += result->first; - if constexpr (is_any_autodiff_v) { - const T_partials_return log_y_dbl = log(y_dbl); - const T_partials_return log_beta_dbl = log(beta_dbl); - const T_partials_return log_pdf - = alpha_dbl * log_beta_dbl - lgamma(alpha_dbl) - + (alpha_dbl - 1.0) * log_y_dbl - beta_y_dbl; - const T_partials_return common_term = exp(log_pdf - log_Qn); + if constexpr (is_autodiff_v || is_autodiff_v) { + const T_partials_return log_y = log(y_val); + // alpha_minus_one is (alpha_val - 1) * log_y, formed with a single fma + const T_partials_return alpha_minus_one = fma(alpha_val, log_y, -log_y); + + const T_partials_return log_pdf = alpha_val * log(beta_val) + - lgamma(alpha_val) + alpha_minus_one + - beta_y; + + const T_partials_return hazard = exp(log_pdf - result->first); // f/Q if constexpr (is_autodiff_v) { - // d/dy log(1-F(y)) = -f(y)/(1-F(y)) - 
partials<0>(ops_partials)[n] -= common_term; + partials<0>(ops_partials)[n] -= hazard; } if constexpr (is_autodiff_v) { - // d/dbeta log(1-F(y)) = -y*f(y)/(beta*(1-F(y))) - partials<2>(ops_partials)[n] -= y_dbl / beta_dbl * common_term; + partials<2>(ops_partials)[n] -= (y_val / beta_val) * hazard; } } - if constexpr (is_autodiff_v) { - const T_partials_return digamma_val = digamma(alpha_dbl); - const T_partials_return gamma_val = tgamma(alpha_dbl); - // d/dalpha log(1-F(y)) = grad_upper_inc_gamma / (1-F(y)) - partials<1>(ops_partials)[n] - += grad_reg_inc_gamma(alpha_dbl, beta_y_dbl, gamma_val, digamma_val) - / Qn; + partials<1>(ops_partials)[n] += result->second; } } return ops_partials.build(P); diff --git a/test/unit/math/prim/prob/gamma_lccdf_test.cpp b/test/unit/math/prim/prob/gamma_lccdf_test.cpp index 2893f2f0166..e0c84861e0c 100644 --- a/test/unit/math/prim/prob/gamma_lccdf_test.cpp +++ b/test/unit/math/prim/prob/gamma_lccdf_test.cpp @@ -66,6 +66,51 @@ TEST(ProbGamma, lccdf_small_alpha_small_y) { EXPECT_LT(result, 0.0); } +TEST(ProbGamma, lccdf_alpha_gt_30_small_y_old_code_rounds_to_zero) { + using stan::math::gamma_lccdf; + using stan::math::gamma_p; + using stan::math::gamma_q; + using stan::math::log1m; + + // For large alpha and very small y, the CCDF is extremely close to 1. + // The old implementation computed `log(gamma_q(alpha, beta * y))`, which can + // round to `log(1) == 0`. The updated implementation uses `log1m(gamma_p)`, + // which preserves the tiny negative value. 
+ double y = 1e-8; + double alpha = 31.25; + double beta = 1.0; + + double new_val = gamma_lccdf(y, alpha, beta); + double expected = log1m(gamma_p(alpha, beta * y)); + + // Old code: log(gamma_q(alpha, beta * y)) + double old_val = std::log(gamma_q(alpha, beta * y)); + + EXPECT_EQ(old_val, 0.0); + EXPECT_LT(new_val, 0.0); + EXPECT_DOUBLE_EQ(new_val, expected); +} + +TEST(ProbGamma, lccdf_log1m_exp_lcdf_rounds_to_inf) { + using stan::math::gamma_lccdf; + using stan::math::gamma_lcdf; + using stan::math::log1m_exp; + using stan::math::negative_infinity; + + // With these inputs gamma_lcdf is expected to round to exactly 0, which + // makes log1m_exp(lcdf) equal -inf; gamma_lccdf itself must stay finite and + // negative. + double y = 20000.0; + double alpha = 800.0; + double beta = 0.1; + + double lcdf = gamma_lcdf(y, alpha, beta); + double log1m_lcdf = log1m_exp(lcdf); + double lccdf = gamma_lccdf(y, alpha, beta); + + EXPECT_EQ(lcdf, 0.0); + EXPECT_EQ(log1m_lcdf, negative_infinity()); + EXPECT_TRUE(std::isfinite(lccdf)); + EXPECT_LT(lccdf, 0.0); +} + TEST(ProbGamma, lccdf_large_alpha_large_y) { using stan::math::gamma_lccdf; @@ -154,6 +199,29 @@ TEST(ProbGamma, lccdf_extreme_large_alpha) { EXPECT_TRUE(std::isfinite(result)); } +TEST(ProbGamma, lccdf_large_alpha_1000_beta_3) { + using stan::math::gamma_lccdf; + + // Large alpha = 1000, beta = 3 + double alpha = 1000.0; + double beta = 3.0; + + // Test various y values + std::vector y_values = {100.0, 300.0, 333.333, 400.0, 500.0}; + + for (double y : y_values) { + double result = gamma_lccdf(y, alpha, beta); + + // Result should be finite + EXPECT_TRUE(std::isfinite(result)) + << "Failed for y=" << y << ", alpha=" << alpha << ", beta=" << beta; + + // Result should be <= 0 (log of probability) + EXPECT_LE(result, 0.0) << "Positive value for y=" << y + << ", alpha=" << alpha << ", beta=" << beta; + } +} + TEST(ProbGamma, lccdf_monotonic_in_y) { using stan::math::gamma_lccdf; diff --git a/test/unit/math/rev/prob/gamma_lccdf_test.cpp b/test/unit/math/rev/prob/gamma_lccdf_test.cpp index 89347153337..d069a365578 100644 --- a/test/unit/math/rev/prob/gamma_lccdf_test.cpp +++ 
b/test/unit/math/rev/prob/gamma_lccdf_test.cpp @@ -231,6 +231,74 @@ TEST_F(AgradRev, ProbDistributionsGamma_lccdf_extreme_values_small) { } } +TEST_F( + AgradRev, + ProbDistributionsGammalccdf_alpha_gt_30_small_y_old_code_rounds_to_zero) { + using stan::math::gamma_lccdf; + using stan::math::gamma_p; + using stan::math::gamma_q; + using stan::math::log1m; + using stan::math::var; + + // Same comparison as the prim test, but also exercises autodiff for + // alpha > 30. + // NOTE(review): sibling tests in this file are named + // ProbDistributionsGamma_lccdf_*; this name drops the underscore. + double y_d = 1e-8; + double alpha_d = 31.25; + double beta_d = 1.0; + + var y_v = y_d; + var alpha_v = alpha_d; + var beta_v = beta_d; + + var lccdf_var = gamma_lccdf(y_v, alpha_v, beta_v); + + // Old code: log(gamma_q(alpha, beta * y)) + double old_val = std::log(gamma_q(alpha_d, beta_d * y_d)); + double expected = log1m(gamma_p(alpha_d, beta_d * y_d)); + + EXPECT_EQ(old_val, 0.0); + EXPECT_LT(lccdf_var.val(), 0.0); + EXPECT_DOUBLE_EQ(lccdf_var.val(), expected); + + std::vector vars = {y_v, alpha_v, beta_v}; + std::vector grads; + lccdf_var.grad(vars, grads); + + for (size_t i = 0; i < grads.size(); ++i) { + EXPECT_FALSE(std::isnan(grads[i])) << "Gradient " << i << " is NaN"; + EXPECT_TRUE(std::isfinite(grads[i])) + << "Gradient " << i << " is not finite"; + } + + // d/dy log(CCDF) should be <= 0 (can underflow to -0) + EXPECT_LE(grads[0], 0.0); +} + +TEST_F(AgradRev, ProbDistributionsGammalccdf_log1m_exp_lcdf_rounds_to_inf) { + using stan::math::gamma_lccdf; + using stan::math::gamma_lcdf; + using stan::math::log1m_exp; + using stan::math::negative_infinity; + using stan::math::var; + + // gamma_lcdf is expected to round to 0 here, so log1m_exp(lcdf) is -inf; + // only the value of gamma_lccdf is checked, no gradients are requested. + double y_d = 20000.0; + double alpha_d = 800.0; + double beta_d = 0.1; + + double lcdf = gamma_lcdf(y_d, alpha_d, beta_d); + double log1m_lcdf = log1m_exp(lcdf); + + var y_v = y_d; + var alpha_v = alpha_d; + var beta_v = beta_d; + var lccdf_var = gamma_lccdf(y_v, alpha_v, beta_v); + + EXPECT_EQ(lcdf, 0.0); + EXPECT_EQ(log1m_lcdf, negative_infinity()); + EXPECT_TRUE(std::isfinite(lccdf_var.val())); + 
EXPECT_LT(lccdf_var.val(), 0.0); +} + TEST_F(AgradRev, ProbDistributionsGamma_lccdf_extreme_values_large) { using stan::math::gamma_lccdf; using stan::math::var; @@ -259,6 +327,36 @@ TEST_F(AgradRev, ProbDistributionsGamma_lccdf_extreme_values_large) { } } +TEST_F(AgradRev, ProbDistributionsGammalccdf_large_alpha_1000_beta_3) { + using stan::math::gamma_lccdf; + using stan::math::var; + + // Large alpha = 1000, beta = 3 + // Note: This test only checks values, not gradients, as large alpha values + // can cause numerical issues with gradient computation + double alpha_d = 1000.0; + double beta_d = 3.0; + + // Test various y values + std::vector y_values = {100.0, 300.0, 333.333, 400.0, 500.0}; + + for (double y_d : y_values) { + var y_v = y_d; + var alpha_v = alpha_d; + var beta_v = beta_d; + + var lccdf_var = gamma_lccdf(y_v, alpha_v, beta_v); + + // Value should be finite and <= 0 + EXPECT_TRUE(std::isfinite(lccdf_var.val())) + << "Failed for y=" << y_d << ", alpha=" << alpha_d + << ", beta=" << beta_d; + EXPECT_LE(lccdf_var.val(), 0.0) + << "Positive value for y=" << y_d << ", alpha=" << alpha_d + << ", beta=" << beta_d; + } +} + TEST_F(AgradRev, ProbDistributionsGamma_lccdf_alpha_one_derivatives) { using stan::math::gamma_lccdf; using stan::math::var;