-
Notifications
You must be signed in to change notification settings - Fork 256
ENH: add parameter finder for degrees or freedom for students_t distribution #1385
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
de6a562
7b015ec
a1609a9
1cf9ea2
1132241
123fdd1
24c88ba
0c8365a
1868814
b97dceb
9a26afc
eecf3d0
0308d3c
4957f2e
9c865a3
80de922
2540e5f
739df4b
ef2e0d2
a16d4bd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -19,10 +19,13 @@ | |||
| #include <boost/math/distributions/fwd.hpp> | ||||
| #include <boost/math/special_functions/beta.hpp> // for ibeta(a, b, x). | ||||
| #include <boost/math/special_functions/digamma.hpp> | ||||
| #include <boost/math/special_functions/relative_difference.hpp> | ||||
| #include <boost/math/distributions/complement.hpp> | ||||
| #include <boost/math/distributions/detail/common_error_handling.hpp> | ||||
| #include <boost/math/distributions/normal.hpp> | ||||
| #include <boost/math/distributions/normal.hpp> | ||||
| #include <boost/math/distributions/cauchy.hpp> | ||||
| #include <boost/math/policies/policy.hpp> | ||||
| #include <iostream> | ||||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||
|
|
||||
| #ifdef _MSC_VER | ||||
| # pragma warning(push) | ||||
|
|
@@ -58,6 +61,10 @@ class students_t_distribution | |||
| RealType sd, | ||||
| RealType hint = 100); | ||||
|
|
||||
| BOOST_MATH_GPU_ENABLED static RealType invert_probability_with_respect_to_degrees_of_freedom( | ||||
| RealType x, | ||||
| RealType p); | ||||
|
|
||||
| private: | ||||
| // Data member: | ||||
| RealType df_; // degrees of freedom is a real number > 0 or +infinity. | ||||
|
|
@@ -281,6 +288,26 @@ BOOST_MATH_GPU_ENABLED inline RealType quantile(const complemented2_type<student | |||
| // Parameter estimation follows: | ||||
| // | ||||
| namespace detail{ | ||||
|
|
||||
| // Checks if the CDF of a given distribution at x matches p within 4 epsilon. | ||||
| // Returns true and sets result=df if so, otherwise returns false. | ||||
| template <class Distribution, class RealType> | ||||
| BOOST_MATH_GPU_ENABLED bool analytical_df_if_cdf_matches(const Distribution& dist, RealType x, RealType p, RealType df, RealType& result) | ||||
| { | ||||
| RealType cdf_val = cdf(dist, x); | ||||
| RealType epsilon_diff = epsilon_difference(cdf_val, p); | ||||
| if (epsilon_diff < RealType(4.)) | ||||
| { | ||||
| result = df; | ||||
| return true; | ||||
| } | ||||
| return false; | ||||
| } | ||||
|
|
||||
| // Minimum degrees-of-freedom used as the warm-start fallback when the | ||||
| // Edgeworth approximation yields no valid positive root or is inaccurate | ||||
| constexpr double df_hint_fallback = 0.01; | ||||
|
|
||||
| // | ||||
| // Functors for finding degrees of freedom: | ||||
| // | ||||
|
|
@@ -308,6 +335,94 @@ struct sample_size_func | |||
| RealType alpha, beta, ratio; | ||||
| }; | ||||
|
|
||||
| template <class RealType, class Policy> | ||||
| struct cdf_to_df_func | ||||
| { | ||||
| BOOST_MATH_GPU_ENABLED cdf_to_df_func(RealType x_val, RealType p_val, bool c) | ||||
| : x(x_val), p(p_val), comp(c) {} | ||||
|
|
||||
| BOOST_MATH_GPU_ENABLED RealType operator()(const RealType& df) | ||||
| { | ||||
| students_t_distribution<RealType, Policy> t(df); | ||||
| return comp ? | ||||
| RealType(p - cdf(complement(t, x))) | ||||
| : RealType(cdf(t, x) - p); | ||||
| } | ||||
| RealType x, p; | ||||
| bool comp; | ||||
| }; | ||||
|
|
||||
| // | ||||
| // Shared root-finding helper used by both find_degrees_of_freedom and | ||||
| // invert_probability_with_respect_to_degrees_of_freedom. | ||||
| // | ||||
| template <class RealType, class Policy, class Func> | ||||
| BOOST_MATH_GPU_ENABLED RealType solve_for_degrees_of_freedom( | ||||
| Func f, | ||||
| RealType hint, | ||||
| bool rising, | ||||
| const char* function, | ||||
| Policy const&) | ||||
| { | ||||
| tools::eps_tolerance<RealType> tol(policies::digits<RealType, Policy>()); | ||||
| boost::math::uintmax_t max_iter = policies::get_max_root_iterations<Policy>(); | ||||
| boost::math::pair<RealType, RealType> r = tools::bracket_and_solve_root( | ||||
| f, hint, RealType(2), rising, tol, max_iter, Policy()); | ||||
| RealType result = r.first + (r.second - r.first) / 2; | ||||
| if (max_iter >= policies::get_max_root_iterations<Policy>()) | ||||
| { | ||||
| return policies::raise_evaluation_error<RealType>(function, // LCOV_EXCL_LINE | ||||
| "Unable to locate solution in a reasonable time: either there is no answer to " // LCOV_EXCL_LINE | ||||
| "the degrees of freedom or the answer is infinite. Current best guess is %1%", // LCOV_EXCL_LINE | ||||
| result, Policy()); // LCOV_EXCL_LINE | ||||
| } | ||||
| return result; | ||||
| } | ||||
|
|
||||
| // | ||||
| // Edgeworth warm-start for invert_probability_with_respect_to_degrees_of_freedom. | ||||
| // On success writes a df estimate into 'result' and returns true. | ||||
| // Returns false when no positive root is found; 'result' is left unchanged. | ||||
| // | ||||
| template <class RealType, class Policy> | ||||
| BOOST_MATH_GPU_ENABLED bool approximate_df_with_edgeworth_expansion(RealType x, RealType p, RealType& result) | ||||
| { | ||||
| BOOST_MATH_STD_USING | ||||
| // F(x; nu) ~ cdf_normal(x) - pdf_normal(x)*(x + x^3)/(4*nu) + pdf_normal(x)*(3x + 5x^3 + 7x^5 - 3x^7)/(96*nu^2) | ||||
| // Substituting u = 1/nu and setting F(x; nu) = p gives b*u^2 - a*u + c = 0, where: | ||||
| // a = pdf_normal(x) * (x + x^3) / 4 | ||||
| // b = pdf_normal(x) * (3x + 5x^3 + 7x^5 - 3x^7) / 96 | ||||
| // c = cdf_normal(x) - p | ||||
| normal_distribution<RealType, Policy> std_normal(0, 1); | ||||
| RealType pdf_val = pdf(std_normal, x); | ||||
| RealType cdf_val = cdf(std_normal, x); | ||||
|
|
||||
| RealType x2 = x * x; | ||||
| RealType x3 = x2 * x; | ||||
| RealType x5 = x3 * x2; | ||||
| RealType x7 = x5 * x2; | ||||
|
|
||||
| RealType a = pdf_val * (x + x3) / 4; | ||||
| RealType b = pdf_val * (3 * x + 5 * x3 + 7 * x5 - 3 * x7) / 96; | ||||
| RealType c = cdf_val - p; | ||||
|
|
||||
| RealType discriminant = a * a - 4 * b * c; | ||||
| if (discriminant >= 0 && b != 0) | ||||
| { | ||||
| RealType sqrt_disc = sqrt(discriminant); | ||||
| // The two roots of b*u^2 - a*u + c = 0. Pick the smallest positive u (= largest df = 1/u). | ||||
| RealType u = (a - sqrt_disc) / (2 * b); | ||||
| if (u <= 0) | ||||
| u = (a + sqrt_disc) / (2 * b); | ||||
| if (u > 0) | ||||
| { | ||||
| result = 1 / u; | ||||
| return true; | ||||
| } | ||||
| } | ||||
| return false; | ||||
| } | ||||
|
|
||||
| } // namespace detail | ||||
|
|
||||
| template <class RealType, class Policy> | ||||
|
|
@@ -332,16 +447,62 @@ BOOST_MATH_GPU_ENABLED RealType students_t_distribution<RealType, Policy>::find_ | |||
| hint = 1; | ||||
|
|
||||
| detail::sample_size_func<RealType, Policy> f(alpha, beta, sd, difference_from_mean); | ||||
| tools::eps_tolerance<RealType> tol(policies::digits<RealType, Policy>()); | ||||
| boost::math::uintmax_t max_iter = policies::get_max_root_iterations<Policy>(); | ||||
| boost::math::pair<RealType, RealType> r = tools::bracket_and_solve_root(f, hint, RealType(2), false, tol, max_iter, Policy()); | ||||
| RealType result = r.first + (r.second - r.first) / 2; | ||||
| if(max_iter >= policies::get_max_root_iterations<Policy>()) | ||||
| return detail::solve_for_degrees_of_freedom(f, hint, false, function, Policy()); | ||||
| } | ||||
|
|
||||
| template <class RealType, class Policy> | ||||
| BOOST_MATH_GPU_ENABLED RealType students_t_distribution<RealType, Policy>::invert_probability_with_respect_to_degrees_of_freedom( | ||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We already have a function for this here:
However, you have a better initial guess to the root finder which would be a very welcome addition!
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I found this function but was not sure how to map the inversion problem we have here (finding degrees of freedom given a probability and a quantile) to this one arising from sample size calculation. The signatures are:
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah OK, slightly different problems, I would still overload the existing function though for consistency and call it: find_degrees_of_freedom(RealType t_stat, RealType p) |
||||
| RealType x, | ||||
| RealType p) | ||||
| { | ||||
| BOOST_MATH_STD_USING // for ADL of std functions | ||||
| constexpr auto function = "boost::math::students_t_distribution<%1%>::invert_probability_with_respect_to_degrees_of_freedom"; | ||||
| // | ||||
| // Check for domain errors: | ||||
| // | ||||
| RealType error_result; | ||||
| if (false == detail::check_probability(function, p, &error_result, Policy())) | ||||
| return error_result; | ||||
|
|
||||
| if (x == 0) | ||||
| { | ||||
| return policies::raise_evaluation_error<RealType>(function, "Unable to locate solution in a reasonable time: either there is no answer to how many degrees of freedom are required" // LCOV_EXCL_LINE | ||||
| " or the answer is infinite. Current best guess is %1%", result, Policy()); // LCOV_EXCL_LINE | ||||
| // CDF(0; df) = 0.5 for all df; only solvable when p == 0.5. | ||||
| if (p == static_cast<RealType>(0.5)) | ||||
| return policies::raise_overflow_error<RealType>(function, 0, Policy()); | ||||
| return policies::raise_domain_error<RealType>( | ||||
| function, | ||||
| "No degrees of freedom can satisfy CDF(0; df) == %1% (must be 0.5).", | ||||
| p, Policy()); | ||||
| } | ||||
| return result; | ||||
|
|
||||
| // Analytical cases: df = infinity (normal), df = 1 (cauchy) | ||||
| RealType analytical_df; | ||||
| boost::math::normal_distribution<RealType, Policy> norm(0, 1); | ||||
| if (detail::analytical_df_if_cdf_matches(norm, x, p, std::numeric_limits<RealType>::infinity(), analytical_df)) | ||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm really nit picking here, but there's no need for the last two parameters to analytical_df_if_cdf_matches, instead I would tend to write this as: Which also removes the need for the Except, in this particular case, we have one more wrinkle in that strictly speaking type RealType may not have an infinity, and in any case, this is an overflow error so: And the next pair of lines become: and the function simplifies to: |
||||
| return analytical_df; | ||||
| boost::math::cauchy_distribution<RealType, Policy> cauchy(0, 1); | ||||
| if (detail::analytical_df_if_cdf_matches(cauchy, x, p, static_cast<RealType>(1), analytical_df)) | ||||
| return analytical_df; | ||||
|
|
||||
| // Edgeworth warm start: compute a df estimate; fall back to df_hint_fallback if it fails | ||||
| // or is too inaccurate | ||||
| RealType hint = static_cast<RealType>(detail::df_hint_fallback); | ||||
| if (detail::approximate_df_with_edgeworth_expansion<RealType, Policy>(x, p, hint)) | ||||
| { | ||||
| // Check that approximation is at least somewhat close; | ||||
| // for small degrees of freedom it does not fail but is very inaccurate. | ||||
| RealType relative_error_threshold = static_cast<RealType>(0.1); | ||||
| students_t_distribution<RealType, Policy> t_approx(hint); | ||||
| RealType p_approx = cdf(t_approx, x); | ||||
| if (relative_difference(p_approx, p) > relative_error_threshold) | ||||
| hint = static_cast<RealType>(detail::df_hint_fallback); | ||||
| } | ||||
| // Root-find on f(df) = CDF(x; df) - p. | ||||
| // CDF(x; df) is strictly increasing in df for x > 0, decreasing for x < 0. | ||||
| // Use the smaller of p and q=1-p to avoid cancellation near 1. | ||||
| RealType q = 1 - p; | ||||
| detail::cdf_to_df_func<RealType, Policy> f(x, p < q ? p : q, p < q ? false : true); | ||||
| return detail::solve_for_degrees_of_freedom(f, hint, x > 0, function, Policy()); | ||||
| } | ||||
|
|
||||
| template <class RealType, class Policy> | ||||
|
|
||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we please rename this to find_degrees_of_freedom for consistency? IMO It's fine to overload the existing function.