diff --git a/engine/util/sample_data.hpp b/engine/util/sample_data.hpp
index 86b9b786220..ac85aae6e6d 100644
--- a/engine/util/sample_data.hpp
+++ b/engine/util/sample_data.hpp
@@ -13,6 +13,7 @@
 
 #include "util/generic.hpp"
+#include "util/rng.hpp"
 #include "util/string_view.hpp"
 
 /* Collection of statistical formulas for sequences
  * Note: Returns 0 for empty sequences
@@ -54,7 +55,7 @@ range::value_type_t calculate_variance( const Range& r,
   }
   auto length = std::size( r );
   if ( length > 1 )
-    tmp /= length;
+    tmp /= length - 1;
   return tmp;
 }
 
@@ -188,6 +189,45 @@ inline std::vector normalize_histogram( const std::vector& in )
   return normalize_histogram( in, count );
 }
 
+/* Anderson-Darling normality test at the 5% significance level.
+ * Requires: r sorted ascending; mean and stddev estimated from r.
+ * Returns false if the test cannot be evaluated (fewer than two samples,
+ * stddev not positive) or if normality is rejected.
+ */
+template <typename Range>
+bool is_normal( const Range& r, range::value_type_t<Range> mean,
+                range::value_type_t<Range> stddev )
+{
+  const size_t length = std::size( r );
+  if ( length < 2 )
+    return false;
+
+  // Degenerate spread: every z-score would be 0/0 or infinite.
+  if ( !( stddev > 0 ) )
+    return false;
+
+  // Accumulate in double: in unsigned arithmetic 2 * i - 1 wraps for i == 0
+  // and -length becomes a huge positive value.
+  const double n = static_cast<double>( length );
+  double sum = 0.0;
+
+  for ( size_t i = 0; i < length; ++i )
+  {
+    const double k = static_cast<double>( i );
+    const double y_cdf = rng::stdnormal_cdf( ( r[ i ] - mean ) / stddev );
+
+    // 1-based weights ( 2j - 1 ) and ( 2( n - j ) + 1 ) with j = k + 1.
+    sum += ( 2.0 * k + 1.0 ) * std::log( y_cdf );
+    sum += ( 2.0 * ( n - k ) - 1.0 ) * std::log( 1.0 - y_cdf );
+  }
+
+  // A^2, scaled by the small-sample factor for estimated mean and variance.
+  const double a_squared = ( -n - sum / n ) * ( 1.0 + 0.75 / n + 2.25 / ( n * n ) );
+
+  // 0.752 is the 5% critical value; larger statistics reject normality.
+  return a_squared <= 0.752;
+}
+
 } // end sd namespace
 
 /* Simplest Samplest Data container. Only tracks sum and count
@@ -387,12 +427,17 @@ class extended_sample_data_t : public simple_sample_data_with_min_max_t
   }
 
   // Analyze collected data
+
+  // void analyze( sim_t& sim ) <- allows sample data objects to emit warnings
   void analyze()
   {
     sort();
     analyze_basics();
     analyze_variance();
     create_histogram();
+
+    // Normality is intentionally not asserted: failing the test is a property
+    // of the data, not a programming error. Callers may query is_normal().
   }
 
   /*
@@ -492,6 +537,21 @@ class extended_sample_data_t : public simple_sample_data_with_min_max_t
                       base_t::min(), base_t::max() );
   }
 
+  /*
+   * Test Normality
+   * Requires: Analyzed Mean, stddev, sorted
+   */
+  bool is_normal()
+  {
+    if ( simple )
+      return true;
+
+    if ( data().empty() )
+      return true;
+
+    return count() > 10 ? statistics::is_normal( sorted_data(), _mean, std_dev ) : true;
+  }
+
   void clear()
   {
     base_t::reset();