| // find_mean_and_sd_normal.cpp |
| |
| // Copyright Paul A. Bristow 2007, 2010. |
| |
| // Use, modification and distribution are subject to the |
| // Boost Software License, Version 1.0. |
| // (See accompanying file LICENSE_1_0.txt |
| // or copy at http://www.boost.org/LICENSE_1_0.txt) |
| |
| // Example of finding mean or sd for normal distribution. |
| |
| // Note that this file contains Quickbook mark-up as well as code |
| // and comments, don't change any of the special comment mark-ups! |
| |
| //[normal_std |
| /*` |
| First we need some includes to access the normal distribution, |
| the algorithms to find location and scale |
| (and some std output of course). |
| */ |
| |
| #include <boost/math/distributions/normal.hpp> // for normal_distribution |
| using boost::math::normal; // typedef provides default type is double. |
| #include <boost/math/distributions/cauchy.hpp> // for cauchy_distribution |
| using boost::math::cauchy; // typedef provides default type is double. |
| #include <boost/math/distributions/find_location.hpp> |
| using boost::math::find_location; |
| #include <boost/math/distributions/find_scale.hpp> |
| using boost::math::find_scale; |
| using boost::math::complement; |
| using boost::math::policies::policy; |
| |
| #include <iostream> |
| using std::cout; using std::endl; using std::left; using std::showpoint; using std::noshowpoint; |
| #include <iomanip> |
| using std::setw; using std::setprecision; |
| #include <limits> |
| using std::numeric_limits; |
| #include <stdexcept> |
| using std::exception; |
| //] [/normal_std Quickbook] |
| |
| int main() |
| { |
| cout << "Find_location (mean) and find_scale (standard deviation) examples." << endl; |
| try |
| { |
| |
| //[normal_find_location_and_scale_eg |
| |
| /*` |
| [h4 Using find_location and find_scale to meet dispensing and measurement specifications] |
| |
| Consider an example from K Krishnamoorthy, |
| Handbook of Statistical Distributions with Applications, |
| ISBN 1-58488-635-8, (2006) p 126, example 10.3.7. |
| |
| "A machine is set to pack 3 kg of ground beef per pack. |
| Over a long period of time it is found that the average packed was 3 kg |
| with a standard deviation of 0.1 kg. |
| Assume the packing is normally distributed." |
| |
| We start by constructing a normal distribution with the given parameters: |
| */ |
| |
| double mean = 3.; // kg |
| double standard_deviation = 0.1; // kg |
| normal packs(mean, standard_deviation); |
| /*`We can then find the fraction (or %) of packages that weigh more than 3.1 kg. |
| */ |
| |
| double max_weight = 3.1; // kg |
| cout << "Percentage of packs > " << max_weight << " is " |
| << cdf(complement(packs, max_weight)) * 100. << endl; // P(X > 3.1) |
| |
| /*`We might want to ensure that 95% of packs are over a minimum weight specification, |
| then we want the value of the mean such that P(X < 2.9) = 0.05. |
| |
| Using the mean of 3 kg, we can estimate |
| the fraction of packs that fail to meet the specification of 2.9 kg. |
| */ |
| |
| double minimum_weight = 2.9; |
| cout <<"Fraction of packs <= " << minimum_weight << " with a mean of " << mean |
| << " is " << cdf(complement(packs, minimum_weight)) << endl; |
| // fraction of packs <= 2.9 with a mean of 3 is 0.841345 |
| |
| /*`This is 0.84 - more than the target fraction of 0.95. |
| If we want 95% to be over the minimum weight, |
| what should we set the mean weight to be? |
| |
| Using the KK StatCalc program supplied with the book and the method given on page 126 gives 3.06449. |
| |
| We can confirm this by constructing a new distribution which we call 'xpacks' |
| with a safety margin mean of 3.06449 thus: |
| */ |
| double over_mean = 3.06449; |
| normal xpacks(over_mean, standard_deviation); |
| cout << "Fraction of packs >= " << minimum_weight |
| << " with a mean of " << xpacks.mean() |
| << " is " << cdf(complement(xpacks, minimum_weight)) << endl; |
| // fraction of packs >= 2.9 with a mean of 3.06449 is 0.950005 |
| |
| /*`Using this Math Toolkit, we can calculate the required mean directly thus: |
| */ |
| double under_fraction = 0.05; // so 95% are above the minimum weight mean - sd = 2.9 |
| double low_limit = standard_deviation; |
| double offset = mean - low_limit - quantile(packs, under_fraction); |
| double nominal_mean = mean + offset; |
| // mean + (mean - low_limit - quantile(packs, under_fraction)); |
| |
| normal nominal_packs(nominal_mean, standard_deviation); |
| cout << "Setting the packer to " << nominal_mean << " will mean that " |
| << "fraction of packs >= " << minimum_weight |
| << " is " << cdf(complement(nominal_packs, minimum_weight)) << endl; |
| // Setting the packer to 3.06449 will mean that fraction of packs >= 2.9 is 0.95 |
| |
| /*` |
| This calculation is generalized as the free function called |
| [link math_toolkit.dist.dist_ref.dist_algorithms find_location]. |
| |
| To use this we will need to |
| */ |
| |
| #include <boost/math/distributions/find_location.hpp> |
| using boost::math::find_location; |
| /*`and then use find_location function to find safe_mean, |
| & construct a new normal distribution called 'goodpacks'. |
| */ |
| double safe_mean = find_location<normal>(minimum_weight, under_fraction, standard_deviation); |
| normal good_packs(safe_mean, standard_deviation); |
| /*`with the same confirmation as before: |
| */ |
| cout << "Setting the packer to " << nominal_mean << " will mean that " |
| << "fraction of packs >= " << minimum_weight |
| << " is " << cdf(complement(good_packs, minimum_weight)) << endl; |
| // Setting the packer to 3.06449 will mean that fraction of packs >= 2.9 is 0.95 |
| |
| /*` |
| [h4 Using Cauchy-Lorentz instead of normal distribution] |
| |
| After examining the weight distribution of a large number of packs, we might decide that, |
| after all, the assumption of a normal distribution is not really justified. |
| We might find that the fit is better to a __cauchy_distrib. |
| This distribution has wider 'wings', so that whereas most of the values |
| are closer to the mean than the normal, there are also more values than 'normal' |
| that lie further from the mean than the normal. |
| |
| This might happen because a larger than normal lump of meat is either included or excluded. |
| |
| We first create a __cauchy_distrib with the original mean and standard deviation, |
| and estimate the fraction that lie below our minimum weight specification. |
| */ |
| |
| cauchy cpacks(mean, standard_deviation); |
| cout << "Cauchy Setting the packer to " << mean << " will mean that " |
| << "fraction of packs >= " << minimum_weight |
| << " is " << cdf(complement(cpacks, minimum_weight)) << endl; |
| // Cauchy Setting the packer to 3 will mean that fraction of packs >= 2.9 is 0.75 |
| |
| /*`Note that far fewer of the packs meet the specification, only 75% instead of 95%. |
| Now we can repeat the find_location, using the cauchy distribution as template parameter, |
| in place of the normal used above. |
| */ |
| |
| double lc = find_location<cauchy>(minimum_weight, under_fraction, standard_deviation); |
| cout << "find_location<cauchy>(minimum_weight, over fraction, standard_deviation); " << lc << endl; |
| // find_location<cauchy>(minimum_weight, over fraction, packs.standard_deviation()); 3.53138 |
| /*`Note that the safe_mean setting needs to be much higher, 3.53138 instead of 3.06449, |
| so we will make rather less profit. |
| |
| And again confirm that the fraction meeting specification is as expected. |
| */ |
| cauchy goodcpacks(lc, standard_deviation); |
| cout << "Cauchy Setting the packer to " << lc << " will mean that " |
| << "fraction of packs >= " << minimum_weight |
| << " is " << cdf(complement(goodcpacks, minimum_weight)) << endl; |
| // Cauchy Setting the packer to 3.53138 will mean that fraction of packs >= 2.9 is 0.95 |
| |
| /*`Finally we could estimate the effect of a much tighter specification, |
| that 99% of packs met the specification. |
| */ |
| |
| cout << "Cauchy Setting the packer to " |
| << find_location<cauchy>(minimum_weight, 0.99, standard_deviation) |
| << " will mean that " |
| << "fraction of packs >= " << minimum_weight |
| << " is " << cdf(complement(goodcpacks, minimum_weight)) << endl; |
| |
| /*`Setting the packer to 3.13263 will mean that fraction of packs >= 2.9 is 0.99, |
| but will more than double the mean loss from 0.0644 to 0.133 kg per pack. |
| |
| Of course, this calculation is not limited to packs of meat, it applies to dispensing anything, |
| and it also applies to a 'virtual' material like any measurement. |
| |
| The only caveat is that the calculation assumes that the standard deviation (scale) is known with |
| a reasonably low uncertainty, something that is not so easy to ensure in practice. |
| And that the distribution is well defined, __normal_distrib or __cauchy_distrib, or some other. |
| |
| If one is simply dispensing a very large number of packs, |
| then it may be feasible to measure the weight of hundreds or thousands of packs. |
| With a healthy 'degrees of freedom', the confidence intervals for the standard deviation |
| are not too wide, typically about + and - 10% for hundreds of observations. |
| |
| For other applications, where it is more difficult or expensive to make many observations, |
| the confidence intervals are depressingly wide. |
| |
| See [link math_toolkit.dist.stat_tut.weg.cs_eg.chi_sq_intervals Confidence Intervals on the standard deviation] |
| for a worked example |
| [@../../../example/chi_square_std_dev_test.cpp chi_square_std_dev_test.cpp] |
| of estimating these intervals. |
| |
| |
| [h4 Changing the scale or standard deviation] |
| |
| Alternatively, we could invest in a better (more precise) packer |
| (or measuring device) with a lower standard deviation, or scale. |
| |
| This might cost more, but would reduce the amount we have to 'give away' |
| in order to meet the specification. |
| |
| To estimate how much better (how much smaller standard deviation) it would have to be, |
| we need to get the 5% quantile to be located at the under_weight limit, 2.9 |
| */ |
| double p = 0.05; // wanted p th quantile. |
| cout << "Quantile of " << p << " = " << quantile(packs, p) |
| << ", mean = " << packs.mean() << ", sd = " << packs.standard_deviation() << endl; |
| /*` |
| Quantile of 0.05 = 2.83551, mean = 3, sd = 0.1 |
| |
| With the current packer (mean = 3, sd = 0.1), the 5% quantile is at 2.8551 kg, |
| a little below our target of 2.9 kg. |
| So we know that the standard deviation is going to have to be smaller. |
| |
| Let's start by guessing that it (now 0.1) needs to be halved, to a standard deviation of 0.05 kg. |
| */ |
| normal pack05(mean, 0.05); |
| cout << "Quantile of " << p << " = " << quantile(pack05, p) |
| << ", mean = " << pack05.mean() << ", sd = " << pack05.standard_deviation() << endl; |
| // Quantile of 0.05 = 2.91776, mean = 3, sd = 0.05 |
| |
| cout <<"Fraction of packs >= " << minimum_weight << " with a mean of " << mean |
| << " and standard deviation of " << pack05.standard_deviation() |
| << " is " << cdf(complement(pack05, minimum_weight)) << endl; |
| // Fraction of packs >= 2.9 with a mean of 3 and standard deviation of 0.05 is 0.97725 |
| /*` |
| So 0.05 was quite a good guess, but we are a little over the 2.9 target, |
| so the standard deviation could be a tiny bit more. So we could do some |
| more guessing to get closer, say by increasing standard deviation to 0.06 kg, |
| constructing another new distribution called pack06. |
| */ |
| normal pack06(mean, 0.06); |
| cout << "Quantile of " << p << " = " << quantile(pack06, p) |
| << ", mean = " << pack06.mean() << ", sd = " << pack06.standard_deviation() << endl; |
| // Quantile of 0.05 = 2.90131, mean = 3, sd = 0.06 |
| |
| cout <<"Fraction of packs >= " << minimum_weight << " with a mean of " << mean |
| << " and standard deviation of " << pack06.standard_deviation() |
| << " is " << cdf(complement(pack06, minimum_weight)) << endl; |
| // Fraction of packs >= 2.9 with a mean of 3 and standard deviation of 0.06 is 0.95221 |
| /*` |
| Now we are getting really close, but to do the job properly, |
| we might need to use root finding method, for example the tools provided, |
| and used elsewhere, in the Math Toolkit, see |
| [link math_toolkit.toolkit.internals1.roots2 Root Finding Without Derivatives]. |
| |
| But in this (normal) distribution case, we can and should be even smarter |
| and make a direct calculation. |
| */ |
| |
| /*`Our required limit is minimum_weight = 2.9 kg, often called the random variate z. |
| For a standard normal distribution, then probability p = N((minimum_weight - mean) / sd). |
| |
| We want to find the standard deviation that would be required to meet this limit, |
| so that the p th quantile is located at z (minimum_weight). |
| In this case, the 0.05 (5%) quantile is at 2.9 kg pack weight, when the mean is 3 kg, |
| ensuring that 0.95 (95%) of packs are above the minimum weight. |
| |
| Rearranging, we can directly calculate the required standard deviation: |
| */ |
| normal N01; // standard normal distribution with meamn zero and unit standard deviation. |
| p = 0.05; |
| double qp = quantile(N01, p); |
| double sd95 = (minimum_weight - mean) / qp; |
| |
| cout << "For the "<< p << "th quantile to be located at " |
| << minimum_weight << ", would need a standard deviation of " << sd95 << endl; |
| // For the 0.05th quantile to be located at 2.9, would need a standard deviation of 0.0607957 |
| |
| /*`We can now construct a new (normal) distribution pack95 for the 'better' packer, |
| and check that our distribution will meet the specification. |
| */ |
| |
| normal pack95(mean, sd95); |
| cout <<"Fraction of packs >= " << minimum_weight << " with a mean of " << mean |
| << " and standard deviation of " << pack95.standard_deviation() |
| << " is " << cdf(complement(pack95, minimum_weight)) << endl; |
| // Fraction of packs >= 2.9 with a mean of 3 and standard deviation of 0.0607957 is 0.95 |
| |
| /*`This calculation is generalized in the free function find_scale, |
| as shown below, giving the same standard deviation. |
| */ |
| double ss = find_scale<normal>(minimum_weight, under_fraction, packs.mean()); |
| cout << "find_scale<normal>(minimum_weight, under_fraction, packs.mean()); " << ss << endl; |
| // find_scale<normal>(minimum_weight, under_fraction, packs.mean()); 0.0607957 |
| |
| /*`If we had defined an over_fraction, or percentage that must pass specification |
| */ |
| double over_fraction = 0.95; |
| /*`And (wrongly) written |
| |
| double sso = find_scale<normal>(minimum_weight, over_fraction, packs.mean()); |
| |
| With the default policy, we would get a message like |
| |
| [pre |
| Message from thrown exception was: |
| Error in function boost::math::find_scale<Dist, Policy>(double, double, double, Policy): |
| Computed scale (-0.060795683191176959) is <= 0! Was the complement intended? |
| ] |
| |
| But this would return a *negative* standard deviation - obviously impossible. |
| The probability should be 1 - over_fraction, not over_fraction, thus: |
| */ |
| |
| double ss1o = find_scale<normal>(minimum_weight, 1 - over_fraction, packs.mean()); |
| cout << "find_scale<normal>(minimum_weight, under_fraction, packs.mean()); " << ss1o << endl; |
| // find_scale<normal>(minimum_weight, under_fraction, packs.mean()); 0.0607957 |
| |
| /*`But notice that using '1 - over_fraction' - will lead to a |
| [link why_complements loss of accuracy, especially if over_fraction was close to unity.] |
| In this (very common) case, we should instead use the __complements, |
| giving the most accurate result. |
| */ |
| |
| double ssc = find_scale<normal>(complement(minimum_weight, over_fraction, packs.mean())); |
| cout << "find_scale<normal>(complement(minimum_weight, over_fraction, packs.mean())); " << ssc << endl; |
| // find_scale<normal>(complement(minimum_weight, over_fraction, packs.mean())); 0.0607957 |
| |
| /*`Note that our guess of 0.06 was close to the accurate value of 0.060795683191176959. |
| |
| We can again confirm our prediction thus: |
| */ |
| |
| normal pack95c(mean, ssc); |
| cout <<"Fraction of packs >= " << minimum_weight << " with a mean of " << mean |
| << " and standard deviation of " << pack95c.standard_deviation() |
| << " is " << cdf(complement(pack95c, minimum_weight)) << endl; |
| // Fraction of packs >= 2.9 with a mean of 3 and standard deviation of 0.0607957 is 0.95 |
| |
| /*`Notice that these two deceptively simple questions: |
| |
| * Do we over-fill to make sure we meet a minimum specification (or under-fill to avoid an overdose)? |
| |
| and/or |
| |
| * Do we measure better? |
| |
| are actually extremely common. |
| |
| The weight of beef might be replaced by a measurement of more or less anything, |
| from drug tablet content, Apollo landing rocket firing, X-ray treatment doses... |
| |
| The scale can be variation in dispensing or uncertainty in measurement. |
| */ |
| //] [/normal_find_location_and_scale_eg Quickbook end] |
| |
| } |
| catch(const exception& e) |
| { // Always useful to include try & catch blocks because default policies |
| // are to throw exceptions on arguments that cause errors like underflow, overflow. |
| // Lacking try & catch blocks, the program will abort without a message below, |
| // which may give some helpful clues as to the cause of the exception. |
| cout << |
| "\n""Message from thrown exception was:\n " << e.what() << endl; |
| } |
| return 0; |
| } // int main() |
| |
| |
| /* |
| |
| Output is: |
| |
| //[normal_find_location_and_scale_output |
| |
| Find_location (mean) and find_scale (standard deviation) examples. |
| Percentage of packs > 3.1 is 15.8655 |
| Fraction of packs <= 2.9 with a mean of 3 is 0.841345 |
| Fraction of packs >= 2.9 with a mean of 3.06449 is 0.950005 |
| Setting the packer to 3.06449 will mean that fraction of packs >= 2.9 is 0.95 |
| Setting the packer to 3.06449 will mean that fraction of packs >= 2.9 is 0.95 |
| Cauchy Setting the packer to 3 will mean that fraction of packs >= 2.9 is 0.75 |
| find_location<cauchy>(minimum_weight, over fraction, standard_deviation); 3.53138 |
| Cauchy Setting the packer to 3.53138 will mean that fraction of packs >= 2.9 is 0.95 |
| Cauchy Setting the packer to -0.282052 will mean that fraction of packs >= 2.9 is 0.95 |
| Quantile of 0.05 = 2.83551, mean = 3, sd = 0.1 |
| Quantile of 0.05 = 2.91776, mean = 3, sd = 0.05 |
| Fraction of packs >= 2.9 with a mean of 3 and standard deviation of 0.05 is 0.97725 |
| Quantile of 0.05 = 2.90131, mean = 3, sd = 0.06 |
| Fraction of packs >= 2.9 with a mean of 3 and standard deviation of 0.06 is 0.95221 |
| For the 0.05th quantile to be located at 2.9, would need a standard deviation of 0.0607957 |
| Fraction of packs >= 2.9 with a mean of 3 and standard deviation of 0.0607957 is 0.95 |
| find_scale<normal>(minimum_weight, under_fraction, packs.mean()); 0.0607957 |
| find_scale<normal>(minimum_weight, under_fraction, packs.mean()); 0.0607957 |
| find_scale<normal>(complement(minimum_weight, over_fraction, packs.mean())); 0.0607957 |
| Fraction of packs >= 2.9 with a mean of 3 and standard deviation of 0.0607957 is 0.95 |
| |
| //] [/normal_find_location_and_scale_eg_output] |
| |
| */ |
| |
| |
| |