2
0
mirror of https://github.com/boostorg/math.git synced 2026-01-19 16:32:10 +00:00
Files
math/example/chi_square_std_dev_test.cpp
John Maddock 3ea5308b45 Fixed problem with chi-squared sample size estimator.
Updated test program accordingly.
Added chi-square tutorial code.


[SVN r3360]
2006-11-08 17:14:39 +00:00

419 lines
16 KiB
C++

// (C) Copyright John Maddock 2006
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt
// or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifdef _MSC_VER
# pragma warning(disable: 4512) // assignment operator could not be generated.
# pragma warning(disable: 4510) // default constructor could not be generated.
# pragma warning(disable: 4610) // can never be instantiated - user defined constructor required.
#endif
#include <iostream>
#include <iomanip>
#include <boost/math/distributions/chi_squared.hpp>
void confidence_limits_on_variance(
double Sd, // Sample Standard Deviation
unsigned N) // Sample size
{
//
// Calculate confidence intervals for the standard deviation.
// For example if we set the confidence limit to
// 0.95, we know that if we repeat the sampling
// 100 times, then we expect that the true standard deviation
// will be between out limits on 95 occations.
// Note: this is not the same as saying a 95%
// confidence interval means that there is a 95%
// probability that the interval contains the true standard deviation.
// The interval computed from a given sample either
// contains the true mean or it does not.
// See http://www.itl.nist.gov/div898/handbook/eda/section3/eda358.htm
using namespace std;
using namespace boost::math;
// Print out general info:
cout <<
"________________________________________________\n"
"2-Sided Confidence Limits For Standard Deviation\n"
"________________________________________________\n\n";
cout << setprecision(7);
cout << setw(40) << left << "Number of Observations" << "= " << N << "\n";
cout << setw(40) << left << "Standard Deviation" << "= " << Sd << "\n";
//
// Define a table of significance/risk levels:
//
double alpha[] = { 0.5, 0.25, 0.1, 0.05, 0.01, 0.001, 0.0001, 0.00001 };
//
// Start by declaring the distribution we'll need:
//
chi_squared dist(N - 1);
//
// Print table header:
//
cout << "\n\n"
"_____________________________________________\n"
"Confidence Lower Upper\n"
" Value (%) Limit Limit\n"
"_____________________________________________\n";
//
// Now print out the data for the table rows.
//
for(unsigned i = 0; i < sizeof(alpha)/sizeof(alpha[0]); ++i)
{
// Confidence value:
cout << fixed << setprecision(3) << setw(10) << right << 100 * (1-alpha[i]);
// Calculate limits:
double lower_limit = sqrt((N - 1) * Sd * Sd / quantile(complement(dist, alpha[i] / 2)));
double upper_limit = sqrt((N - 1) * Sd * Sd / quantile(dist, alpha[i] / 2));
// Print Limits:
cout << fixed << setprecision(5) << setw(15) << right << lower_limit;
cout << fixed << setprecision(5) << setw(15) << right << upper_limit << endl;
}
cout << endl;
}
void chi_squared_test(
double Sd, // Sample std deviation
double D, // True std deviation
unsigned N, // Sample size
double alpha) // Significance level
{
//
// A Chi Squared test applied to a single set of data.
// We are testing the null hypothesis that the true
// standard deviation of the sample is D, and that any variation is down
// to chance. We can also test the alternative hypothesis
// that any difference is not down to chance.
// See http://www.itl.nist.gov/div898/handbook/eda/section3/eda358.htm
//
using namespace std;
using namespace boost::math;
// Print header:
cout <<
"______________________________________________\n"
"Chi Squared test for sample standard deviation\n"
"______________________________________________\n\n";
cout << setprecision(5);
cout << setw(55) << left << "Number of Observations" << "= " << N << "\n";
cout << setw(55) << left << "Sample Standard Deviation" << "= " << Sd << "\n";
cout << setw(55) << left << "Expected True Standard Deviation" << "= " << D << "\n\n";
//
// Now we can calculate and output some stats:
//
// test-statistic:
double t_stat = (N - 1) * (Sd / D) * (Sd / D);
cout << setw(55) << left << "Test Statistic" << "= " << t_stat << "\n";
//
// Finally define our distribution, and get the probability:
//
chi_squared dist(N - 1);
double p = cdf(dist, t_stat);
cout << setw(55) << left << "CDF of test statistic: " << "= "
<< setprecision(3) << scientific << p << "\n";
double ucv = quantile(complement(dist, alpha));
double ucv2 = quantile(complement(dist, alpha / 2));
double lcv = quantile(dist, alpha);
double lcv2 = quantile(dist, alpha / 2);
cout << setw(55) << left << "Upper Critical Value at alpha: " << "= "
<< setprecision(3) << scientific << ucv << "\n";
cout << setw(55) << left << "Upper Critical Value at alpha/2: " << "= "
<< setprecision(3) << scientific << ucv2 << "\n";
cout << setw(55) << left << "Lower Critical Value at alpha: " << "= "
<< setprecision(3) << scientific << lcv << "\n";
cout << setw(55) << left << "Lower Critical Value at alpha/2: " << "= "
<< setprecision(3) << scientific << lcv2 << "\n\n";
//
// Finally print out results of alternative hypothesis:
//
cout << setw(55) << left <<
"Results for Alternative Hypothesis and alpha" << "= "
<< setprecision(4) << fixed << alpha << "\n\n";
cout << "Alternative Hypothesis Conclusion\n";
cout << "Standard Deviation != " << setprecision(3) << fixed << D << " ";
if((ucv2 < t_stat) || (lcv2 > t_stat))
cout << "ACCEPTED\n";
else
cout << "REJECTED\n";
cout << "Standard Deviation < " << setprecision(3) << fixed << D << " ";
if(lcv > t_stat)
cout << "ACCEPTED\n";
else
cout << "REJECTED\n";
cout << "Standard Deviation > " << setprecision(3) << fixed << D << " ";
if(ucv < t_stat)
cout << "ACCEPTED\n";
else
cout << "REJECTED\n";
cout << endl << endl;
}
void chi_squared_sample_sized(
double diff, // difference from variance to detect
double variance) // true variance
{
using namespace std;
using namespace boost::math;
// Print out general info:
cout <<
"_____________________________________________________________\n"
"Estimated sample sizes required for various confidence levels\n"
"_____________________________________________________________\n\n";
cout << setprecision(5);
cout << setw(40) << left << "True Variance" << "= " << variance << "\n";
cout << setw(40) << left << "Difference to detect" << "= " << diff << "\n";
//
// Define a table of significance levels:
//
double alpha[] = { 0.5, 0.25, 0.1, 0.05, 0.01, 0.001, 0.0001, 0.00001 };
//
// Print table header:
//
cout << "\n\n"
"_______________________________________________________________\n"
"Confidence Estimated Estimated\n"
" Value (%) Sample Size Sample Size\n"
" (lower one (upper one\n"
" sided test) sided test)\n"
"_______________________________________________________________\n";
//
// Now print out the data for the table rows.
//
for(unsigned i = 0; i < sizeof(alpha)/sizeof(alpha[0]); ++i)
{
// Confidence value:
cout << fixed << setprecision(3) << setw(10) << right << 100 * (1-alpha[i]);
// calculate df for a lower single sided test:
double df = chi_squared::estimate_degrees_of_freedom(
-diff, alpha[i], alpha[i], variance);
// convert to sample size:
double size = ceil(df) + 1;
// Print size:
cout << fixed << setprecision(0) << setw(16) << right << size;
// calculate df for an upper single sided test:
df = chi_squared::estimate_degrees_of_freedom(
diff, alpha[i], alpha[i], variance);
// convert to sample size:
size = ceil(df) + 1;
// Print size:
cout << fixed << setprecision(0) << setw(16) << right << size << endl;
}
cout << endl;
}
int main()
{
//
// Run tests for Gear data
// see http://www.itl.nist.gov/div898/handbook/eda/section3/eda3581.htm
// Tests measurements of gear diameter.
//
confidence_limits_on_variance(0.6278908E-02, 100);
chi_squared_test(0.6278908E-02, 0.1, 100, 0.05);
chi_squared_sample_sized(0.1 - 0.6278908E-02, 0.1);
//
// Run tests for silicon wafer fabrication data.
// see http://www.itl.nist.gov/div898/handbook/prc/section2/prc23.htm
// A supplier of 100 ohm.cm silicon wafers claims that his fabrication
// process can produce wafers with sufficient consistency so that the
// standard deviation of resistivity for the lot does not exceed
// 10 ohm.cm. A sample of N = 10 wafers taken from the lot has a
// standard deviation of 13.97 ohm.cm
//
confidence_limits_on_variance(13.97, 10);
chi_squared_test(13.97, 10.0, 10, 0.05);
chi_squared_sample_sized(13.97 * 13.97 - 100, 100);
chi_squared_sample_sized(55, 100);
chi_squared_sample_sized(1, 100);
return 0;
}
/*
Output:
________________________________________________
2-Sided Confidence Limits For Standard Deviation
________________________________________________
Number of Observations = 100
Standard Deviation = 0.006278908
_____________________________________________
Confidence Lower Upper
Value (%) Limit Limit
_____________________________________________
50.000 0.00601 0.00662
75.000 0.00582 0.00685
90.000 0.00563 0.00712
95.000 0.00551 0.00729
99.000 0.00530 0.00766
99.900 0.00507 0.00812
99.990 0.00489 0.00855
99.999 0.00474 0.00895
______________________________________________
Chi Squared test for sample standard deviation
______________________________________________
Number of Observations = 100
Sample Standard Deviation = 0.00628
Expected True Standard Deviation = 0.10000
Test Statistic = 0.39030
CDF of test statistic: = 1.438e-099
Upper Critical Value at alpha: = 1.232e+002
Upper Critical Value at alpha/2: = 1.284e+002
Lower Critical Value at alpha: = 7.705e+001
Lower Critical Value at alpha/2: = 7.336e+001
Results for Alternative Hypothesis and alpha = 0.0500
Alternative Hypothesis Conclusion
Standard Deviation != 0.100 ACCEPTED
Standard Deviation < 0.100 ACCEPTED
Standard Deviation > 0.100 REJECTED
_____________________________________________________________
Estimated sample sizes required for various confidence levels
_____________________________________________________________
True Variance = 0.10000
Difference to detect = 0.09372
_______________________________________________________________
Confidence Estimated Estimated
Value (%) Sample Size Sample Size
(lower one (upper one
sided test) sided test)
_______________________________________________________________
50.000 2 2
75.000 2 10
90.000 4 32
95.000 5 52
99.000 8 102
99.900 13 178
99.990 18 257
99.999 23 337
________________________________________________
2-Sided Confidence Limits For Standard Deviation
________________________________________________
Number of Observations = 10
Standard Deviation = 13.9700000
_____________________________________________
Confidence Lower Upper
Value (%) Limit Limit
_____________________________________________
50.000 12.41880 17.25579
75.000 11.23084 19.74131
90.000 10.18898 22.98341
95.000 9.60906 25.50377
99.000 8.62898 31.81825
99.900 7.69466 42.51593
99.990 7.04085 55.93352
99.999 6.54517 73.00132
______________________________________________
Chi Squared test for sample standard deviation
______________________________________________
Number of Observations = 10
Sample Standard Deviation = 13.97000
Expected True Standard Deviation = 10.00000
Test Statistic = 17.56448
CDF of test statistic: = 9.594e-001
Upper Critical Value at alpha: = 1.692e+001
Upper Critical Value at alpha/2: = 1.902e+001
Lower Critical Value at alpha: = 3.325e+000
Lower Critical Value at alpha/2: = 2.700e+000
Results for Alternative Hypothesis and alpha = 0.0500
Alternative Hypothesis Conclusion
Standard Deviation != 10.000 REJECTED
Standard Deviation < 10.000 REJECTED
Standard Deviation > 10.000 ACCEPTED
_____________________________________________________________
Estimated sample sizes required for various confidence levels
_____________________________________________________________
True Variance = 100.00000
Difference to detect = 95.16090
_______________________________________________________________
Confidence Estimated Estimated
Value (%) Sample Size Sample Size
(lower one (upper one
sided test) sided test)
_______________________________________________________________
50.000 2 2
75.000 2 10
90.000 4 32
95.000 5 51
99.000 7 99
99.900 11 174
99.990 15 251
99.999 20 330
_____________________________________________________________
Estimated sample sizes required for various confidence levels
_____________________________________________________________
True Variance = 100.00000
Difference to detect = 55.00000
_______________________________________________________________
Confidence Estimated Estimated
Value (%) Sample Size Sample Size
(lower one (upper one
sided test) sided test)
_______________________________________________________________
50.000 2 2
75.000 8 21
90.000 23 71
95.000 36 115
99.000 71 228
99.900 123 401
99.990 177 580
99.999 232 762
_____________________________________________________________
Estimated sample sizes required for various confidence levels
_____________________________________________________________
True Variance = 100.00000
Difference to detect = 1.00000
_______________________________________________________________
Confidence Estimated Estimated
Value (%) Sample Size Sample Size
(lower one (upper one
sided test) sided test)
_______________________________________________________________
50.000 2 2
75.000 36033 36761
90.000 130079 132707
95.000 214283 218612
99.000 428628 437287
99.900 756333 771612
99.990 1095435 1117564
99.999 1440608 1469711
*/