Mercurial > repos > bgruening > bg_statistical_hypothesis_testing
comparison statistical_hypothesis_testing.xml @ 0:a3d8cadaf060 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/statistics commit 7c5002672919ca1e5eacacb835a4ce66ffa19656
author | bgruening |
---|---|
date | Mon, 21 Nov 2022 18:07:45 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a3d8cadaf060 |
---|---|
1 <tool id="bg_statistical_hypothesis_testing" name="Statistical hypothesis testing" version="0.3"> | |
2 <description>computes several descriptive statistics</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements" /> | |
7 <command detect_errors="exit_code"><![CDATA[ | |
8 python '$__tool_directory__/statistical_hypothesis_testing.py' | |
9 --infile '${infile}' | |
10 --outfile '${outfile}' | |
11 --test_id '${test_methods.test_methods_opts}' | |
## Tests that take only the first-sample column selection; every test id is listed exactly once. | |
12 #if str($test_methods.test_methods_opts) in ("describe", "mode", "normaltest", "kurtosistest", "skewtest", "nanmean", "nanmedian", "variation", "itemfreq", "tiecorrect"): | |
13 --sample_one_cols '${test_methods.sample_one_cols}' | |
14 #elif str($test_methods.test_methods_opts) == "gmean" or str($test_methods.test_methods_opts) == "hmean": | |
15 --sample_one_cols '${test_methods.sample_one_cols}' | |
16 --dtype "${test_methods.dtype}" | |
17 #elif str($test_methods.test_methods_opts) == "anderson": | |
18 --sample_one_cols "${test_methods.sample_one_cols}" | |
19 --dist "${test_methods.dist}" | |
20 #elif str($test_methods.test_methods_opts) == "binom_test": | |
21 --sample_one_cols "${test_methods.sample_one_cols}" | |
22 --n "${test_methods.n}" | |
23 --p "${test_methods.p}" | |
24 #elif str($test_methods.test_methods_opts) == "kurtosis": | |
25 --sample_one_cols "${test_methods.sample_one_cols}" | |
26 --axis "${test_methods.axis}" | |
27 $test_methods.fisher | |
28 $test_methods.bias | |
29 #elif str($test_methods.test_methods_opts) == "moment": | |
30 --sample_one_cols "${test_methods.sample_one_cols}" | |
31 --n "${test_methods.n}" | |
32 #elif str($test_methods.test_methods_opts) == "bayes_mvs": | |
33 --sample_one_cols "${test_methods.sample_one_cols}" | |
34 --alpha "${test_methods.alpha}" | |
35 #elif str($test_methods.test_methods_opts) == "percentileofscore": | |
36 --sample_one_cols "${test_methods.sample_one_cols}" | |
37 --score "${test_methods.score}" | |
38 --kind "${test_methods.kind}" | |
39 #elif str($test_methods.test_methods_opts) == "sigmaclip": | |
40 --sample_one_cols "${test_methods.sample_one_cols}" | |
41 --n "${test_methods.n}" | |
42 --m "${test_methods.m}" | |
43 #elif str($test_methods.test_methods_opts) == "chi2_contingency": | |
44 --sample_one_cols "${test_methods.sample_one_cols}" | |
45 $test_methods.correction | |
46 #if str($test_methods.lambda_).strip(): | |
47 --lambda_ "${test_methods.lambda_}" | |
48 #end if | |
49 #elif str($test_methods.test_methods_opts) == "skew" or str($test_methods.test_methods_opts) == "nanstd" : | |
50 --sample_one_cols "${test_methods.sample_one_cols}" | |
51 $test_methods.bias | |
52 #elif str($test_methods.test_methods_opts) == "rankdata": | |
53 --sample_one_cols "${test_methods.sample_one_cols}" | |
54 --md "${test_methods.md}" | |
55 #elif str($test_methods.test_methods_opts) == "sem" or str($test_methods.test_methods_opts) == "zscore" or str($test_methods.test_methods_opts) == "signaltonoise": | |
56 --sample_one_cols "${test_methods.sample_one_cols}" | |
57 #if str($test_methods.ddof).strip(): | |
58 --ddof "${test_methods.ddof}" | |
59 #end if | |
60 #elif str($test_methods.test_methods_opts) == "trimboth": | |
61 --sample_one_cols "${test_methods.sample_one_cols}" | |
62 #if str($test_methods.proportiontocut).strip(): | |
63 --proportiontocut "${test_methods.proportiontocut}" | |
64 #end if | |
65 #elif str($test_methods.test_methods_opts) == "trim1": | |
66 --sample_one_cols "${test_methods.sample_one_cols}" | |
67 #if str($test_methods.proportiontocut).strip(): | |
68 --proportiontocut "${test_methods.proportiontocut}" | |
69 #end if | |
70 --tail "${test_methods.tail}" | |
71 #elif str($test_methods.test_methods_opts) == "boxcox": | |
72 --sample_one_cols "${test_methods.sample_one_cols}" | |
73 --alpha "${test_methods.alpha}" | |
74 #if str($test_methods.imbda).strip(): | |
75 --imbda "${test_methods.imbda}" | |
76 #end if | |
77 #elif str($test_methods.test_methods_opts) == "boxcox_llf": | |
78 --sample_one_cols "${test_methods.sample_one_cols}" | |
79 --imbda "${test_methods.imbda}" | |
80 #elif str($test_methods.test_methods_opts) == "kstest": | |
81 --sample_one_cols "${test_methods.sample_one_cols}" | |
82 #if str($test_methods.ni).strip(): | |
83 --ni "${test_methods.ni}" | |
84 #end if | |
85 --cdf '${test_methods.cdf}' | |
86 --alternative '${test_methods.alternative}' | |
87 --mode '${test_methods.mode}' | |
88 | |
89 #elif str($test_methods.test_methods_opts) == "boxcox_normmax": | |
90 --sample_one_cols '${test_methods.sample_one_cols}' | |
91 #if str($test_methods.mf).strip(): | |
92 --mf '${test_methods.mf}' | |
93 #end if | |
94 #if str($test_methods.nf).strip(): | |
95 --nf '${test_methods.nf}' | |
96 #end if | |
97 --method '${test_methods.method}' | |
98 #elif str($test_methods.test_methods_opts) == "tmean" or str($test_methods.test_methods_opts) == "tvar" or str($test_methods.test_methods_opts) == "tstd" or str($test_methods.test_methods_opts) == "tsem": | |
99 --sample_one_cols "${test_methods.sample_one_cols}" | |
100 #if str($test_methods.mf).strip(): | |
101 --mf '${test_methods.mf}' | |
102 #end if | |
103 #if str($test_methods.nf).strip(): | |
104 --nf '${test_methods.nf}' | |
105 #end if | |
106 $test_methods.inclusive1 | |
107 $test_methods.inclusive2 | |
108 #elif str($test_methods.test_methods_opts) == "tmin": | |
109 --sample_one_cols "${test_methods.sample_one_cols}" | |
110 #if str($test_methods.mf).strip(): | |
111 --mf "${test_methods.mf}" | |
112 #end if | |
113 $test_methods.inclusive | |
114 #elif str($test_methods.test_methods_opts) == "tmax": | |
115 --sample_one_cols "${test_methods.sample_one_cols}" | |
116 #if str($test_methods.nf).strip(): | |
117 --nf "${test_methods.nf}" | |
118 #end if | |
119 $test_methods.inclusive | |
120 #elif str($test_methods.test_methods_opts) == "histogram": | |
121 --sample_one_cols "${test_methods.sample_one_cols}" | |
122 #if str($test_methods.mf).strip(): | |
123 --mf "${test_methods.mf}" | |
124 #end if | |
125 #if str($test_methods.nf).strip(): | |
126 --nf "${test_methods.nf}" | |
127 #end if | |
128 --b "${test_methods.b}" | |
129 $test_methods.printextras | |
130 #elif str($test_methods.test_methods_opts) == "cumfreq": | |
131 --sample_one_cols "${test_methods.sample_one_cols}" | |
132 #if str($test_methods.mf).strip(): | |
133 --mf "${test_methods.mf}" | |
134 #end if | |
135 #if str($test_methods.nf).strip(): | |
136 --nf "${test_methods.nf}" | |
137 #end if | |
138 --b "${test_methods.b}" | |
139 #elif str($test_methods.test_methods_opts) == "threshold": | |
140 --sample_one_cols "${test_methods.sample_one_cols}" | |
141 #if str($test_methods.mf).strip(): | |
142 --mf "${test_methods.mf}" | |
143 #end if | |
144 #if str($test_methods.nf).strip(): | |
145 --nf "${test_methods.nf}" | |
146 #end if | |
147 --new "${test_methods.new}" | |
148 #elif str($test_methods.test_methods_opts) == "relfreq": | |
149 --sample_one_cols "${test_methods.sample_one_cols}" | |
150 #if str($test_methods.mf).strip(): | |
151 --mf "${test_methods.mf}" | |
152 #end if | |
153 #if str($test_methods.nf).strip(): | |
154 --nf "${test_methods.nf}" | |
155 #end if | |
156 --b "${test_methods.b}" | |
157 #elif str($test_methods.test_methods_opts) == "spearmanr": | |
158 --sample_one_cols "${test_methods.sample_one_cols}" | |
159 #if str($test_methods.sample_two_cols).strip(): | |
160 --sample_two_cols "${test_methods.sample_two_cols}" | |
161 #end if | |
162 #elif str($test_methods.test_methods_opts) == "theilslopes": | |
163 --sample_one_cols "${test_methods.sample_one_cols}" | |
164 #if str($test_methods.sample_two_cols).strip(): | |
165 --sample_two_cols "${test_methods.sample_two_cols}" | |
166 #end if | |
167 --alpha "${test_methods.alpha}" | |
168 #elif str($test_methods.test_methods_opts) == "chisquare": | |
169 --sample_one_cols "${test_methods.sample_one_cols}" | |
170 #if str($test_methods.sample_two_cols).strip(): | |
171 --sample_two_cols "${test_methods.sample_two_cols}" | |
172 #end if | |
173 #if str($test_methods.ddof).strip(): | |
174 --ddof "${test_methods.ddof}" | |
175 #end if | |
176 #elif str($test_methods.test_methods_opts) == "power_divergence": | |
177 --sample_one_cols "${test_methods.sample_one_cols}" | |
178 #if str($test_methods.sample_two_cols).strip(): | |
179 --sample_two_cols "${test_methods.sample_two_cols}" | |
180 #end if | |
181 #if str($test_methods.ddof).strip(): | |
182 --ddof "${test_methods.ddof}" | |
183 #end if | |
184 #if str($test_methods.lambda_).strip(): | |
185 --lambda_ "${test_methods.lambda_}" | |
186 #end if | |
187 #elif str($test_methods.test_methods_opts) == "combine_pvalues": | |
188 --sample_one_cols "${test_methods.sample_one_cols}" | |
189 #if str($test_methods.sample_two_cols).strip() and $test_methods.sample_two_cols: | |
190 --sample_two_cols "${test_methods.sample_two_cols}" | |
191 #end if | |
192 --med "${test_methods.med}" | |
193 #elif str($test_methods.test_methods_opts) == "wilcoxon": | |
194 --sample_one_cols "${test_methods.sample_one_cols}" | |
195 #if str($test_methods.sample_two_cols).strip() and $test_methods.sample_two_cols: | |
196 --sample_two_cols "${test_methods.sample_two_cols}" | |
197 #end if | |
198 --zero_method "${test_methods.zero_method}" | |
199 $test_methods.correction | |
200 #elif str($test_methods.test_methods_opts) == "ranksums" or str($test_methods.test_methods_opts) == "ansari" or str($test_methods.test_methods_opts) == "linregress" or str($test_methods.test_methods_opts) == "pearsonr" or str($test_methods.test_methods_opts) == "pointbiserialr" or str($test_methods.test_methods_opts) == "ks_2samp" or str($test_methods.test_methods_opts) == "ttest_1samp" or str($test_methods.test_methods_opts) == "histogram2": | |
201 --sample_one_cols '${test_methods.sample_one_cols}' | |
202 --sample_two_cols '${test_methods.sample_two_cols}' | |
203 #elif str($test_methods.test_methods_opts) == "entropy": | |
## entropy: both column selections must be fully single-quoted so the shell receives balanced quotes. | |
204 --sample_one_cols '${test_methods.sample_one_cols}' | |
205 --sample_two_cols '${test_methods.sample_two_cols}' | |
206 --base "${test_methods.base}" | |
207 #elif str($test_methods.test_methods_opts) == "kendalltau": | |
## kendalltau: passes both column selections and emits $test_methods.initial_lexsort verbatim | |
## (presumably a boolean flag defined by macro_initial_lexsort - confirm in macros.xml). | |
208 --sample_one_cols '${test_methods.sample_one_cols}' | |
209 --sample_two_cols '${test_methods.sample_two_cols}' | |
210 $test_methods.initial_lexsort | |
215 #elif str($test_methods.test_methods_opts) == "mannwhitneyu": | |
216 --sample_one_cols '${test_methods.sample_one_cols}' | |
217 --sample_two_cols '${test_methods.sample_two_cols}' | |
218 $test_methods.mwu_use_continuity | |
219 #elif str($test_methods.test_methods_opts) == "ttest_ind": | |
220 --sample_one_cols '${test_methods.sample_one_cols}' | |
221 --sample_two_cols '${test_methods.sample_two_cols}' | |
222 $test_methods.equal_var | |
223 #elif str($test_methods.test_methods_opts) == "ttest_rel": | |
224 --sample_one_cols '${test_methods.sample_one_cols}' | |
225 --sample_two_cols '${test_methods.sample_two_cols}' | |
226 --axis "${test_methods.axis}" | |
227 #elif str($test_methods.test_methods_opts) == "zmap": | |
228 --sample_one_cols '${test_methods.sample_one_cols}' | |
229 --sample_two_cols '${test_methods.sample_two_cols}' | |
230 #if str($test_methods.ddof).strip(): | |
231 --ddof '${test_methods.ddof}' | |
232 #end if | |
233 #elif str($test_methods.test_methods_opts) == "binned_statistic": | |
234 --sample_one_cols '${test_methods.sample_one_cols}' | |
235 --sample_two_cols '${test_methods.sample_two_cols}' | |
236 #if str($test_methods.mf).strip(): | |
237 --mf '${test_methods.mf}' | |
238 #end if | |
239 #if str($test_methods.nf).strip(): | |
240 --nf '${test_methods.nf}' | |
241 #end if | |
242 --statistic '${test_methods.statistic}' | |
243 --b '${test_methods.b}' | |
244 #elif str($test_methods.test_methods_opts) == "scoreatpercentile": | |
245 --sample_one_cols "${test_methods.sample_one_cols}" | |
246 --sample_two_cols "${test_methods.sample_two_cols}" | |
247 #if str($test_methods.mf).strip(): | |
248 --mf '${test_methods.mf}' | |
249 #end if | |
250 #if str($test_methods.nf).strip(): | |
251 --nf '${test_methods.nf}' | |
252 #end if | |
253 --interpolation '${test_methods.interpolation}' | |
254 #elif str($test_methods.test_methods_opts) == "mood": | |
255 --axis "${test_methods.axis}" | |
256 --sample_one_cols "${test_methods.sample_one_cols}" | |
257 --sample_two_cols "${test_methods.sample_two_cols}" | |
258 #elif str($test_methods.test_methods_opts) == "shapiro": | |
259 --sample_one_cols "${test_methods.sample_one_cols}" | |
260 #elif str($test_methods.test_methods_opts) == "bartlett" or str($test_methods.test_methods_opts) == "f_oneway" or str($test_methods.test_methods_opts) == "kruskal" or str($test_methods.test_methods_opts) == "friedmanchisquare" or str($test_methods.test_methods_opts) == "obrientransform": | |
261 --sample_cols "#echo ';'.join( [str($list.sample_cols) for $list in $test_methods.samples] )#" | |
262 #elif str($test_methods.test_methods_opts) == "levene": | |
263 --sample_cols "#echo ';'.join( [str($list.sample_cols) for $list in $test_methods.samples] )#" | |
264 --center "${test_methods.center}" | |
265 #if str($test_methods.proportiontocut).strip(): | |
266 --proportiontocut "${test_methods.proportiontocut}" | |
267 #end if | |
268 #elif str($test_methods.test_methods_opts) == "fligner": | |
269 --sample_cols "#echo ';'.join( [str($list.sample_cols) for $list in $test_methods.samples] )#" | |
270 --center "${test_methods.center}" | |
271 #if str($test_methods.proportiontocut).strip(): | |
272 --proportiontocut "${test_methods.proportiontocut}" | |
273 #end if | |
274 #elif str($test_methods.test_methods_opts) == "median_test": | |
275 --sample_cols "#echo ';'.join( [str($list.sample_cols) for $list in $test_methods.samples] )#" | |
276 $test_methods.correction | |
277 #if str($test_methods.lambda_).strip(): | |
278 --lambda_ "${test_methods.lambda_}" | |
279 #end if | |
280 --ties '${test_methods.ties}' | |
281 #end if | |
282 ]]></command> | |
283 <inputs> | |
284 <param name="infile" type="data" format="tabular" label="Sample file" help="tabular file containing the observations"/> | |
285 <conditional name="test_methods"> | |
286 <param name="test_methods_opts" type="select" label="Select a statistical test method"> | |
287 <option value="describe">Computes several descriptive statistics of the passed array</option> | |
288 <option value="gmean">Compute the geometric mean along the specified axis</option> | |
289 <option value="hmean">Calculates the harmonic mean along the specified axis</option> | |
290 <option value="kurtosis">Computes the kurtosis (Fisher or Pearson) of a dataset</option> | |
291 <option value="kurtosistest">Tests whether a dataset has normal kurtosis</option> | |
292 <option value="mode">show the most common value in the passed array</option> | |
293 <option value="moment">Calculates the nth moment about the mean for a sample</option> | |
294 <option value="normaltest">Tests whether a sample differs from a normal distribution</option> | |
295 <option value="skew">Computes the skewness of a data set.</option> | |
296 <option value="skewtest">Tests whether the skew is different from the normal distribution.</option> | |
297 <option value="tmean">Compute the trimmed mean</option> | |
298 <option value="tvar">Compute the trimmed variance</option> | |
299 <option value="tmin">Compute the trimmed minimum</option> | |
300 <option value="tmax">Compute the trimmed maximum</option> | |
301 <option value="tstd">Compute the trimmed sample standard deviation</option> | |
302 <option value="tsem">Compute the trimmed standard error of the mean</option> | |
303 <option value="nanmean">Compute the mean ignoring nans</option> | |
304 <option value="nanstd">Compute the standard deviation ignoring nans</option> | |
305 <option value="nanmedian">Compute the median ignoring nan values.</option> | |
306 <option value="variation">Computes the coefficient of variation, the ratio of the biased standard deviation to the mean.</option> | |
307 <option value="cumfreq">Returns a cumulative frequency histogram, using the histogram function</option> | |
308 <option value="histogram2">Compute histogram using divisions in bins</option> | |
309 <option value="histogram">Separates the range into several bins</option> | |
310 <option value="itemfreq">Compute frequencies for each number</option> | |
311 <option value="percentileofscore">The percentile rank of a score relative to a list of scores</option> | |
312 <option value="scoreatpercentile">Calculate the score at a given percentile of the input sequence</option> | |
313 <option value="relfreq">Returns a relative frequency histogram, using the histogram function</option> | |
314 <option value="binned_statistic">Compute a binned statistic for a set of data</option> | |
315 <option value="obrientransform">Computes the O’Brien transform on input data</option> | |
316 <option value="signaltonoise">The signal-to-noise ratio of the input data</option> | |
317 <option value="bayes_mvs">Bayesian confidence intervals for the mean, var, and std</option> | |
318 <option value="sem">Calculates the standard error of the mean of the value</option> | |
319 <option value="zmap">Calculates the relative z-scores</option> | |
320 <option value="zscore">Calculates the z score of each value in the sample, relative to the sample mean and standard deviation</option> | |
321 <option value="sigmaclip">Iterative sigma-clipping of array elements</option> | |
322 <option value="threshold">Clip array to a given value</option> | |
323 <option value="trimboth">Slices off a proportion of items from both ends of an array</option> | |
324 <option value="trim1">Slices off a proportion of items from ONE end of the passed array distribution</option> | |
325 <option value="f_oneway">Performs a 1-way ANOVA</option> | |
326 <option value="pearsonr">Calculates a Pearson correlation coefficient and the p-value for testing non-correlation.</option> | |
327 <option value="spearmanr">Calculates a Spearman rank-order correlation coefficient and the p-value to test for non-correlation</option> | |
328 <option value="pointbiserialr">Calculates a point biserial correlation coefficient and the associated p-value</option> | |
329 <option value="kendalltau">Calculates Kendall’s tau, a correlation measure for ordinal data</option> | |
330 <option value="linregress">This computes a least-squares regression for two sets of measurements</option> | |
331 <option value="theilslopes">Computes the Theil-Sen estimator for a set of points (x, y)</option> | |
332 <option value="ttest_1samp">Calculates the T-test for the mean of ONE group of scores</option> | |
333 <option value="ttest_ind">T-test for the means of TWO INDEPENDENT samples of scores</option> | |
334 <option value="ttest_rel">T-test for the means of TWO RELATED samples of scores</option> | |
335 <option value="kstest">Perform the Kolmogorov-Smirnov test for goodness of fit.</option> | |
336 <option value="chisquare">Calculates a one-way chi square test</option> | |
337 <option value="power_divergence">Cressie-Read power divergence statistic and goodness of fit test</option> | |
338 <option value="ks_2samp">Computes the Kolmogorov-Smirnov statistic on 2 samples</option> | |
339 <option value="mannwhitneyu">Computes the Mann-Whitney rank test on samples x and y</option> | |
340 <option value="tiecorrect">Tie correction factor for ties in the Mann-Whitney U and Kruskal-Wallis H tests</option> | |
341 <option value="rankdata">Assign ranks to data, dealing with ties appropriately</option> | |
342 <option value="ranksums">Compute the Wilcoxon rank-sum statistic for two samples</option> | |
343 <option value="wilcoxon">Calculate the Wilcoxon signed-rank test</option> | |
344 <option value="kruskal">Compute the Kruskal-Wallis H-test for independent samples</option> | |
345 <option value="friedmanchisquare">Computes the Friedman test for repeated measurements</option> | |
346 <option value="combine_pvalues">Methods for combining the p-values of independent tests bearing upon the same hypothesis</option> | |
347 <option value="ansari">Perform the Ansari-Bradley test for equal scale parameters</option> | |
348 <option value="bartlett">Perform Bartlett’s test for equal variances</option> | |
349 <option value="levene">Perform Levene test for equal variances.</option> | |
350 <option value="shapiro">Perform the Shapiro-Wilk test for normality</option> | |
351 <option value="anderson">Anderson-Darling test for data coming from a particular distribution</option> | |
352 <option value="binom_test">Perform a test that the probability of success is p</option> | |
353 <option value="fligner">Perform Fligner’s test for equal variances</option> | |
354 <option value="median_test">Mood’s median test</option> | |
355 <option value="mood">Perform Mood’s test for equal scale parameters</option> | |
356 <option value="boxcox">Return a positive dataset transformed by a Box-Cox power transformation</option> | |
357 <option value="boxcox_normmax">Compute optimal Box-Cox transform parameter for input data</option> | |
358 <option value="boxcox_llf">The boxcox log-likelihood function</option> | |
359 <option value="entropy">Calculate the entropy of a distribution for given probability values</option> | |
360 <option value="chi2_contingency">Chi-square test of independence of variables in a contingency table</option> | |
361 </param> | |
362 <when value="itemfreq"> | |
363 <expand macro="macro_sample_one_cols"/> | |
364 </when> | |
365 <when value="sem"> | |
366 <expand macro="macro_sample_one_cols"/> | |
367 <expand macro="macro_ddof"/> | |
368 </when> | |
369 <when value="zscore"> | |
370 <expand macro="macro_sample_one_cols"/> | |
371 <expand macro="macro_ddof"/> | |
372 </when> | |
373 <when value="relfreq"> | |
374 <expand macro="macro_sample_one_cols"/> | |
375 <expand macro="macro_mf"/> | |
376 <expand macro="macro_nf"/> | |
377 <expand macro="macro_b"/> | |
378 </when> | |
379 <when value="signaltonoise"> | |
380 <expand macro="macro_sample_one_cols"/> | |
381 <expand macro="macro_ddof"/> | |
382 </when> | |
383 <when value="bayes_mvs"> | |
384 <expand macro="macro_sample_one_cols"/> | |
385 <expand macro="macro_alpha"/> | |
386 </when> | |
387 <when value="threshold"> | |
388 <expand macro="macro_sample_one_cols"/> | |
389 <expand macro="macro_mf"/> | |
390 <expand macro="macro_nf"/> | |
391 <expand macro="macro_new"/> | |
392 </when> | |
393 <when value="trimboth"> | |
394 <expand macro="macro_sample_one_cols"/> | |
395 <expand macro="macro_proportiontocut"/> | |
396 </when> | |
397 <when value="trim1"> | |
398 <expand macro="macro_sample_one_cols"/> | |
399 <expand macro="macro_proportiontocut"/> | |
400 <expand macro="macro_tail"/> | |
401 </when> | |
402 <when value="percentileofscore"> | |
403 <expand macro="macro_sample_one_cols"/> | |
404 <expand macro="macro_score"/> | |
405 <expand macro="macro_kind"/> | |
406 </when> | |
<!-- Cases whose only input is the first-sample column selector; each test value is declared once. --> | |
407 <when value="normaltest"> | |
408 <expand macro="macro_sample_one_cols"/> | |
409 </when> | |
410 <when value="kurtosistest"> | |
411 <expand macro="macro_sample_one_cols"/> | |
412 </when> | |
413 <when value="describe"> | |
414 <expand macro="macro_sample_one_cols"/> | |
415 </when> | |
416 <when value="mode"> | |
417 <expand macro="macro_sample_one_cols"/> | |
418 </when> | |
425 <when value="skewtest"> | |
426 <expand macro="macro_sample_one_cols"/> | |
427 </when> | |
428 <when value="nanmean"> | |
429 <expand macro="macro_sample_one_cols"/> | |
430 </when> | |
431 <when value="nanmedian"> | |
432 <expand macro="macro_sample_one_cols"/> | |
433 </when> | |
434 <when value="variation"> | |
435 <expand macro="macro_sample_one_cols"/> | |
436 </when> | |
437 <when value="tiecorrect"> | |
438 <expand macro="macro_sample_one_cols"/> | |
439 </when> | |
440 <when value="gmean"> | |
441 <expand macro="macro_sample_one_cols"/> | |
442 <expand macro="macro_dtype"/> | |
443 </when> | |
444 <when value="hmean"> | |
445 <expand macro="macro_sample_one_cols"/> | |
446 <expand macro="macro_dtype"/> | |
447 </when> | |
448 <when value="sigmaclip"> | |
449 <expand macro="macro_sample_one_cols"/> | |
450 <expand macro="macro_m"/> | |
451 <expand macro="macro_n_in"/> | |
452 </when> | |
453 <when value="kurtosis"> | |
454 <expand macro="macro_sample_one_cols"/> | |
455 <expand macro="macro_axis"/> | |
456 <expand macro="macro_fisher"/> | |
457 <expand macro="macro_bias"/> | |
458 </when> | |
459 <when value="chi2_contingency"> | |
460 <expand macro="macro_sample_one_cols"/> | |
461 <expand macro="macro_correction"/> | |
462 <expand macro="macro_lambda_"/> | |
463 </when> | |
464 <when value="binom_test"> | |
465 <expand macro="macro_sample_one_cols"/> | |
466 <expand macro="macro_n_in"/> | |
467 <expand macro="macro_p"/> | |
468 </when> | |
469 <when value="moment"> | |
470 <expand macro="macro_sample_one_cols"/> | |
471 <expand macro="macro_n_moment"/> | |
472 </when> | |
473 <when value="skew"> | |
474 <expand macro="macro_sample_one_cols"/> | |
475 <expand macro="macro_bias"/> | |
476 </when> | |
477 <when value="tmean"> | |
478 <expand macro="macro_sample_one_cols"/> | |
479 <expand macro="macro_mf"/> | |
480 <expand macro="macro_nf"/> | |
481 <expand macro="macro_inclusive1"/> | |
482 <expand macro="macro_inclusive2"/> | |
483 </when> | |
484 <when value="tmin"> | |
485 <expand macro="macro_sample_one_cols"/> | |
486 <expand macro="macro_mf"/> | |
487 <expand macro="macro_inclusive"/> | |
488 </when> | |
489 <when value="tmax"> | |
490 <expand macro="macro_sample_one_cols"/> | |
491 <expand macro="macro_nf"/> | |
492 <expand macro="macro_inclusive"/> | |
493 </when> | |
494 <when value="tvar"> | |
495 <expand macro="macro_sample_one_cols"/> | |
496 <expand macro="macro_mf"/> | |
497 <expand macro="macro_nf"/> | |
498 <expand macro="macro_inclusive1"/> | |
499 <expand macro="macro_inclusive2"/> | |
500 </when> | |
501 <when value="tstd"> | |
502 <expand macro="macro_sample_one_cols"/> | |
503 <expand macro="macro_mf"/> | |
504 <expand macro="macro_nf"/> | |
505 <expand macro="macro_inclusive1"/> | |
506 <expand macro="macro_inclusive2"/> | |
507 </when> | |
508 <when value="tsem"> | |
509 <expand macro="macro_sample_one_cols"/> | |
510 <expand macro="macro_mf"/> | |
511 <expand macro="macro_nf"/> | |
512 <expand macro="macro_inclusive1"/> | |
513 <expand macro="macro_inclusive2"/> | |
514 </when> | |
515 <when value="nanstd"> | |
516 <expand macro="macro_sample_one_cols"/> | |
517 <expand macro="macro_bias"/> | |
518 </when> | |
519 <when value="histogram"> | |
520 <expand macro="macro_sample_one_cols"/> | |
521 <expand macro="macro_mf"/> | |
522 <expand macro="macro_nf"/> | |
523 <expand macro="macro_b"/> | |
524 <expand macro="macro_printextras"/> | |
525 | |
526 </when> | |
527 <when value="cumfreq"> | |
528 <expand macro="macro_sample_one_cols"/> | |
529 <expand macro="macro_mf"/> | |
530 <expand macro="macro_nf"/> | |
531 <expand macro="macro_b"/> | |
532 </when> | |
533 <when value="boxcox"> | |
534 <expand macro="macro_sample_one_cols"/> | |
535 <expand macro="macro_imbda"/> | |
536 <expand macro="macro_alpha"/> | |
537 </when> | |
538 <when value="boxcox_llf"> | |
539 <expand macro="macro_sample_one_cols"/> | |
540 <expand macro="macro_imbda"/> | |
541 </when> | |
542 <when value="boxcox_normmax"> | |
543 <expand macro="macro_sample_one_cols"/> | |
544 <expand macro="macro_mf"/> | |
545 <expand macro="macro_nf"/> | |
546 <expand macro="macro_method"/> | |
547 </when> | |
548 <when value="anderson"> | |
549 <expand macro="macro_sample_one_cols"/> | |
550 <expand macro="macro_dist"/> | |
551 </when> | |
552 <when value="rankdata"> | |
553 <expand macro="macro_sample_one_cols"/> | |
554 <expand macro="macro_md"/> | |
555 </when> | |
556 <when value="kstest"> | |
557 <expand macro="macro_sample_one_cols"/> | |
558 <expand macro="macro_cdf"/> | |
559 <expand macro="macro_ni"/> | |
560 <expand macro="macro_alternative"/> | |
561 <expand macro="macro_mode"/> | |
562 </when> | |
563 | |
564 <when value="spearmanr"> | |
565 <expand macro="macro_sample_one_cols"/> | |
566 <expand macro="macro_sample_two_cols"/> | |
567 </when> | |
568 <when value="ranksums"> | |
569 <expand macro="macro_sample_one_cols"/> | |
570 <expand macro="macro_sample_two_cols"/> | |
571 </when> | |
572 <when value="ansari"> | |
573 <expand macro="macro_sample_one_cols"/> | |
574 <expand macro="macro_sample_two_cols"/> | |
575 </when> | |
576 <when value="linregress"> | |
577 <expand macro="macro_sample_one_cols"/> | |
578 <expand macro="macro_sample_two_cols"/> | |
579 </when> | |
580 <when value="histogram2"> | |
581 <expand macro="macro_sample_one_cols"/> | |
582 <expand macro="macro_sample_two_cols"/> | |
583 </when> | |
584 <when value="pearsonr"> | |
585 <expand macro="macro_sample_one_cols"/> | |
586 <expand macro="macro_sample_two_cols"/> | |
587 </when> | |
588 <when value="pointbiserialr"> | |
589 <expand macro="macro_sample_one_cols"/> | |
590 <expand macro="macro_sample_two_cols"/> | |
591 </when> | |
592 <when value="ttest_1samp"> | |
593 <expand macro="macro_sample_one_cols"/> | |
594 <expand macro="macro_sample_two_cols"/> | |
595 </when> | |
596 <when value="ks_2samp"> | |
597 <expand macro="macro_sample_one_cols"/> | |
598 <expand macro="macro_sample_two_cols"/> | |
599 </when> | |
600 <when value="kendalltau"> | |
601 <expand macro="macro_sample_one_cols"/> | |
602 <expand macro="macro_sample_two_cols"/> | |
603 <expand macro="macro_initial_lexsort"/> | |
604 | |
605 </when> | |
606 <when value="mannwhitneyu"> | |
607 <expand macro="macro_sample_one_cols"/> | |
608 <expand macro="macro_sample_two_cols"/> | |
609 <expand macro="macro_mwu_use_continuity"/> | |
610 </when> | |
611 <when value="ttest_ind"> | |
612 <expand macro="macro_sample_one_cols"/> | |
613 <expand macro="macro_sample_two_cols"/> | |
614 <expand macro="macro_equal_var"/> | |
615 </when> | |
616 <when value="ttest_rel"> | |
617 <expand macro="macro_sample_one_cols"/> | |
618 <expand macro="macro_sample_two_cols"/> | |
619 <expand macro="macro_axis"/> | |
620 </when> | |
621 <when value="entropy"> | |
622 <expand macro="macro_sample_one_cols"/> | |
623 <expand macro="macro_sample_two_cols"/> | |
624 <expand macro="macro_base"/> | |
625 </when> | |
626 <when value="theilslopes"> | |
627 <expand macro="macro_sample_one_cols"/> | |
628 <expand macro="macro_sample_two_cols"/> | |
629 <expand macro="macro_alpha"/> | |
630 </when> | |
631 <when value="zmap"> | |
632 <expand macro="macro_sample_one_cols"/> | |
633 <expand macro="macro_sample_two_cols"/> | |
634 <expand macro="macro_ddof"/> | |
635 </when> | |
636 <when value="chisquare"> | |
637 <expand macro="macro_sample_one_cols"/> | |
638 <expand macro="macro_sample_two_cols"/> | |
639 <expand macro="macro_ddof"/> | |
640 </when> | |
641 <when value="power_divergence"> | |
642 <expand macro="macro_sample_one_cols"/> | |
643 <expand macro="macro_sample_two_cols"/> | |
644 <expand macro="macro_lambda_"/> | |
645 <expand macro="macro_ddof"/> | |
646 </when> | |
647 <when value="combine_pvalues"> | |
648 <expand macro="macro_sample_one_cols"/> | |
649 <expand macro="macro_sample_two_cols"/> | |
650 <expand macro="macro_med"/> | |
651 </when> | |
652 <when value="mood"> | |
653 <expand macro="macro_sample_one_cols"/> | |
654 <expand macro="macro_sample_two_cols"/> | |
655 <expand macro="macro_axis"/> | |
656 </when> | |
657 <when value="shapiro"> | |
658 <expand macro="macro_sample_one_cols"/> | |
659 </when> | |
660 <when value="wilcoxon"> | |
661 <expand macro="macro_sample_one_cols"/> | |
662 <expand macro="macro_sample_two_cols"/> | |
663 <expand macro="macro_zero_method"/> | |
664 <expand macro="macro_correction"/> | |
665 </when> | |
666 <when value="scoreatpercentile"> | |
667 <expand macro="macro_sample_one_cols"/> | |
668 <expand macro="macro_sample_two_cols"/> | |
669 <expand macro="macro_mf"/> | |
670 <expand macro="macro_nf"/> | |
671 <expand macro="macro_interpolation"/> | |
672 </when> | |
673 <when value="binned_statistic"> | |
674 <expand macro="macro_sample_one_cols"/> | |
675 <expand macro="macro_sample_two_cols"/> | |
676 <expand macro="macro_mf"/> | |
677 <expand macro="macro_nf"/> | |
678 <expand macro="macro_b"/> | |
679 <expand macro="macro_statistic"/> | |
680 </when> | |
681 <when value="fligner"> | |
682 <expand macro="macro_proportiontocut"/> | |
683 <expand macro="macro_center"/> | |
684 <expand macro="macro_sample_cols_min2"/> | |
685 </when> | |
686 <when value="f_oneway"> | |
687 <expand macro="macro_sample_cols_min2"/> | |
688 </when> | |
689 <when value="kruskal"> | |
690 <expand macro="macro_sample_cols_min2"/> | |
691 </when> | |
692 <when value="friedmanchisquare"> | |
693 <expand macro="macro_sample_cols_min3"/> | |
694 </when> | |
695 <when value="bartlett"> | |
696 <expand macro="macro_sample_cols_min2"/> | |
697 </when> | |
698 <when value="levene"> | |
699 <expand macro="macro_proportiontocut"/> | |
700 <expand macro="macro_center"/> | |
701 <expand macro="macro_sample_cols_min2"/> | |
702 </when> | |
703 <when value="obrientransform"> | |
704 <expand macro="macro_sample_cols_min2"/> | |
705 </when> | |
706 <when value="median_test"> | |
707 <expand macro="macro_ties"/> | |
708 <expand macro="macro_correction"/> | |
709 <expand macro="macro_lambda_"/> | |
710 <expand macro="macro_sample_cols_min2"/> | |
711 </when> | |
712 </conditional> | |
713 </inputs> | |
714 <outputs> | |
715 <data format="tabular" name="outfile" label="${tool.name} on ${on_string}" /> | |
716 </outputs> | |
717 <tests> | |
718 <!-- Test 01 --> | |
719 <test> | |
720 <param name="infile" value="input.tabular"/> | |
721 <output name="outfile" file="boxcox_normmax2.tabular"/> | |
722 <param name="sample_one_cols" value="1,2,3,4"/> | |
723 <param name="test_methods_opts" value="boxcox_normmax"/> | |
724 <param name="method" value="pearsonr"/> | |
725 <param name="mf" value="-2.0"/> | |
726 <param name="nf" value="2.0"/> | |
727 </test> | |
728 <test> | |
729 <param name="infile" value="input.tabular"/> | |
730 <output name="outfile" file="normaltest.tabular" lines_diff="4"/> | |
731 <param name="sample_one_cols" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24"/> | |
732 <param name="test_methods_opts" value="normaltest"/> | |
733 </test> | |
734 <test> | |
735 <param name="infile" value="input.tabular"/> | |
736 <output name="outfile" file="tmin.tabular"/> | |
737 <param name="sample_one_cols" value="1,2,3,4,5,6"/> | |
738 <param name="test_methods_opts" value="tmin"/> | |
739 <param name="mf" value="10.0"/> | |
740 <param name="inclusive" value="True"/> | |
741 </test> | |
742 <test> | |
743 <param name="infile" value="input.tabular"/> | |
744 <output name="outfile" file="shapiro2.tabular"/> | |
745 <param name="sample_one_cols" value="1,2,3,4,8,9"/> | |
746 <param name="test_methods_opts" value="shapiro"/> | |
747 </test> | |
748 <!-- Test 05 --> | |
749 <test> | |
750 <param name="infile" value="input.tabular"/> | |
751 <output name="outfile" file="obrientransform.tabular"/> | |
752 <repeat name="samples"> | |
753 <param name="sample_cols" value="1,2,3,4"/> | |
754 </repeat> | |
755 <repeat name="samples"> | |
756 <param name="sample_cols" value="5,6,7,8"/> | |
757 </repeat> | |
758 <param name="test_methods_opts" value="obrientransform"/> | |
759 </test> | |
760 <test> | |
761 <param name="infile" value="input.tabular"/> | |
762 <output name="outfile" file="median_test_result1.tabular"/> | |
763 <repeat name="samples"> | |
764 <param name="sample_cols" value="1,2,3,4"/> | |
765 </repeat> | |
766 <repeat name="samples"> | |
767 <param name="sample_cols" value="5,6,7,8"/> | |
768 </repeat> | |
769 <repeat name="samples"> | |
770 <param name="sample_cols" value="9,10,11,12"/> | |
771 </repeat> | |
772 <param name="test_methods_opts" value="median_test"/> | |
773 <param name="ties" value="above"/> | |
774 <param name="correction" value="True"/> | |
775 <param name="lambda_" value="1"/> | |
776 </test> | |
777 <test> | |
778 <param name="infile" value="input.tabular"/> | |
779 <output name="outfile" file="wilcoxon_result1.tabular"/> | |
780 <param name="sample_one_cols" value="1,2,3,4,5,6,7,8,9,10"/> | |
781 <param name="sample_two_cols" value="11,12,13,14,15,16,17,18,19,20"/> | |
782 <param name="test_methods_opts" value="wilcoxon"/> | |
783 <param name="zero_method" value="pratt"/> | |
784 <param name="correction" value="False"/> | |
785 </test> | |
786 <test> | |
787 <param name="infile" value="input.tabular"/> | |
788 <output name="outfile" file="percentileofscore1.tabular"/> | |
789 <param name="sample_one_cols" value="1,2,3,4"/> | |
790 <param name="sample_two_cols" value="5,6,7,8"/> | |
791 <param name="test_methods_opts" value="percentileofscore"/> | |
792 <param name="score" value="1"/> | |
793 <param name="kind" value="rank"/> | |
794 </test> | |
795 <test> | |
796 <param name="infile" value="input.tabular"/> | |
797 <output name="outfile" file="percentileofscore2.tabular"/> | |
798 <param name="sample_one_cols" value="1,2,3,4"/> | |
799 <param name="sample_two_cols" value="5,6,7,8"/> | |
800 <param name="test_methods_opts" value="percentileofscore"/> | |
801 <param name="score" value="2"/> | |
802 <param name="kind" value="mean"/> | |
803 </test> | |
804 <!-- Test 10 --> | |
805 <test> | |
806 <param name="infile" value="input.tabular"/> | |
807 <output name="outfile" file="trim1.tabular"/> | |
808 <param name="sample_one_cols" value="1,2,3,4,5,6"/> | |
809 <param name="test_methods_opts" value="trim1"/> | |
810 <param name="tail" value="left"/> | |
811 <param name="proportiontocut" value="1.0"/> | |
812 </test> | |
813 <test> | |
814 <param name="infile" value="input.tabular"/> | |
815 <output name="outfile" file="scoreatpercentile.tabular"/> | |
816 <param name="sample_one_cols" value="1,2,3,4"/> | |
817 <param name="sample_two_cols" value="11,12,13,14"/> | |
818 <param name="test_methods_opts" value="scoreatpercentile"/> | |
819 <param name="mf" value="5.0"/> | |
820 <param name="nf" value="50.0"/> | |
821 <param name="interpolation" value="lower"/> | |
822 </test> | |
823 <test> | |
824 <param name="infile" value="input.tabular"/> | |
825 <output name="outfile" file="anderson.tabular"/> | |
826 <param name="sample_one_cols" value="1,2,3,4"/> | |
827 <param name="test_methods_opts" value="anderson"/> | |
828 <param name="dist" value="expon"/> | |
829 </test> | |
830 <test> | |
831 <param name="infile" value="input.tabular"/> | |
832 <output name="outfile" file="boxcox_normmax.tabular" lines_diff="14"/> | |
833 <param name="sample_one_cols" value="1,2,3,4"/> | |
834 <param name="test_methods_opts" value="boxcox_normmax"/> | |
835 <param name="method" value="mle"/> | |
836 <param name="mf" value="-3.0"/> | |
837 <param name="nf" value="3.0"/> | |
838 </test> | |
839 <test> | |
840 <param name="infile" value="input.tabular"/> | |
841 <output name="outfile" file="f_oneway.tabular"/> | |
842 <repeat name="samples"> | |
843 <param name="sample_cols" value="1,2,3,4"/> | |
844 </repeat> | |
845 <repeat name="samples"> | |
846 <param name="sample_cols" value="5,6,7,8"/> | |
847 </repeat> | |
848 <param name="test_methods_opts" value="f_oneway"/> | |
849 </test> | |
850 <!-- Test 15 --> | |
851 <test> | |
852 <param name="infile" value="input.tabular"/> | |
853 <output name="outfile" file="shapiro.tabular"/> | |
854 <param name="sample_one_cols" value="1,2,3,4"/> | |
855 <param name="test_methods_opts" value="shapiro"/> | |
856 </test> | |
857 <!-- | |
858 Fail with the following error: | |
859 ValueError: For each axis slice, the sum of the observed frequencies must agree | |
860 with the sum of the expected frequencies to a relative tolerance of 1e-08, but | |
861 the percent differences are: 0.08823529411764706 | |
862 <test> | |
863 <param name="infile" value="input.tabular"/> | |
864 <output name="outfile" file="power_divergence.tabular"/> | |
865 <param name="sample_one_cols" value="1,2,3,4"/> | |
866 <param name="sample_two_cols" value="5,6,7,8"/> | |
867 <param name="test_methods_opts" value="power_divergence"/> | |
868 <param name="ddof" value="1"/> | |
869 <param name="lambda_" value="1"/> | |
870 </test> | |
871 --> | |
872 <test> | |
873 <param name="infile" value="input.tabular"/> | |
874 <output name="outfile" file="itemfreq.tabular"/> | |
875 <param name="sample_one_cols" value="1,2,3,4,5,6,7,8,9,10"/> | |
876 <param name="test_methods_opts" value="itemfreq"/> | |
877 </test> | |
878 <test> | |
879 <param name="infile" value="input.tabular"/> | |
880 <output name="outfile" file="trimboth.tabular"/> | |
881 <param name="sample_one_cols" value="1,2,3,4,5,6,7,8,9,10"/> | |
882 <param name="proportiontocut" value="0"/> | |
883 <param name="test_methods_opts" value="trimboth"/> | |
884 </test> | |
885 <test> | |
886 <param name="infile" value="input.tabular"/> | |
887 <output name="outfile" file="tmean.tabular"/> | |
888 <param name="sample_one_cols" value="1,2,3,4,5,6"/> | |
889 <param name="test_methods_opts" value="tmean"/> | |
890 <param name="mf" value="0"/> | |
891 <param name="nf" value="50"/> | |
892 <param name="inclusive1" value="True"/> | |
893 <param name="inclusive2" value="True"/> | |
894 </test> | |
895 <!-- Test 20 --> | |
896 <test> | |
897 <param name="infile" value="input.tabular"/> | |
898 <output name="outfile" file="tvar.tabular"/> | |
899 <param name="sample_one_cols" value="1,2,3,4,5,6"/> | |
900 <param name="test_methods_opts" value="tvar"/> | |
901 <param name="mf" value="0"/> | |
902 <param name="nf" value="50"/> | |
903 <param name="inclusive1" value="True"/> | |
904 <param name="inclusive2" value="True"/> | |
905 </test> | |
906 </tests> | |
907 <help> | |
908 | |
909 .. class:: warningmark | |
910 | |
911 | |
912 Computes a large number of probability distributions as well as statistical functions of any kind. | |
913 For more information, have a look at the `SciPy site`_. | |
914 | |
915 .. _`SciPy site`: http://docs.scipy.org/doc/scipy/reference/stats.html | |
916 | |
917 | |
918 ----- | |
919 | |
920 ======== | |
921 Describe | |
922 ======== | |
923 | |
924 Computes several descriptive statistics for samples x | |
925 | |
926 ----- | |
927 | |
928 **The output are:** | |
929 | |
930 size of the data : int | |
931 | |
932 length of data along axis | |
933 | |
934 (min, max): tuple of ndarrays or floats | |
935 | |
936 minimum and maximum value of data array | |
937 | |
938 arithmetic mean : ndarray or float | |
939 | |
940 mean of data along axis | |
941 | |
942 unbiased variance : ndarray or float | |
943 | |
944 variance of the data along axis, denominator is number of observations minus one. | |
945 | |
946 biased skewness : ndarray or float | |
947 | |
948 skewness, based on moment calculations with denominator equal to the number of observations, i.e. no degrees of freedom correction | |
949 | |
950 biased kurtosis : ndarray or float | |
951 | |
952 kurtosis (Fisher), the kurtosis is normalized so that it is zero for the normal distribution. No degrees of freedom or bias correction is used. | |
953 | |
954 **example**: | |
955 | |
956 describe([4,417,8,3]) the result is (4,(3.0, 417.0),108.0,42440.6666667 ,1.15432044278, -0.666961688151) | |
957 | |
958 | |
959 ===== | |
960 Gmean | |
961 ===== | |
962 | |
963 Compute the geometric mean along the specified axis. | |
964 | |
965 Returns the geometric average of the array elements. That is: n-th root of (x1 * x2 * ... * xn) | |
966 | |
967 ----- | |
968 | |
969 **The output are:** | |
970 | |
971 gmean : ndarray | |
972 | |
973 see dtype parameter above | |
974 | |
975 **example**: | |
976 | |
977 stats.gmean([4,17,8,3],dtype='float64') the result is (6.35594365562) | |
978 | |
979 ===== | |
980 Hmean | |
981 ===== | |
982 | |
983 scipy.stats.hmean(a, axis=0, dtype=None) | |
984 Calculates the harmonic mean along the specified axis. | |
985 | |
986 That is: n / (1/x1 + 1/x2 + ... + 1/xn) | |
987 | |
988 **The output are:** | |
989 | |
990 hmean : ndarray | |
991 | |
992 see dtype parameter above | |
993 | |
994 | |
995 **example**: | |
996 | |
997 stats.hmean([4,17,8,3],dtype='float64') the result is (5.21405750799) | |
998 | |
999 ======== | |
1000 Kurtosis | |
1001 ======== | |
1002 | |
1003 Computes the kurtosis (Fisher or Pearson) of a dataset. | |
1004 | |
1005 Kurtosis is the fourth central moment divided by the square of the variance. If Fisher’s definition is used, then 3.0 is subtracted from the result to give 0.0 for a normal distribution. | |
1006 | |
1007 If bias is False then the kurtosis is calculated using k statistics to eliminate bias coming from biased moment estimators | |
1008 | |
1009 ----- | |
1010 | |
1011 Computes the kurtosis for samples x . | |
1012 | |
1013 **The output are:** | |
1014 | |
1015 kurtosis : array | |
1016 | |
1017 The kurtosis of values along an axis. If all values are equal, return -3 for Fisher’s definition and 0 for Pearson’s definition. | |
1018 | |
1019 **example**: | |
1020 | |
1021 kurtosis([4,417,8,3],0,true,true) the result is (-0.666961688151) | |
1022 | |
1023 ============= | |
1024 Kurtosis Test | |
1025 ============= | |
1026 | |
1027 Tests whether a dataset has normal kurtosis | |
1028 | |
1029 This function tests the null hypothesis that the kurtosis of the population from which the sample was drawn is that of the normal distribution: kurtosis = 3(n-1)/(n+1). | |
1030 | |
1031 ----- | |
1032 | |
1033 Computes the Z-value and p-value about samples x. | |
1034 | |
1035 kurtosistest only valid for n>=20. | |
1036 | |
1037 **The output are:** | |
1038 | |
1039 z-score : float | |
1040 | |
1041 The computed z-score for this test | |
1042 | |
1043 p-value : float | |
1044 | |
1045 The 2-sided p-value for the hypothesis test | |
1046 | |
1047 | |
1048 **example**: | |
1049 | |
1050 kurtosistest([4,17,8,3,30,45,5,3,4,17,8,3,30,45,5,3,4,17,8,3,30,45,5,3]) the result is (0.29775013081425117, 0.7658938788569033) | |
1051 | |
1052 ==== | |
1053 Mode | |
1054 ==== | |
1055 | |
1056 Returns an array of the modal value in the passed array. | |
1057 | |
1058 If there is more than one such value, only the first is returned. The bin-count for the modal bins is also returned. | |
1059 | |
1060 ----- | |
1061 | |
1062 Computes the most common value for samples x . | |
1063 | |
1064 **The output are:** | |
1065 | |
1066 vals : ndarray | |
1067 | |
1068 Array of modal values. | |
1069 | |
1070 counts : ndarray | |
1071 | |
1072 Array of counts for each mode. | |
1073 | |
1074 | |
1075 **example**: | |
1076 | |
1077 mode([4,417,8,3]) the result is ([ 3.], [ 1.]) | |
1078 | |
1079 ====== | |
1080 Moment | |
1081 ====== | |
1082 | |
1083 Calculates the nth moment about the mean for a sample. | |
1084 | |
1085 Generally used to calculate coefficients of skewness and kurtosis. | |
1086 | |
1087 ----- | |
1088 | |
1089 Computes the nth moment about the mean for samples x . | |
1090 | |
1091 **The output are:** | |
1092 | |
1093 n-th central moment : ndarray or float | |
1094 | |
1095 The appropriate moment along the given axis or over all values if axis is None. The denominator for the moment calculation is the number of observations, no degrees of freedom correction is done. | |
1096 | |
1097 | |
1098 **example**: | |
1099 | |
1100 moment([4,417,8,3],moment=2) the result is (31830.5) | |
1101 | |
1102 | |
1103 =========== | |
1104 Normal Test | |
1105 =========== | |
1106 | |
1107 Tests whether a sample differs from a normal distribution. | |
1108 | |
1109 This function tests the null hypothesis that a sample comes from a normal distribution. It is based on D’Agostino and Pearson’s test that combines skew and kurtosis to produce an omnibus test of normality. | |
1110 | |
1111 ----- | |
1112 | |
1113 Computes the k2 and p-value for samples x. | |
1114 | |
1115 skewtest is not valid with less than 8 samples. kurtosistest is only valid for n>=20. | |
1116 | |
1117 **The output are:** | |
1118 | |
1119 k2 : float or array | |
1120 | |
1121 s^2 + k^2, where s is the z-score returned by skewtest and k is the z-score returned by kurtosistest. | |
1122 | |
1123 p-value : float or array | |
1124 | |
1125 A 2-sided chi squared probability for the hypothesis test. | |
1126 | |
1127 | |
1128 **example**: | |
1129 | |
1130 normaltest([4,17,8,3,30,45,5,3,4,17,8,3,30,45,5,3,4,17,8,3,30,45,5,3]) the result is (5.8877986151838, 0.052659990380181286) | |
1131 | |
1132 ==== | |
1133 Skew | |
1134 ==== | |
1135 | |
1136 Computes the skewness of a data set. | |
1137 | |
1138 For normally distributed data, the skewness should be about 0. A skewness value > 0 means that there is more weight in the right tail of the distribution. The function skewtest can be used to determine if the skewness value is close enough to 0, statistically speaking. | |
1139 | |
1140 ----- | |
1141 | |
1142 Computes the skewness from samples x. | |
1143 | |
1144 | |
1145 **The output are:** | |
1146 | |
1147 skewness : ndarray | |
1148 | |
1149 The skewness of values along an axis, returning 0 where all values are equal. | |
1150 | |
1151 | |
1152 **example**: | |
1153 | |
1154 skew([4,417,8,3]) the result is (1.1543204427775307) | |
1155 | |
1156 | |
1157 ========= | |
1158 Skew Test | |
1159 ========= | |
1160 | |
1161 Tests whether the skew is different from the normal distribution. | |
1162 | |
1163 This function tests the null hypothesis that the skewness of the population that the sample was drawn from is the same as that of a corresponding normal distribution. | |
1164 | |
1165 ----- | |
1166 | |
1167 Computes the z-value and p-value from samples x. | |
1168 | |
1169 skewtest is not valid with less than 8 samples | |
1170 | |
1171 **The output are:** | |
1172 | |
1173 z-score : float | |
1174 | |
1175 The computed z-score for this test. | |
1176 | |
1177 p-value : float | |
1178 | |
1179 a 2-sided p-value for the hypothesis test | |
1180 | |
1181 **example**: | |
1182 | |
1183 skewtest([4,17,8,3,30,45,5,3,4,17,8,3,30,45,5,3,4,17,8,3,30,45,5,3]) the result is (2.40814108282,0.0160339834731) | |
1184 | |
1185 ====== | |
1186 tmean | |
1187 ====== | |
1188 | |
1189 Compute the trimmed mean. | |
1190 | |
1191 This function finds the arithmetic mean of given values, ignoring values outside the given limits. | |
1192 | |
1193 ----- | |
1194 | |
1195 Computes the mean of samples x, considering the lower and upper limits. | |
1196 | |
1197 Values in the input array less than the lower limit or greater than the upper limit will be ignored. | |
1198 | |
1199 The inclusive flags determine whether values exactly equal to the lower or upper limits are included. The default value is (True, True). | |
1200 | |
1201 **The output are:** | |
1202 | |
1203 tmean : float | |
1204 | |
1205 The computed mean for this test. | |
1206 | |
1207 | |
1208 **example**: | |
1209 | |
1210 tmean([4,17,8,3],(0,20),(true,true)) the result is (8.0) | |
1211 | |
1212 ===== | |
1213 tvar | |
1214 ===== | |
1215 | |
1216 Compute the trimmed variance | |
1217 | |
1218 This function computes the sample variance of an array of values, while ignoring values which are outside of given limits | |
1219 | |
1220 ----- | |
1221 | |
1222 Computes the variance of samples x, considering the lower and upper limits. | |
1223 | |
1224 Values in the input array less than the lower limit or greater than the upper limit will be ignored. | |
1225 | |
1226 The inclusive flags determine whether values exactly equal to the lower or upper limits are included. The default value is (True, True). | |
1227 | |
1228 **The output are:** | |
1229 | |
1230 tvar : float | |
1231 | |
1232 The computed variance for this test. | |
1233 | |
1234 | |
1235 **example**: | |
1236 | |
1237 tvar([4,17,8,3],(0,99999),(true,true)) the result is (40.6666666667) | |
1238 | |
1239 ===== | |
1240 tmin | |
1241 ===== | |
1242 | |
1243 Compute the trimmed minimum. | |
1244 | |
1245 This function finds the arithmetic minimum of given values, ignoring values outside the given limits. | |
1246 | |
1247 ----- | |
1248 | |
1249 Compute the trimmed minimum | |
1250 | |
1251 This function finds the minimum value of an array a along the specified axis, but only considering values greater than a specified lower limit. | |
1252 | |
1253 **The output are:** | |
1254 | |
1255 tmin : float | |
1256 | |
1257 The computed min for this test. | |
1258 | |
1259 | |
1260 **example**: | |
1261 | |
1262 stats.tmin([4,17,8,3],2,0,'true') the result is (3.0) | |
1263 | |
1264 ============ | |
1265 tmax | |
1266 ============ | |
1267 | |
1268 Compute the trimmed maximum. | |
1269 | |
1270 This function finds the arithmetic maximum of given values, ignoring values outside the given limits. | |
1271 | |
1272 This function computes the maximum value of an array along a given axis, while ignoring values larger than a specified upper limit. | |
1273 | |
1274 **The output are:** | |
1275 | |
1276 tmax : float | |
1277 | |
1278 The computed max for this test. | |
1279 | |
1280 | |
1281 **example**: | |
1282 | |
1283 stats.tmax([4,17,8,3],50,0,'true') the result is (17.0) | |
1284 | |
1285 ============ | |
1286 tstd | |
1287 ============ | |
1288 | |
1289 Compute the trimmed sample standard deviation | |
1290 | |
1291 This function finds the sample standard deviation of given values, ignoring values outside the given limits. | |
1292 | |
1293 ----- | |
1294 | |
1295 Computes the standard deviation of samples x, considering the lower and upper limits. | |
1296 | |
1297 Values in the input array less than the lower limit or greater than the upper limit will be ignored. | |
1298 | |
1299 The inclusive flags determine whether values exactly equal to the lower or upper limits are included. The default value is (True, True). | |
1300 | |
1301 **The output are:** | |
1302 | |
1303 tstd : float | |
1304 | |
1305 The computed deviation for this test. | |
1306 | |
1307 | |
1308 **example**: | |
1309 | |
1310 tstd([4,17,8,3],(0,99999),(true,true)) the result is (6.37704215657) | |
1311 | |
1312 | |
1313 ============ | |
1314 tsem | |
1315 ============ | |
1316 | |
1317 Compute the trimmed standard error of the mean. | |
1318 | |
1319 This function finds the standard error of the mean for given values, ignoring values outside the given limits. | |
1320 | |
1321 ----- | |
1322 | |
1323 Computes the standard error of the mean for samples x, considering the lower and upper limits. | |
1324 | |
1325 Values in the input array less than the lower limit or greater than the upper limit will be ignored. | |
1326 | |
1327 The inclusive flags determine whether values exactly equal to the lower or upper limits are included. The default value is (True, True). | |
1328 | |
1329 **The output are:** | |
1330 | |
1331 tsem : float | |
1332 | |
1333 The computed standard error of the mean for this test. | |
1334 | |
1335 | |
1336 **example**: | |
1337 | |
1338 tsem([4,17,8,3],(0,99999),(true,true)) the result is (3.18852107828) | |
1339 | |
1340 ======== | |
1341 nanmean | |
1342 ======== | |
1343 | |
1344 Compute the mean over the given axis ignoring nans | |
1345 | |
1346 ----- | |
1347 | |
1348 Computes the mean for samples x without considering nans | |
1349 | |
1350 **The output are:** | |
1351 | |
1352 m : float | |
1353 | |
1354 The computed mean for this test. | |
1355 | |
1356 | |
1357 **example**: | |
1358 | |
1359 nanmean([4,17,8,3]) the result is (8.0) | |
1360 | |
1361 ======= | |
1362 nanstd | |
1363 ======= | |
1364 | |
1365 Compute the standard deviation over the given axis, ignoring nans. | |
1366 | |
1367 ----- | |
1368 | |
1369 Computes the deviation for samples x without considering nans | |
1370 | |
1371 **The output are:** | |
1372 | |
1373 s : float | |
1374 | |
1375 The computed standard deviation for this test. | |
1376 | |
1377 | |
1378 **example**: | |
1379 | |
1380 nanstd([4,17,8,3],0,'false') the result is (5.52268050859) | |
1381 | |
1382 | |
1383 ============ | |
1384 nanmedian | |
1385 ============ | |
1386 | |
1387 Computes the median for samples x without considering nans | |
1388 | |
1389 **The output are:** | |
1390 | |
1391 m : float | |
1392 | |
1393 The computed median for this test. | |
1394 | |
1395 | |
1396 **example**: | |
1397 | |
1398 nanmedian([4,17,8,3]) the result is (6.0) | |
1399 | |
1400 | |
1401 ============ | |
1402 variation | |
1403 ============ | |
1404 | |
1405 Computes the coefficient of variation, the ratio of the biased standard deviation to the mean for samples x | |
1406 | |
1407 **The output are:** | |
1408 | |
1409 ratio: float | |
1410 | |
1411 The ratio of the biased standard deviation to the mean for this test. | |
1412 | |
1413 | |
1414 **example**: | |
1415 | |
1416 variation([4,17,8,3]) the result is (0.690335063574) | |
1417 | |
1418 ============ | |
1419 cumfreq | |
1420 ============ | |
1421 | |
1422 Returns a cumulative frequency histogram, using the histogram function. | |
1423 | |
1424 **The output are:** | |
1425 | |
1426 cumfreq : ndarray | |
1427 | |
1428 Binned values of cumulative frequency. | |
1429 | |
1430 lowerreallimit : float | |
1431 | |
1432 Lower real limit | |
1433 | |
1434 binsize : float | |
1435 | |
1436 Width of each bin. | |
1437 | |
1438 extrapoints : int | |
1439 | |
1440 Extra points. | |
1441 | |
1442 | |
1443 **example**: | |
1444 | |
1445 cumfreq([4,17,8,3],defaultreallimits=(2.0,3.5)) the result is ([ 0. 0. 0. 0. 0. 0. 1. 1. 1. 1.],2.0,0.15,3) | |
1446 | |
1447 ========== | |
1448 histogram2 | |
1449 ========== | |
1450 | |
1451 Compute histogram using divisions in bins. | |
1452 | |
1453 Count the number of times values from array a fall into numerical ranges defined by bins. | |
1454 | |
1455 samples should at least have two numbers. | |
1456 | |
1457 **The output are:** | |
1458 | |
1459 histogram2 : ndarray of rank 1 | |
1460 | |
1461 Each value represents the occurrences for a given bin (range) of values. | |
1462 | |
1463 | |
1464 **example**: | |
1465 | |
1466 stats.histogram2([4,17,8,3], [30,45,5,3]) the result is (array([ 0, -2, -2, 4])) | |
1467 | |
1468 ============ | |
1469 histogram | |
1470 ============ | |
1471 | |
1472 Separates the range into several bins and returns the number of instances in each bin | |
1473 | |
1474 **The output are:** | |
1475 | |
1476 histogram : ndarray | |
1477 | |
1478 Number of points (or sum of weights) in each bin. | |
1479 | |
1480 low_range : float | |
1481 | |
1482 Lowest value of histogram, the lower limit of the first bin. | |
1483 | |
1484 binsize : float | |
1485 | |
1486 The size of the bins (all bins have the same size). | |
1487 | |
1488 extrapoints : int | |
1489 | |
1490 The number of points outside the range of the histogram. | |
1491 | |
1492 | |
1493 **example**: | |
1494 | |
1495 histogram([4,17,8,3],defaultlimits=(2.0,3.4)) the result is ([ 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.],2.0,0.14,3) | |
1496 | |
1497 | |
1498 ============ | |
1499 itemfreq | |
1500 ============ | |
1501 | |
1502 Computes the frequencies for numbers | |
1503 | |
1504 **The output are:** | |
1505 | |
1506 itemfreq : (K, 2) ndarray | |
1507 A 2-D frequency table. Column 1 contains sorted, unique values from a, column 2 contains their respective counts. | |
1508 | |
1509 | |
1510 **example**: | |
1511 | |
1512 itemfreq([4,17,8,3]) the result is array([[ 3, 1], [ 4, 1],[ 8, 1],[17, 1]]) | |
1513 | |
1514 === | |
1515 Sem | |
1516 === | |
1517 | |
1518 Calculates the standard error of the mean (or standard error of measurement) of the values in the input array. | |
1519 | |
1520 | |
1521 **The output are:** | |
1522 | |
1523 s : ndarray or float | |
1524 The standard error of the mean in the sample(s), along the input axis. | |
1525 | |
1526 | |
1527 **example**: | |
1528 | |
1529 sem([4,17,8,3],ddof=1) the result is (3.18852107828) | |
1530 | |
1531 ===== | |
1532 Z Map | |
1533 ===== | |
1534 | |
1535 Calculates the relative z-scores. | |
1536 | |
1537 Returns an array of z-scores, i.e., scores that are standardized to zero mean and unit variance, where mean and variance are calculated from the comparison array. | |
1538 | |
1539 | |
1540 **The output are:** | |
1541 | |
1542 zscore : array_like | |
1543 | |
1544 Z-scores, in the same shape as scores. | |
1545 | |
1546 **example**: | |
1547 | |
1548 stats.zmap([4,17,8,3],[30,45,5,3],ddof=1) the result is [-0.82496302 -0.18469321 -0.62795692 -0.87421454] | |
1549 | |
1550 ======= | |
1551 Z Score | |
1552 ======= | |
1553 | |
1554 Calculates the z score of each value in the sample, relative to the sample mean and standard deviation | |
1555 | |
1556 | |
1557 **The output are:** | |
1558 | |
1559 zscore : array_like | |
1560 The z-scores, standardized by mean and standard deviation of input array a. | |
1561 | |
1562 | |
1563 **example**: | |
1564 | |
1565 zscore([4,17,8,3],ddof=0) the result is ([-0.72428597 1.62964343 0. -0.90535746]) | |
1566 | |
1567 =============== | |
1568 Signal to noise | |
1569 =============== | |
1570 | |
1571 The signal-to-noise ratio of the input data. | |
1572 | |
1573 Returns the signal-to-noise ratio of a, here defined as the mean divided by the standard deviation. | |
1574 | |
1575 | |
1576 **The output are:** | |
1577 | |
1578 s2n : ndarray | |
1579 The mean to standard deviation ratio(s) along axis, or 0 where the standard deviation is 0. | |
1580 | |
1581 | |
1582 **example**: | |
1583 | |
1584 signaltonoise([4,17,8,3],ddof=0) the result is (1.44857193668) | |
1585 | |
1586 =================== | |
1587 Percentile of score | |
1588 =================== | |
1589 | |
1590 The percentile rank of a score relative to a list of scores. | |
1591 | |
1592 A percentileofscore of, for example, 80% means that 80% of the scores in a are below the given score. In the case of gaps or ties, the exact definition depends on the optional keyword, kind. | |
1593 | |
1594 **The output are:** | |
1595 | |
1596 pcos : float | |
1597 Percentile-position of score (0-100) relative to a. | |
1598 | |
1599 | |
1600 **example**: | |
1601 | |
1602 percentileofscore([4,17,8,3],score=3,kind='rank') the result is (25.0) | |
1603 | |
1604 =================== | |
1605 Score at percentile | |
1606 =================== | |
1607 | |
1608 Calculate the score at a given percentile of the input sequence. | |
1609 | |
1610 For example, the score at per=50 is the median. If the desired quantile lies between two data points, we interpolate between them, according to the value of interpolation. If the parameter limit is provided, it should be a tuple (lower, upper) of two values. | |
1611 | |
1612 The second sample should be in the range [0,100]. | |
1613 | |
1614 **The output are:** | |
1615 | |
1616 score : float or ndarray | |
1617 Score at percentile(s). | |
1618 | |
1619 | |
1620 **example**: | |
1621 | |
1622 stats.scoreatpercentile([4,17,8,3],[8,3],(0,100),'fraction') the result is array([ 3.24, 3.09]) | |
1623 | |
1624 ======= | |
1625 relfreq | |
1626 ======= | |
1627 | |
1628 Returns a relative frequency histogram, using the histogram function | |
1629 | |
1630 numbins are the number of bins to use for the histogram. | |
1631 | |
1632 **The output are:** | |
1633 | |
1634 relfreq : ndarray | |
1635 | |
1636 Binned values of relative frequency. | |
1637 | |
1638 lowerreallimit : float | |
1639 | |
1640 Lower real limit | |
1641 | |
1642 binsize : float | |
1643 | |
1644 Width of each bin. | |
1645 | |
1646 extrapoints : int | |
1647 | |
1648 Extra points. | |
1649 | |
1650 | |
1651 **example**: | |
1652 | |
1653 stats.relfreq([4,17,8,3],10,(0,100)) the result is (array([ 0.75, 0.25, 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 ]), 0, 10.0, 0) | |
1654 | |
1655 ================ | |
1656 Binned statistic | |
1657 ================ | |
1658 | |
1659 Compute a binned statistic for a set of data. | |
1660 | |
1661 This is a generalization of a histogram function. A histogram divides the space into bins, and returns the count of the number of points in each bin. This function allows the computation of the sum, mean, median, or other statistic of the values within each bin. | |
1662 | |
1663 Y must be the same shape as X | |
1664 | |
1665 **The output are:** | |
1666 | |
1667 statistic : array | |
1668 | |
1669 The values of the selected statistic in each bin. | |
1670 | |
1671 bin_edges : array of dtype float | |
1672 | |
1673 Return the bin edges (length(statistic)+1). | |
1674 | |
1675 binnumber : 1-D ndarray of ints | |
1676 | |
1677 This assigns to each observation an integer that represents the bin in which this observation falls. Array has the same length as values. | |
1678 | |
1679 | |
1680 **example**: | |
1681 | |
1682 stats.binned_statistic([4,17,8,3],[30,45,5,3],'sum',10,(0,100)) the result is ([ 38. 45. 0. 0. 0. 0. 0. 0. 0. 0.],[ 0. 10. 20. 30. 40. 50. 60. 70. 80. 90. 100.],[1 2 1 1]) | |
1683 | |
1684 ================ | |
1685 obrientransform | |
1686 ================ | |
1687 | |
1688 Computes the O’Brien transform on input data (any number of arrays). | |
1689 | |
1690 Used to test for homogeneity of variance prior to running one-way stats. | |
1691 | |
1692 It has to have at least two samples. | |
1693 | |
1694 **The output are:** | |
1695 | |
1696 obrientransform : ndarray | |
1697 | |
1698 Transformed data for use in an ANOVA. The first dimension of the result corresponds to the sequence of transformed arrays. If the arrays given are all 1-D of the same length, the return value is a 2-D array; otherwise it is a 1-D array of type object, with each element being an ndarray. | |
1699 | |
1700 | |
1701 **example**: | |
1702 | |
1703 stats.obrientransform([4,17,8,3], [30,45,5,3]) the result is (array([[ 16.5 , 124.83333333, -10.16666667, 31.5 ],[ 39.54166667, 877.04166667, 310.375 , 422.04166667]])) | |
1704 | |
1705 ========= | |
1706 bayes mvs | |
1707 ========= | |
1708 | |
1709 Bayesian confidence intervals for the mean, var, and std. alpha should be larger than 0 and smaller than 1. | |
1710 | |
1711 | |
1712 **The output are:** | |
1713 | |
1714 mean_cntr, var_cntr, std_cntr : tuple | |
1715 | |
1716 The three results are for the mean, variance and standard deviation, respectively. Each result is a tuple of the form: | |
1717 | |
1718 (center, (lower, upper)) | |
1719 | |
1720 with center the mean of the conditional pdf of the value given the data, and (lower, upper) a confidence interval, centered on the median, containing the estimate to a probability alpha. | |
1721 | |
1722 **example**: | |
1723 | |
1724 stats.bayes_mvs([4,17,8,3],0.8) the result is (8.0, (0.49625108326958145, 15.503748916730416));(122.0, (15.611548029617781, 346.74229584218108));(8.8129230241075476, (3.9511451542075475, 18.621017583423871)) | |
1725 | |
1726 ========= | |
1727 sigmaclip | |
1728 ========= | |
1729 | |
1730 Iterative sigma-clipping of array elements. | |
1731 | |
1732 The output array contains only those elements of the input array c that satisfy the conditions: c is greater than mean(c) - std(c) * low and less than mean(c) + std(c) * high. | |
1733 | |
1734 **The output are:** | |
1735 | |
1736 c : ndarray | |
1737 Input array with clipped elements removed. | |
1738 critlower : float | |
1739 Lower threshold value used for clipping. | |
1740 critupper : float | |
1741 Upper threshold value used for clipping. | |
1742 | |
1743 | |
1744 **example**: | |
1745 | |
1746 sigmaclip([4,17,8,3]) the result is ([ 4. 17. 8. 3.],-14.0907220344,30.0907220344) | |
1747 | |
1748 ========= | |
1749 threshold | |
1750 ========= | |
1751 | |
1752 Clip array to a given value. | |
1753 | |
1754 Similar to numpy.clip(), except that values less than threshmin or greater than threshmax are replaced by newval, instead of by threshmin and threshmax respectively. | |
1755 | |
1756 | |
1757 **The output are:** | |
1758 | |
1759 out : ndarray | |
1760 The clipped input array, with values less than threshmin or greater than threshmax replaced with newval. | |
1761 | |
1762 **example**: | |
1763 | |
1764 stats.threshold([4,17,8,3],2,8,0) the result is array([4, 0, 8, 3]) | |
1765 | |
1766 ======== | |
1767 trimboth | |
1768 ======== | |
1769 | |
1770 Slices off a proportion of items from both ends of an array. | |
1771 | |
1772 Slices off the passed proportion of items from both ends of the passed array (i.e., with proportiontocut = 0.1, slices leftmost 10% and rightmost 10% of scores). You must pre-sort the array if you want ‘proper’ trimming. Slices off less if proportion results in a non-integer slice index (i.e., conservatively slices off proportiontocut). | |
1773 | |
1774 | |
1775 **The output are:** | |
1776 | |
1777 out : ndarray | |
1778 Trimmed version of array a. | |
1779 | |
1780 **example**: | |
1781 | |
1782 stats.trimboth([4,17,8,3],0.1)the result is array([ 4, 17, 8, 3]) | |
1783 | |
1784 ===== | |
1785 trim1 | |
1786 ===== | |
1787 | |
1788 Slices off a proportion of items from ONE end of the passed array distribution. | |
1789 | |
1790 If proportiontocut = 0.1, slices off ‘leftmost’ or ‘rightmost’ 10% of scores. Slices off LESS if proportion results in a non-integer slice index (i.e., conservatively slices off proportiontocut ). | |
1791 | |
1792 **The output are:** | |
1793 | |
1794 trim1 : ndarray | |
1795 | |
1796 Trimmed version of array a | |
1797 | |
1798 **example**: | |
1799 | |
1800 stats.trim1([4,17,8,3],0.5,'left')the result is array([8, 3]) | |
1801 | |
1802 ========= | |
1803 spearmanr | |
1804 ========= | |
1805 | |
1806 Calculates a Spearman rank-order correlation coefficient and the p-value to test for non-correlation. | |
1807 | |
1808 The Spearman correlation is a nonparametric measure of the monotonicity of the relationship between two datasets. Unlike the Pearson correlation, the Spearman correlation does not assume that both datasets are normally distributed. Like other correlation coefficients, this one varies between -1 and +1 with 0 implying no correlation. Correlations of -1 or +1 imply an exact monotonic relationship. Positive correlations imply that as x increases, so does y. Negative correlations imply that as x increases, y decreases. | |
1809 | |
1810 **The output are:** | |
1811 | |
1812 rho : float or ndarray (2-D square) | |
1813 | |
1814 Spearman correlation matrix or correlation coefficient (if only 2 variables are given as parameters). Correlation matrix is square with length equal to total number of variables (columns or rows) in a and b combined. | |
1815 | |
1816 p-value : float | |
1817 | |
1818 The two-sided p-value for a hypothesis test whose null hypothesis is that two sets of data are uncorrelated, has same dimension as rho. | |
1819 | |
1820 **example**: | |
1821 | |
1822 stats.spearmanr([4,17,8,3,30,45,5,3],[5,3,4,17,8,3,30,45])the result is (-0.722891566265, 0.0427539458876) | |
1823 | |
1824 ======== | |
1825 f oneway | |
1826 ======== | |
1827 | |
1828 Performs a 1-way ANOVA. | |
1829 | |
1830 The one-way ANOVA tests the null hypothesis that two or more groups have the same population mean. The test is applied to samples from two or more groups, possibly with differing sizes. | |
1831 | |
1832 **The output are:** | |
1833 | |
1834 F-value : float | |
1835 | |
1836 The computed F-value of the test. | |
1837 | |
1838 p-value : float | |
1839 | |
1840 The associated p-value from the F-distribution. | |
1841 | |
1842 **example**: | |
1843 | |
1844 stats.f_oneway([4,17,8,3], [30,45,5,3]) the result is (1.43569457222,0.276015080537) | |
1845 | |
1846 ================= | |
1847 Mann-Whitney rank | |
1848 ================= | |
1849 | |
1850 Compute the Wilcoxon rank-sum statistic for two samples. | |
1851 | |
1852 The Wilcoxon rank-sum test tests the null hypothesis that two sets of measurements are drawn from the same distribution. The alternative hypothesis is that values in one sample are more likely to be larger than the values in the other sample. | |
1853 | |
1854 This test should be used to compare two samples from continuous distributions. It does not handle ties between measurements in x and y. For tie-handling and an optional continuity correction use mannwhitneyu. | |
1855 | |
1856 ----- | |
1857 | |
1858 Computes the Mann-Whitney rank test on samples x and y. | |
1859 | |
1860 u : float | |
1861 | |
1862 The Mann-Whitney statistics. | |
1863 | |
1864 prob : float | |
1865 | |
1866 One-sided p-value assuming an asymptotic normal distribution. | |
1867 | |
1868 =================== | |
1869 Ansari-Bradley test | |
1870 =================== | |
1871 | |
1872 Perform the Ansari-Bradley test for equal scale parameters | |
1873 | |
1874 The Ansari-Bradley test is a non-parametric test for the equality of the scale parameter of the distributions from which two samples were drawn. | |
1875 | |
1876 The p-value given is exact when the sample sizes are both less than 55 and there are no ties, otherwise a normal approximation for the p-value is used. | |
1877 | |
1878 ----- | |
1879 | |
1880 Computes the Ansari-Bradley test for samples x and y. | |
1881 | |
1882 **The output are:** | |
1883 | |
1884 AB : float | |
1885 | |
1886 The Ansari-Bradley test statistic | |
1887 | |
1888 p-value : float | |
1889 | |
1890 The p-value of the hypothesis test | |
1891 | |
1892 **example**: | |
1893 | |
1894 ansari([1,2,3,4],[15,5,20,8,10,12]) the result is (10.0, 0.53333333333333333) | |
1895 | |
1896 ======== | |
1897 bartlett | |
1898 ======== | |
1899 | |
1900 Perform Bartlett’s test for equal variances | |
1901 | |
1902 Bartlett’s test tests the null hypothesis that all input samples are from populations with equal variances. | |
1903 | |
1904 It has to have at least two samples. | |
1905 | |
1906 **The output are:** | |
1907 | |
1908 T : float | |
1909 | |
1910 The test statistic. | |
1911 | |
1912 p-value : float | |
1913 | |
1914 The p-value of the test. | |
1915 | |
1916 | |
1917 **example**: | |
1918 | |
1919 stats.bartlett([4,17,8,3], [30,45,5,3]) the result is (2.87507113948,0.0899609995242) | |
1920 | |
1921 ====== | |
1922 levene | |
1923 ====== | |
1924 | |
1925 Perform Levene test for equal variances. | |
1926 | |
1927 The Levene test tests the null hypothesis that all input samples are from populations with equal variances. | |
1928 | |
1929 It has to have at least two samples. | |
1930 | |
1931 **The output are:** | |
1932 | |
1933 W : float | |
1934 | |
1935 The test statistic. | |
1936 | |
1937 p-value : float | |
1938 | |
1939 The p-value for the test. | |
1940 | |
1941 | |
1942 **example**: | |
1943 | |
1944 stats.levene([4,17,8,3], [30,45,5,3],center='mean',proportiontocut=0.01) the result is (11.5803858521,0.014442549362) | |
1945 | |
1946 ======= | |
1947 fligner | |
1948 ======= | |
1949 | |
1950 Perform Fligner’s test for equal variances. | |
1951 | |
1952 Fligner’s test tests the null hypothesis that all input samples are from populations with equal variances. Fligner’s test is non-parametric in contrast to Bartlett’s test bartlett and Levene’s test levene. | |
1953 | |
1954 **The output are:** | |
1955 | |
1956 Xsq : float | |
1957 | |
1958 The test statistic. | |
1959 | |
1960 p-value : float | |
1961 | |
1962 The p-value for the hypothesis test. | |
1963 | |
1964 | |
1965 ========== | |
1966 linregress | |
1967 ========== | |
1968 | |
1969 Calculate a regression line | |
1970 | |
1971 This computes a least-squares regression for two sets of measurements. | |
1972 | |
1973 ----- | |
1974 | |
1975 Computes the least-squares regression for samples x and y. | |
1976 | |
1977 **The output are:** | |
1978 | |
1979 slope : float | |
1980 | |
1981 slope of the regression line | |
1982 | |
1983 intercept : float | |
1984 | |
1985 intercept of the regression line | |
1986 | |
1987 r-value : float | |
1988 | |
1989 correlation coefficient | |
1990 | |
1991 p-value : float | |
1992 | |
1993 two-sided p-value for a hypothesis test whose null hypothesis is that the slope is zero. | |
1994 | |
1995 stderr : float | |
1996 | |
1997 Standard error of the estimate | |
1998 | |
1999 **example**: | |
2000 | |
2001 linregress([4,417,8,3],[30,45,5,3]) the result is (0.0783053989099, 12.2930169177, 0.794515680443,0.205484319557,0.0423191764713) | |
2002 | |
2003 =========== | |
2004 ttest 1samp | |
2005 =========== | |
2006 | |
2007 Calculates the T-test for the mean of ONE group of scores. | |
2008 | |
2009 This is a two-sided test for the null hypothesis that the expected value (mean) of a sample of independent observations a is equal to the given population mean, popmean. | |
2010 | |
2011 **The output are:** | |
2012 | |
2013 t : float or array | |
2014 | |
2015 The calculated t-statistic. | |
2016 | |
2017 prob : float or array | |
2018 | |
2019 The two-tailed p-value. | |
2020 | |
2021 **example**: | |
2022 | |
2023 stats.ttest_1samp([4,17,8,3],[30,45,5,3])the result is (array([ -6.89975053, -11.60412589, 0.94087507, 1.56812512]), array([ 0.00623831, 0.00137449, 0.41617971, 0.21485306])) | |
2024 | |
2025 ========= | |
2026 ttest ind | |
2027 ========= | |
2028 | |
2029 Calculates the T-test for the means of TWO INDEPENDENT samples of scores. | |
2030 | |
2031 This is a two-sided test for the null hypothesis that 2 independent samples have identical average (expected) values. This test assumes that the populations have identical variances. | |
2032 | |
2033 The independent samples t-test is used when two separate sets of independent and identically distributed samples are obtained, one from each of the two populations | |
2034 being compared. | |
2035 ----- | |
2036 Computes the T-test for the means of independent samples x and y. | |
2037 | |
2038 **The output are:** | |
2039 | |
2040 t : float or array | |
2041 | |
2042 The calculated t-statistic. | |
2043 | |
2044 prob : float or array | |
2045 | |
2046 The two-tailed p-value. | |
2047 | |
2048 **example**: | |
2049 | |
2050 ttest_ind([4,417,8,3],[30,45,5,3]) the result is (0.842956644207,0.431566932748) | |
2051 | |
2052 ========= | |
2053 ttest rel | |
2054 ========= | |
2055 | |
2056 Calculates the T-test on TWO RELATED samples of scores, a and b. | |
2057 | |
2058 This is a two-sided test for the null hypothesis that 2 related or repeated samples have identical average (expected) values. | |
2059 | |
2060 Related-samples t-tests typically consist of a sample of matched pairs of similar units, or one group of units that has been tested twice (a "repeated measures" t-test). | |
2061 | |
2062 ----- | |
2063 | |
2064 Computes the T-test for the means of related samples x and y. | |
2065 | |
2066 **The output are:** | |
2067 | |
2068 t : float or array | |
2069 | |
2070 t-statistic | |
2071 | |
2072 prob : float or array | |
2073 | |
2074 two-tailed p-value | |
2075 | |
2076 **example**: | |
2077 | |
2078 ttest_rel([4,417,8,3],[30,45,5,3]) the result is (0.917072474241,0.426732624361) | |
2079 | |
2080 ========= | |
2081 chisquare | |
2082 ========= | |
2083 | |
2084 Calculates a one-way chi square test. | |
2085 | |
2086 The chi square test tests the null hypothesis that the categorical data has the given frequencies. | |
2087 | |
2088 **The output are:** | |
2089 | |
2090 chisq : float or ndarray | |
2091 | |
2092 The chi-squared test statistic. The value is a float if axis is None or f_obs and f_exp are 1-D. | |
2093 | |
2094 p : float or ndarray | |
2095 | |
2096 The p-value of the test. The value is a float if ddof and the return value chisq are scalars. | |
2097 | |
2098 **example**: | |
2099 | |
2100 stats.chisquare([4,17,8,3],[30,45,5,3],ddof=1)the result is (41.7555555556,8.5683326078e-10) | |
2101 | |
2102 ================ | |
2103 power divergence | |
2104 ================ | |
2105 | |
2106 Cressie-Read power divergence statistic and goodness of fit test. | |
2107 | |
2108 This function tests the null hypothesis that the categorical data has the given frequencies, using the Cressie-Read power divergence statistic. | |
2109 | |
2110 **The output are:** | |
2111 | |
2112 stat : float or ndarray | |
2113 | |
2114 The Cressie-Read power divergence test statistic. The value is a float if axis is None or if f_obs and f_exp are 1-D. | |
2115 | |
2116 p : float or ndarray | |
2117 | |
2118 The p-value of the test. The value is a float if ddof and the return value stat are scalars. | |
2119 | |
2120 **example**: | |
2121 | |
2122 stats.power_divergence([4,17,8,3],[30,45,5,3],1,lambda_=1) the result is (41.7555555556, 8.5683326078e-10) | |
2123 | |
2124 ========== | |
2125 tiecorrect | |
2126 ========== | |
2127 | |
2128 Tie correction factor for ties in the Mann-Whitney U and Kruskal-Wallis H tests. | |
2129 | |
2130 **The output are:** | |
2131 | |
2132 factor : float | |
2133 | |
2134 Correction factor for U or H. | |
2135 | |
2136 **example**: | |
2137 | |
2138 stats.tiecorrect([4,17,8,3,30,45,5,3])the result is (0.988095238095) | |
2139 | |
2140 ======== | |
2141 rankdata | |
2142 ======== | |
2143 | |
2144 Assign ranks to data, dealing with ties appropriately. | |
2145 | |
2146 Ranks begin at 1. The method argument controls how ranks are assigned to equal values. See the SciPy documentation of rankdata for further discussion of ranking methods. | |
2147 | |
2148 **The output are:** | |
2149 | |
2150 ranks : ndarray | |
2151 | |
2152 An array of length equal to the size of a, containing rank scores. | |
2153 | |
2154 **example**: | |
2155 | |
2156 stats.rankdata([4,17,8,3],'average') the result is ([ 2. 4. 3. 1.]) | |
2157 | |
2158 ======= | |
2159 kruskal | |
2160 ======= | |
2161 | |
2162 Compute the Kruskal-Wallis H-test for independent samples | |
2163 | |
2164 The Kruskal-Wallis H-test tests the null hypothesis that the population median of all of the groups are equal. It is a non-parametric version of ANOVA. | |
2165 | |
2166 The number of samples has to be more than one. | |
2167 | |
2168 **The output are:** | |
2169 | |
2170 H-statistic : float | |
2171 | |
2172 The Kruskal-Wallis H statistic, corrected for ties | |
2173 | |
2174 p-value : float | |
2175 | |
2176 The p-value for the test using the assumption that H has a chi square distribution | |
2177 | |
2178 | |
2179 **example**: | |
2180 | |
2181 stats.kruskal([4,17,8,3], [30,45,5,3]) the result is (0.527108433735,0.467825077285) | |
2182 | |
2183 ================== | |
2184 friedmanchisquare | |
2185 ================== | |
2186 | |
2187 Computes the Friedman test for repeated measurements | |
2188 | |
2189 The Friedman test tests the null hypothesis that repeated measurements of the same individuals have the same distribution. It is often used to test for consistency among measurements obtained in different ways. | |
2190 | |
2191 The number of samples has to be more than two. | |
2192 | |
2193 **The output are:** | |
2194 | |
2195 friedman chi-square statistic : float | |
2196 | |
2197 the test statistic, correcting for ties | |
2198 | |
2199 p-value : float | |
2200 | |
2201 the associated p-value assuming that the test statistic has a chi squared distribution | |
2202 | |
2203 | |
2204 **example**: | |
2205 | |
2206 stats.friedmanchisquare([4,17,8,3],[8,3,30,45],[30,45,5,3])the result is (0.933333333333,0.627089085273) | |
2207 | |
2208 ===== | |
2209 mood | |
2210 ===== | |
2211 | |
2212 Perform Mood’s test for equal scale parameters. | |
2213 | |
2214 Mood’s two-sample test for scale parameters is a non-parametric test for the null hypothesis that two samples are drawn from the same distribution with the same scale parameter. | |
2215 | |
2216 ----- | |
2217 | |
2218 Computes Mood’s test for equal scale parameters on samples x and y. | |
2219 | |
2220 **The output are:** | |
2221 | |
2222 z : scalar or ndarray | |
2223 | |
2224 The z-score for the hypothesis test. For 1-D inputs a scalar is returned. | |
2225 | |
2226 p-value : scalar or ndarray | |
2227 | |
2228 The p-value for the hypothesis test. | |
2229 | |
2230 **example**: | |
2231 | |
2232 mood([4,417,8,3],[30,45,5,3]) the result is (0.396928310068,0.691420327045) | |
2233 | |
2234 =============== | |
2235 combine_pvalues | |
2236 =============== | |
2237 | |
2238 Methods for combining the p-values of independent tests bearing upon the same hypothesis. | |
2239 | |
2240 | |
2241 **The output are:** | |
2242 | |
2243 statistic: float | |
2244 | |
2245 The statistic calculated by the specified method: the chi-squared statistic for “fisher”, or the Z-score for “stouffer”. | |
2246 | |
2247 pval: float | |
2248 | |
2249 The combined p-value. | |
2250 | |
2251 **example**: | |
2252 | |
2253 stats.combine_pvalues([4,17,8,3],method='fisher',weights=[5,6,7,8]) the result is (-14.795123071,1.0) | |
2254 | |
2255 =========== | |
2256 median test | |
2257 =========== | |
2258 | |
2259 Mood’s median test. | |
2260 | |
2261 Test that two or more samples come from populations with the same median. | |
2262 | |
2263 **The output are:** | |
2264 | |
2265 stat : float | |
2266 | |
2267 The test statistic. The statistic that is returned is determined by lambda. The default is Pearson’s chi-squared statistic. | |
2268 | |
2269 p : float | |
2270 | |
2271 The p-value of the test. | |
2272 | |
2273 m : float | |
2274 | |
2275 The grand median. | |
2276 | |
2277 table : ndarray | |
2278 | |
2279 The contingency table. | |
2280 | |
2281 | |
2282 **example**: | |
2283 | |
2284 stats.median_test(ties='below',correction=True,lambda_=1,*a) the result is ((0.0, 1.0, 6.5, array([[2, 2],[2, 2]]))) | |
2285 | |
2286 ======== | |
2287 shapiro | |
2288 ======== | |
2289 | |
2290 Perform the Shapiro-Wilk test for normality. | |
2291 | |
2292 The Shapiro-Wilk test tests the null hypothesis that the data was drawn from a normal distribution. | |
2293 | |
2294 ----- | |
2295 | |
2296 Computes the Shapiro-Wilk test for samples x and y. | |
2297 | |
2298 If x has length n, then y must have length n/2. | |
2299 | |
2300 **The output are:** | |
2301 | |
2302 W : float | |
2303 | |
2304 The test statistic. | |
2305 | |
2306 p-value : float | |
2307 | |
2308 The p-value for the hypothesis test. | |
2309 | |
2310 | |
2311 **example**: | |
2312 | |
2313 shapiro([4,417,8,3]) the result is (0.66630089283, 0.00436889193952) | |
2314 | |
2315 ======== | |
2316 anderson | |
2317 ======== | |
2318 | |
2319 Anderson-Darling test for data coming from a particular distribution | |
2320 | |
2321 The Anderson-Darling test is a modification of the Kolmogorov-Smirnov test kstest for the null hypothesis that a sample is drawn from a population that follows a particular distribution. For the Anderson-Darling test, the critical values depend on which distribution is being tested against. This function works for normal, exponential, logistic, or Gumbel (Extreme Value Type I) distributions. | |
2322 | |
2323 ----- | |
2324 | |
2325 Computes the Anderson-Darling test for a sample x that is assumed to come from a specific distribution. | |
2326 | |
2327 **The output are:** | |
2328 | |
2329 | |
2330 A2 : float | |
2331 | |
2332 The Anderson-Darling test statistic | |
2333 | |
2334 critical : list | |
2335 | |
2336 The critical values for this distribution | |
2337 | |
2338 sig : list | |
2339 | |
2340 The significance levels for the corresponding critical values in percents. The function returns critical values for a differing set of significance levels depending on the distribution that is being tested against. | |
2341 | |
2342 **example**: | |
2343 | |
2344 anderson([4,417,8,3],'norm') the result is (0.806976419634,[ 1.317 1.499 1.799 2.098 2.496] ,[ 15. 10. 5. 2.5 1. ]) | |
2345 | |
2346 ========== | |
2347 binom_test | |
2348 ========== | |
2349 | |
2350 Perform a test that the probability of success is p. | |
2351 | |
2352 This is an exact, two-sided test of the null hypothesis that the probability of success in a Bernoulli experiment is p. | |
2353 | |
2354 The binomial test is an exact test of the statistical significance of deviations from a theoretically expected distribution of observations into two categories. | |
2355 | |
2356 ----- | |
2357 | |
2358 Computes the test that the probability of success is p. | |
2359 | |
2360 **The output are:** | |
2361 | |
2362 p-value : float | |
2363 | |
2364 The p-value of the hypothesis test | |
2365 | |
2366 **example**: | |
2367 | |
2368 binom_test([417,8],1,0.5) the result is (5.81382734132e-112) | |
2369 | |
2370 ======== | |
2371 pearsonr | |
2372 ======== | |
2373 | |
2374 Calculates a Pearson correlation coefficient and the p-value for testing non-correlation. | |
2375 | |
2376 The Pearson correlation coefficient measures the linear relationship between two datasets. The value of the correlation (i.e., correlation coefficient) does not depend on the specific measurement units used. | |
2377 | |
2378 **The output are:** | |
2379 | |
2380 Pearson’s correlation coefficient: float | |
2381 | |
2382 2-tailed p-value: float | |
2383 | |
2384 | |
2385 **example**: | |
2386 | |
2387 pearsonr([4,17,8,3],[30,45,5,3]) the result is (0.695092958988,0.304907041012) | |
2388 | |
2389 ======== | |
2390 wilcoxon | |
2391 ======== | |
2392 | |
2393 Calculate the Wilcoxon signed-rank test. | |
2394 | |
2395 The Wilcoxon signed-rank test tests the null hypothesis that two related paired samples come from the same distribution. In particular, it tests whether the distribution of the differences x - y is symmetric about zero. It is a non-parametric version of the paired T-test. | |
2396 | |
2397 **The output are:** | |
2398 | |
2399 T : float | |
2400 | |
2401 The sum of the ranks of the differences above or below zero, whichever is smaller. | |
2402 | |
2403 p-value : float | |
2404 | |
2405 The two-sided p-value for the test. | |
2406 | |
2407 | |
2408 **example**: | |
2409 | |
2410 stats.wilcoxon([3,6,23,70,20,55,4,19,3,6], | |
2411 [23,70,20,55,4,19,3,6,23,70],zero_method='pratt',correction=True) the result is (23.0, 0.68309139830960874) | |
2412 | |
2413 ============== | |
2414 pointbiserialr | |
2415 ============== | |
2416 | |
2417 Calculates a point biserial correlation coefficient and the associated p-value. | |
2418 | |
2419 The Pearson correlation coefficient measures the linear relationship between two datasets. The value of the correlation (i.e., correlation coefficient) does not depend on the specific measurement units used. | |
2420 **The output are:** | |
2421 | |
2422 r : float | |
2423 | |
2424 R value | |
2425 | |
2426 p-value : float | |
2427 | |
2428 2-tailed p-value | |
2429 | |
2430 | |
2431 **example**: | |
2432 | |
2433 pointbiserialr([0,0,0,1,1,1,1],[1,0,1,2,3,4,5]) the result is (0.84162541153017323, 0.017570710081214368) | |
2434 | |
2435 ======== | |
2436 ks_2samp | |
2437 ======== | |
2438 | |
2439 Computes the Kolmogorov-Smirnov statistic on 2 samples. | |
2440 | |
2441 This is a two-sided test for the null hypothesis that 2 independent samples are drawn from the same continuous distribution. | |
2442 | |
2443 If the K-S statistic is small or the p-value is high, then we cannot reject the hypothesis that the distributions of the two samples are the same. | |
2444 | |
2445 **The output are:** | |
2446 | |
2447 D : float | |
2448 | |
2449 KS statistic | |
2450 | |
2451 p-value : float | |
2452 | |
2453 two-tailed p-value | |
2454 | |
2455 | |
2456 **example**: | |
2457 | |
2458 ks_2samp([4,17,8,3],[30,45,5,3]) the result is (0.5,0.534415719217) | |
2459 | |
2460 ========== | |
2461 kendalltau | |
2462 ========== | |
2463 | |
2464 Calculates Kendall’s tau, a correlation measure for sample x and sample y. | |
2465 | |
2466 Sample x and sample y should be of the same size. | |
2467 | |
2468 Kendall’s tau is a measure of the correspondence between two rankings. Values close to 1 indicate strong agreement, values close to -1 indicate strong disagreement. This is the tau-b version of Kendall’s tau which accounts for ties. | |
2469 | |
2470 | |
2471 **The output are:** | |
2472 | |
2473 Kendall’s tau : float | |
2474 | |
2475 The tau statistic. | |
2476 | |
2477 p-value : float | |
2478 | |
2479 The two-sided p-value for a hypothesis test whose null hypothesis is an absence of association, tau = 0. | |
2480 | |
2481 | |
2482 **example**: | |
2483 | |
2484 kendalltau([4,17,8,3],[30,45,5,3]) the result is (0.666666666667,0.174231399708) | |
2485 | |
2486 ================ | |
2487 chi2_contingency | |
2488 ================ | |
2489 | |
2490 Chi-square test of independence of variables in a contingency table. | |
2491 | |
2492 This function computes the chi-square statistic and p-value for the hypothesis test of independence of the observed frequencies in the contingency table observed. | |
2493 | |
2494 **The output are:** | |
2495 | |
2496 chi2 : float | |
2497 | |
2498 The test statistic. | |
2499 | |
2500 p : float | |
2501 | |
2502 The p-value of the test | |
2503 | |
2504 dof : int | |
2505 | |
2506 Degrees of freedom | |
2507 | |
2508 expected : ndarray, same shape as observed | |
2509 | |
2510 The expected frequencies, based on the marginal sums of the table. | |
2511 | |
2512 **example**: | |
2513 | |
2514 stats.chi2_contingency([4,17,8,3],1)the result is (0.0, 1.0, 0, array([ 4., 17., 8., 3.])) | |
2515 | |
2516 ====== | |
2517 boxcox | |
2518 ====== | |
2519 | |
2520 Return a positive dataset transformed by a Box-Cox power transformation | |
2521 | |
2522 **The output are:** | |
2523 | |
2524 boxcox : ndarray | |
2525 | |
2526 Box-Cox power transformed array. | |
2527 | |
2528 maxlog : float, optional | |
2529 | |
2530 If the lmbda parameter is None, the second returned argument is the lambda that maximizes the log-likelihood function. | |
2531 | |
2532 (min_ci, max_ci) : tuple of float, optional | |
2533 | |
2534 If lmbda parameter is None and alpha is not None, this returned tuple of floats represents the minimum and maximum confidence limits given alpha. | |
2535 | |
2536 | |
2537 **example**: | |
2538 | |
2539 stats.boxcox([4,17,8,3],alpha=0.9) the result is ([ 1.03301717 1.60587825 1.35353026 0.8679017 ],-0.447422166194,(-0.5699221654511225, -0.3259515659400082)) | |
2540 | |
2541 ============== | |
2542 boxcox normmax | |
2543 ============== | |
2544 | |
2545 Compute optimal Box-Cox transform parameter for input data | |
2546 | |
2547 **The output are:** | |
2548 | |
2549 maxlog : float or ndarray | |
2550 | |
2551 The optimal transform parameter found. An array instead of a scalar for method='all'. | |
2552 | |
2553 | |
2554 **example**: | |
2555 | |
2556 stats.boxcox_normmax([4,17,8,3],(-2,2),'pearsonr')the result is (-0.702386238971) | |
2557 | |
2558 ========== | |
2559 boxcox llf | |
2560 ========== | |
2561 | |
2562 The boxcox log-likelihood function | |
2563 | |
2564 **The output are:** | |
2565 | |
2566 llf : float or ndarray | |
2567 | |
2568 Box-Cox log-likelihood of data given lmb. A float for 1-D data, an array otherwise. | |
2569 | |
2570 **example**: | |
2571 | |
2572 stats.boxcox_llf(1,[4,17,8,3]) the result is (-6.83545336723) | |
2573 | |
2574 ======= | |
2575 entropy | |
2576 ======= | |
2577 | |
2578 Calculate the entropy of a distribution for given probability values. | |
2579 | |
2580 If only probabilities pk are given, the entropy is calculated as S = -sum(pk * log(pk), axis=0). | |
2581 | |
2582 If qk is not None, then compute the Kullback-Leibler divergence S = sum(pk * log(pk / qk), axis=0). | |
2583 | |
2584 This routine will normalize pk and qk if they don’t sum to 1. | |
2585 | |
2586 **The output is:** | |
2587 | |
2588 S : float | |
2589 | |
2590 The calculated entropy. | |
2591 | |
2592 | |
2593 **example**: | |
2594 | |
2595 stats.entropy([4,17,8,3],[30,45,5,3],1.6) the result is (0.641692653659) | |
2596 | |
2597 ====== | |
2598 kstest | |
2599 ====== | |
2600 | |
2601 Perform the Kolmogorov-Smirnov test for goodness of fit. | |
2602 | |
2603 **The outputs are:** | |
2604 | |
2605 D : float | |
2606 | |
2607 KS test statistic, either D, D+ or D-. | |
2608 | |
2609 p-value : float | |
2610 | |
2611 One-tailed or two-tailed p-value. | |
2612 | |
2613 **example**: | |
2614 | |
2615 stats.kstest([4,17,8,3],'norm',N=20,alternative='two-sided',mode='approx') the result is (0.998650101968,6.6409100441e-12) | |
2616 | |
2617 =========== | |
2618 theilslopes | |
2619 =========== | |
2620 | |
2621 Computes the Theil-Sen estimator for a set of points (x, y). | |
2622 | |
2623 theilslopes implements a method for robust linear regression. It computes the slope as the median of all slopes between paired values. | |
2624 | |
2625 **The outputs are:** | |
2626 | |
2627 medslope : float | |
2628 | |
2629 Theil slope. | |
2630 | |
2631 medintercept : float | |
2632 | |
2633 Intercept of the Theil line, as median(y) - medslope*median(x). | |
2634 | |
2635 lo_slope : float | |
2636 | |
2637 Lower bound of the confidence interval on medslope. | |
2638 | |
2639 up_slope : float | |
2640 | |
2641 Upper bound of the confidence interval on medslope. | |
2642 | |
2643 **example**: | |
2644 | |
2645 stats.theilslopes([4,17,8,3],[30,45,5,3],0.95) the result is (0.279166666667,1.11458333333,-0.16,2.5) | |
2646 | |
2647 </help> | |
2648 </tool> |