-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstats.ml
More file actions
151 lines (135 loc) · 6.1 KB
/
stats.ml
File metadata and controls
151 lines (135 loc) · 6.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
open Eval
(* Raised by the functions in this file when an argument violates their
 * preconditions (e.g. a dataset with too few points, or a probability
 * outside 0..1).
 *)
exception Illegal
(* A regression model, as passed through the optional [func] argument of the
 * functions below; their comments describe it as a float list of length 2, 3,
 * or 4.
 * NOTE(review): the meaning and order of the floats is decided by the [Eval]
 * helpers that consume the list -- confirm there.
 *)
type funct = float list
(* A list of (x, y) observations. *)
type dataset = (float * float) list
(* POST: [rsq dset] is the third element (index 2) of the list returned by
 * [lin_reg_stats dset], which this module uses as the r squared value of a
 * linear regression performed on [dset] (expected to lie in the range 0..1.).
 * PRE: [dset] is a (float * float) list of finite length.
 * RAISES: [Illegal] if [lin_reg_stats dset] has fewer than three elements.
 *)
let rsq dset =
  match lin_reg_stats dset with
  | _ :: _ :: r_squared :: _ -> r_squared
  | [] | [ _ ] | [ _; _ ] -> raise Illegal
(* POST: [residuals ?func dset] is a float list holding, for each (x, y) in
 * [dset] and in the same order, the residual y - yhat, where yhat is the value
 * [generate_y_lst] produces for x under the regression model [func]. When
 * [func] is omitted the model is [lin_reg dset], i.e. a linear regression on
 * [dset].
 * PRE: [dset] is a (float * float) list of finite length. [func] if provided is
 * a float list of length 2, 3, or 4.
 * RAISES: [Illegal] if [dset] has fewer than two points, or if the model has
 * fewer than two elements. [Invalid_argument] if [generate_y_lst] returns a
 * list whose length differs from that of [dset].
 *)
let residuals ?func dset =
  (* Reject undersized datasets BEFORE fitting, so that [lin_reg] is never run
   * on data this function is about to refuse anyway. *)
  (match dset with
   | [] | [ _ ] -> raise Illegal
   | _ :: _ :: _ -> ());
  let model = match func with None -> lin_reg dset | Some f -> f in
  (match model with
   | [] | [ _ ] -> raise Illegal
   | _ :: _ :: _ -> ());
  let xs, ys = List.split dset in
  List.map2 (fun y yhat -> y -. yhat) ys (generate_y_lst model xs)
(* POST: [residuals_plot ?func dset] pairs every x-coordinate of [dset] with
 * the matching entry of [residuals ?func dset], giving a (float * float) list
 * of (x-coordinate, residual) points in the order of [dset].
 * PRE: [dset] is a (float * float) list of finite length. [func] if provided is
 * a float list of length 2, 3, or 4.
 *)
let residuals_plot ?func dset =
  let res = residuals ?func dset in
  List.combine (List.map fst dset) res
(* POST: [residualssq ?func dset] is the float list obtained by squaring every
 * element of [residuals ?func dset], in the same order.
 * PRE: [dset] is a (float * float) list of finite length. [func] if provided is
 * a float list of length 2, 3, or 4.
 *)
let residualssq ?func dset =
  residuals ?func dset |> List.map (fun r -> r ** 2.)
(* POST: [variance ?func dset] is the sum of the squared residuals of [dset]
 * under the regression model [func], divided by (n - 1) where n is the number
 * of points in [dset]. With [func] the residuals are taken against that
 * user-input model; without it they are taken against a linear regression on
 * [dset].
 * PRE: [dset] is a (float * float) list of finite length. [func] if provided is
 * a float list of length 2, 3, or 4.
 *)
let variance ?func dset =
  let denom = float_of_int (List.length dset) -. 1. in
  let sum_sq = sum (residualssq ?func dset) in
  sum_sq /. denom
(* POST: [stderror ?func dset] is the square root of (sum of squared residuals
 * / n), where n is the number of points in [dset]; this module calls that
 * quantity the standard error of the regression model [func] on [dset]. With
 * [func] the residuals are taken against that user-input model; without it
 * they are taken against a linear regression on [dset].
 * PRE: [dset] is a (float * float) list of finite length. [func] if provided is
 * a float list of length 2, 3, or 4.
 *)
let stderror ?func dset =
  let count = float_of_int (List.length dset) in
  let sum_sq = sum (residualssq ?func dset) in
  sqrt (sum_sq /. count)
(* POST: [stddev ?func dset] is a float that represents the standard deviation
 * of [dset] about the regression model [func], computed as the square root of
 * [variance ?func dset]. With [func] the user-input model is used; without it
 * a linear regression on [dset] is used.
 * PRE: [dset] is a (float * float) list of finite length. [func] if provided is
 * a float list of length 2, 3, or 4.
 *)
let stddev ?func dset = variance ?func dset |> sqrt
(* POST: [chisq ?func dset] is the chi squared statistic of the regression
 * model [func] against [dset]: the sum, over all points, of the squared
 * residual divided by the observed y-value of that point. With [func] the
 * user-input model is tested; without it a linear regression on [dset] is
 * tested.
 * PRE: [dset] is a (float * float) list of finite length where observed values,
 * i.e. y-values, are >= 1. [func] if provided is a float list of length 2, 3,
 * or 4.
 *)
let chisq ?func dset =
  let sq_residuals = residualssq ?func dset in
  let observed = List.map snd dset in
  sum (List.map2 (fun sq y -> sq /. y) sq_residuals observed)
(* POST: [fact x] is x! as a float. The product is accumulated upwards
 * (1 * 1 * 2 * ... * x) in constant stack space. Because 171! already exceeds
 * the largest finite float, every [x] above 170 yields [infinity] directly.
 * PRE: [x] is a non-negative float with no fractional part.
 * RAISES: [Invalid_argument] if [x] is negative, fractional, or nan. Without
 * this check the countdown would never reach 0 and the recursion would not
 * terminate.
 *)
let fact x =
  (* [x <> floor x] is also true for nan, so nan is rejected here too. *)
  if x < 0. || x <> floor x then
    invalid_arg "fact: argument must be a non-negative whole number"
  else if x > 170. then infinity
  else
    let rec loop acc i = if i > x then acc else loop (acc *. i) (i +. 1.) in
    loop 1. 1.
(*POST: [poisson mean x] is a float giving the Poisson probability
 * exp(-mean) * mean^x / x!, i.e. the probability of observing exactly [x]
 * events when the expected number of events is [mean].
 * The value is accumulated in log space, as
 * -mean + log(mean/1) + ... + log(mean/x), and exponentiated once at the end.
 * This uses the exact exponential instead of a truncated constant for e, and
 * avoids the inf/inf = nan that arises when mean^x and x! are formed
 * separately and both overflow (x > 170).
 * PRE: mean is a float greater than 0 and x is a nonnegative int
 * RAISES: [Illegal] if [mean <= 0.] or [x < 0].
 *)
let poisson mean x =
  if mean <= 0. || x < 0 then raise Illegal
  else
    let rec log_mass acc i =
      if i > x then acc else log_mass (acc +. log (mean /. float i)) (i + 1)
    in
    exp (log_mass (-.mean) 1)
(*POST: [poissonc mean x] is a float giving the cumulative Poisson
 * distribution: the sum of [poisson mean k] for k = 0, 1, ..., [x].
 * PRE: mean is a float greater than 0 and x is a nonnegative int
 * RAISES: [Illegal] if [mean <= 0.] or [x < 0].
 *)
let poissonc mean x =
  if mean <= 0. || x < 0 then raise Illegal
  else
    (* Add the terms from k = 0 upwards, carrying the running total. *)
    let rec accumulate total k =
      if k > x then total else accumulate (total +. poisson mean k) (k + 1)
    in
    accumulate (poisson mean 0) 1
(*POST: [binomial n p x] is a float giving the binomial probability
 * C(n, x) * p^x * (1 - p)^(n - x), i.e. the probability of exactly [x]
 * successes in [n] trials that each succeed with probability [p].
 * C(n, x) is built multiplicatively, one factor (n - k + i) / i at a time with
 * k = min x (n - x), instead of from three factorials. Those factorials
 * overflow to infinity for n > 170 and turn the result into nan. The
 * coefficient itself can still overflow for very large [n].
 * PRE: n is an int greater than 0, p is a float between 0 and 1, and x is
 * a nonnegative int less than or equal to n
 * RAISES: [Illegal] if [n <= 0], [p] is outside 0..1, [x < 0], or [x > n].
 *)
let binomial n p x =
  (* [x < 0] must be rejected: a negative count has no binomial coefficient. *)
  if n <= 0 || p < 0. || p > 1. || x < 0 || x > n then raise Illegal
  else
    (* C(n, x) = C(n, n - x); use the smaller index to shorten the loop. *)
    let k = min x (n - x) in
    let rec choose acc i =
      if i > k then acc
      else choose (acc *. float (n - k + i) /. float i) (i + 1)
    in
    choose 1. 1 *. (p ** float x) *. ((1. -. p) ** float (n - x))
(*POST: [binomialc n p x] is a float giving the cumulative binomial
 * distribution: the sum of [binomial n p k] for k = 0, 1, ..., [x].
 * PRE: n is an int greater than 0, p is a float between 0 and 1, and x is
 * a nonnegative int less than or equal to n
 * RAISES: [Illegal] if [n <= 0], [p] is outside 0..1, [x < 0], or [x > n].
 *)
let binomialc n p x =
  (* The arguments are validated once, up front. [x < 0] is rejected here so a
   * negative count is never handed on to [binomial]. *)
  if n <= 0 || p < 0. || p > 1. || x < 0 || x > n then raise Illegal
  else
    (* Add the terms from k = 0 upwards, carrying the running total. *)
    let rec accumulate total k =
      if k > x then total else accumulate (total +. binomial n p k) (k + 1)
    in
    accumulate (binomial n p 0) 1