-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathshared.h
More file actions
198 lines (176 loc) · 5.11 KB
/
shared.h
File metadata and controls
198 lines (176 loc) · 5.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
//Shared information for linked optimisation
#include <iostream>
#include <iomanip>
#include <fstream>
#include <cstring>
#include <vector>
#include <string>
#include <algorithm>
#include <map>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_blas.h>
#include <gsl/gsl_multifit_nlin.h>
#include <gsl/gsl_sf_hyperg.h>
#include <gsl/gsl_sf_gamma.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_histogram.h>
#include <gsl/gsl_statistics_double.h>
#include <gsl/gsl_permutation.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_multimin.h>
using namespace std;
struct run_params {
string ref;
int plines; //Number of elements in string defining paired-end data in specific .sam file
int almethod; //Method of gathering sequence alignment data
int min_qual; //Minimum sequence quality
int max_qual; //Maximum sequence quality
int min_rlen; //Minimum number of alleles that must be contained in a single read
int ali_qual; //Minimum alignment quality
int ali_inc; //Flag to include reads with uncertain alignment quality
int sorted; //Flag to indicate that .sam files have been sorted by their first column
int qlib; //Base quality indicators
int pairs; //Include paired end reads
int ddup; //Remove duplicate reads
double specify_csl; //Read value of noise parameter C_sl;
double q_cut; //Cutoff to identify polymorphism
double qp_cut; //Probability required to identify polymorphism
int n_min; //Minimum number of variant alleles to call polymorphism
int rep_q; //Number of time points at which a SNP needs to be observed in order for it to be recorded
int first; //Flag to require first time-point to be polymorphic when calling SNPs
double dq_cut; //Cutoff change in allele frequency per day for potential neutrality
int dep_cut; //Cutoff overall read depth to call polymorphism
double seed; //Random seed
int skip;
int no_sam; //Skip reading in list of .sam files in calling single-locus variants
int pos; //Default position for mutational scan
int det; //Flag to use deterministic model of single-locus evolution; no mutation or drift
double mu; //Mutation rate for single-locus evolution model
double hap_q_cut; //Minimum freuqency within a dataset at which to include a partial haplotype
int hap_n_min; //Minimum number of calls within a dataset required to include a partial haplotype
int hap_index; //Flag to use the original haplotype numbers in numbering Hap_data?.dat, Contribs?.dat
int conservative; //Flag to calculate additional conservative estimate of noise
int readhap; //Flag to call multi-locus polymorphisms against specific haplotypes, contained in a file
string hap_file; //File name for haplotype data
string in_file; //File name for output
int get_in; //Flag to use read input file
string out_file; //File name for output
int get_out; //Flag to use read output file
int full_haps; //Flag to generate full haplotypes
int full_rep; //Flag to give Multi_locus_haplotypes output
int vs_ref; //Call against the given reference sequence, rather than against the consensus in the first time point at each position
int gmaf; //Call polymorphisms that are fixed against the reference sequence
int uniq; //Only print one trajectory per locus
int multi_gap; //Option in multilocus calling to call full haplotypes with gaps, rather than consecutive loci
int maxgap; //Maximum number of gaps to allow in multi_gap code
int verb; //Verbose output
};
struct rseq {
string name;
string seq;
int size;
};
struct rd {
int alpos;
int alq;
int inc;
int flag;
vector<int> flagv;
int rev;
int refno;
int pairno;
int del;
string ref;
string cigar;
string cig_string; //Cigar data by nucleotide
string seq;
string revseq;
string qual;
string revqual;
string paircode;
};
struct alldat {
int s_length;
vector<rd> data;
};
struct poly {
int locus;
int first;
char nuc;
char cons;
};
struct mpoly {
vector<char> vars;
int count;
};
struct str { //Single-locus trajectory through time
int locus;
int inc;
char cons;
char nuc;
vector<int> times;
vector<int> nA;
vector<int> nC;
vector<int> nG;
vector<int> nT;
vector<int> nN;
vector<double> qA;
vector<double> qC;
vector<double> qG;
vector<double> qT;
double mA; //Mean frequencies - sum of observations over number of observations
double mC;
double mG;
double mT;
vector<double> logL;
vector<double> BIC;
};
struct mtr { //Multi-locus trajectory through time
vector<char> seq;
vector<int> n; //Number of observations
double m; //Mean frequency
vector<double> logL;
};
struct joined {
int alpos;
string seq;
};
struct pr {
string s1;
string s2;
};
struct par {
int i1;
int i2;
};
struct ld_info {
int i;
int j;
vector<int> n_i1;
vector<int> n_i0;
vector<int> n_j1;
vector<int> n_j0;
vector<int> n_11;
vector<int> n_10;
vector<int> n_01;
vector<int> n_00;
};
struct det {
int start;
int length;
int used;
int sam_ref;
string seq;
};
struct nuc {
vector<int> nA;
vector<int> nC;
vector<int> nG;
vector<int> nT;
vector<int> nN;
};
double DirichletMultiCalc (int N, double c, vector<int> obs, vector<double> inf, vector<double>& fact_store);