Mercurial > repos > fz77 > pscan_chip_test
changeset 9:6a5cfb8a872f draft
Uploaded
author | fz77 |
---|---|
date | Mon, 17 Nov 2014 09:40:44 -0500 |
parents | 2aad8f91e6f2 |
children | 5e2af7c006b0 |
files | pscan_chip/pscan_chip_g.cpp |
diffstat | 1 files changed, 2908 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pscan_chip/pscan_chip_g.cpp Mon Nov 17 09:40:44 2014 -0500 @@ -0,0 +1,2908 @@ +// Pscan-ChIP 1.1 +// // Copyright (C) 2013 Federico Zambelli and Giulio Pavesi +// // +// // This program is free software: you can redistribute it and/or modify +// // it under the terms of the GNU General Public License as published by +// // the Free Software Foundation, either version 3 of the License, or +// // (at your option) any later version. +// // +// // This program is distributed in the hope that it will be useful, +// // but WITHOUT ANY WARRANTY; without even the implied warranty of +// // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// // GNU General Public License for more details. +// // +// // You should have received a copy of the GNU General Public License +// // along with this program. If not, see <http://www.gnu.org/licenses/>. +// // +// // If you find Pscan-ChIP useful in your research please cite our paper: +// // +// // Zambelli F, Pesole G, Pavesi G. PscanChIP: Finding over-represented +// // transcription factor-binding site motifs and their correlations in sequences from +// // ChIP-Seq experiments. +// // Nucleic Acids Res. 2013 Jul;41(Web Server issue):W535-43. + +#include <string> +#include <iostream> +#include <fstream> +#include <vector> +#include <sstream> +#include <math.h> +#include <map> +#include <cstdlib> +#include <gsl/gsl_randist.h> +#include <gsl/gsl_cdf.h> +#include <string.h> +#include <ctime> + +using namespace std; + +const double MIN_ADD_FREQ = 0.01, EPSILON = pow(10,-21), INF = pow(10,21), /*POS_VIS_THRESHOLD = pow(10,-5),*/ DIST_RATIO_THRESHOLD = 3, DIST_PV_THRESHOLD = pow(10,-5); +const unsigned int MAX_REG_SIZE = 3000, ALPHABET_SIZE = 4, TOP_P = 1, DIST_HIT_THRESHOLD = 10; +//const char ALPHABET[4] = {'A','C','G','T'}, RC_ALPHABET[4] = {'T','G','C','A'}; +char VI['T' + 1]; +string regfile, genome, matrixfile, additional_matrixfile, outfile, bgfile, selected, obstf, GALAXY_OUTPUT; +unsigned int RSIZE = 150, MAX_L = 0, W_SIZE = 10, W_STEP = 5; +bool SECOND_BEST_POS = false, SINGLE_STRAND = false, VERBOSE = false, GET_SEQUENCES = false, POSITIONAL = false, SINGLE_MATRIX = false, SPEARMAN = false, TWOBIT = false, QUIET = false; //NAIVE = false; +ofstream fasta_out; +double SINGLE_MATRIX_BG_AVG = 0; + +struct mpoint +{ + unsigned int x; + unsigned int y; +}; + +struct bg +{ + unsigned int Size; + double Avg, Var; +}; + +class sequence +{ + private: + string Chr, Name; + char *Seq; + unsigned int Start, End, E_Rank; + char Strand, AntiStrand; + bool good; + public: + sequence(string*, ifstream*, unsigned int); + sequence(string*, unsigned int, string, unsigned int); + char strand(); + char antistrand(); + string chr(); + unsigned int start(); + unsigned int end(); + unsigned int e_rank(); + string name(); + char* seq(); + bool is_good(); + double score(); + int mpos(); +}; + +class matrix +{ + friend void scanner(vector<matrix>*, vector<sequence>*); + private: + void normalizer(); + void to_log(); + void avg(); + void max_min(); + void rev_c(); + double get_M_value(int, char); + double get_rc_M_value(int, char); + void calc_var(double); + void calc_W_param(double); + void welch_test(double); + void welch_test_bg(double); +// void welch_test_bg_G(double); + void welch_test_pos(double); + void z_test_pos(double); + unsigned int L, bg_Size, BIN_NUM; + double **M; + double **rc_M; + double max_score, min_score, t, dof, t_bg, dof_bg; + double spearman; + string Name, Id; + bool O_U, O_U_bg; + vector<bool> W_O_U; + bool good, BG; + bool norm_sec_step; + vector<double> v_score, vF_score; + vector<unsigned int> v_pos; + vector<bool> v_strand; + vector<vector<double> > W_R_v_score; + vector<double> W_R_Avg, W_R_Var, W_R_pvalue, W_R_T_pvalue; + double R_Avg, F_Avg, R_Var, F_Var, /*G_Avg, G_Var,*/ pvalue, W_Avg, W_Var; + double bg_Avg, bg_Var, pvalue_bg; + public: + matrix(vector<string>*, map<string,bg>*); + void display(); + bool is_good(); + string name(); + string id(); + string o_or_u(); + string o_or_u_bg(); + int W_o_or_u(unsigned int); + void scan(vector<sequence>*); + double scan_seq_ds(char*, bool, unsigned int, vector<double>*); + double scan_seq_ss(char*, bool, unsigned int, char, vector<double>*); + double scan_seq_ds_second_best(char*, unsigned int, unsigned int *, bool *, int); + double scan_seq_ss_second_best(char*, unsigned int, unsigned int *, bool *, unsigned int, bool); + double get_score_at(unsigned int); + unsigned int get_pos_at(unsigned int); + unsigned int length(); + bool get_strand_at(unsigned int); + bool have_bg(); + unsigned int bin_num(); + double get_R_Avg(); + double get_F_Avg(); + double get_bg_Avg(); + double get_R_Var(); + double get_F_Var(); + double max(); + double min(); + double get_pvalue(); + double get_pvalue_bg(); + double get_W_pvalue(unsigned int); + double get_W_R_Avg_at_pos(unsigned int); + double get_t(); + double get_dof(); + double get_spearman(); + double get_W_Avg(); + double get_W_R_T_pvalue(unsigned int); +}; + +void command_line_parser(int, char**); +void acquire_regions(vector<sequence>*); +void acquire_matrices(vector<matrix>*, map<string,bg>*, string*); +void acquire_background(map<string,bg>*); +void scanner(vector<matrix>*, vector<sequence>*); +void main_output(vector<matrix>*, vector<sequence>*, unsigned int); +void main_output_single_matrix(vector<matrix>*, vector<sequence>*); +void distance_analysis(vector<matrix>*, vector<sequence>*, unsigned int, multimap <double, vector<matrix>::iterator>*); +void display_help(); + +int main(int argc, char **argv) +{ + srand(time(NULL)); + + vector<sequence> Seq; + vector<matrix> Mat; + map<string,bg> Bg; + VI['A'] = 0; + VI['C'] = 1; + VI['G'] = 2; + VI['T'] = 3; + VI['N'] = 4; + + obstf.clear(); + + command_line_parser(argc, argv); + + if(GET_SEQUENCES) + { + string outfile = regfile; + outfile += ".fasta"; + fasta_out.open(outfile.c_str(), ios::out); + } + + if(!regfile.empty() && !matrixfile.empty()) + { + if(!bgfile.empty()) + { + if(!QUIET) cerr << "\nAcquiring background file... "; + acquire_background(&Bg); + if(!QUIET) cerr << "done" << endl; + } + + if(!QUIET) cerr << "\nReading binding profiles..."; + acquire_matrices(&Mat, &Bg, &matrixfile); + + if(!additional_matrixfile.empty()) + acquire_matrices(&Mat, &Bg, &additional_matrixfile); + + if(Mat.empty()) + { + if(!QUIET) cerr << "\nZero valid matrices acquired. Quitting..." << endl; + exit(1); + } + + if(!QUIET) cerr << "done: " << Mat.size() << " binding profiles acquired. Max matrix length = " << MAX_L << endl; + + if(Mat.size() == 1) + { + if(!QUIET) cerr << "\nSwitching to single matrix mode..." << endl; + SINGLE_MATRIX = true; + } + + if(!QUIET) cerr << "\nAcquiring regions..."; + acquire_regions(&Seq); + if(!QUIET) cerr << "done: " << Seq.size() << " unique regions acquired." << endl; + + if(Seq.size() <= 2) + POSITIONAL = false; + + if(Seq.empty()) + { + if(!QUIET) cerr << "\nZero regions to work on! Exiting..." << endl; + exit(1); + } + + if(GET_SEQUENCES) + { + fasta_out.close(); + exit(0); + } + +// for(unsigned int i = 0; i < Mat.size(); i++) +// Mat[i].display(); + + if(!QUIET) cerr << "\nScanning..."; + scanner(&Mat, &Seq); + + if(!QUIET) cerr << "\nWriting output..." << endl; + + if(!SINGLE_MATRIX) + main_output(&Mat, &Seq, Seq.size()); + else + main_output_single_matrix(&Mat, &Seq); + + if(!QUIET) cerr << "\nOutput written in file: " << outfile << endl; + + } + else if(GET_SEQUENCES && !regfile.empty()) + { + if(!QUIET) cerr << "\nAcquiring regions..."; + acquire_regions(&Seq); + if(!QUIET) cerr << "done: " << Seq.size() << " unique regions acquired." << endl; + + fasta_out.close(); + exit(0); + } + else + display_help(); + + +/* for(unsigned int i = 0; i < Seq.size(); i++) + { + cout << ">" << i+1 << '\t' << Seq[i].name() << '\t' << Seq[i].start() << '\t' << Seq[i].end() << '\t' + << Mat[0].get_score_at(i) << '\t' << Mat[0].get_pos_at(i) << '\t' << Mat[0].get_strand_at(i) << '\t' + << "Max = " << Mat[0].max() << '\t' << "Min = " << Mat[0].min() + << endl << Seq[i].seq() << endl; + cout << ">" << i+1 << '\t' << Seq[i].name() << '\t' << "UPSTREAM" << endl << Seq[i].uf_seq() << endl; + cout << ">" << i+1 << '\t' << Seq[i].name() << '\t' << "DOWNSTREAM" << endl << Seq[i].df_seq() << endl; + } +*/ + + return 0; +} + +void display_help() +{ + if(!QUIET) cerr << endl << "Syntax: pscan_chip -r region_file.bed -M matrixfile.wil -g genome [-pos] [-wsize window_size] [-wstep window_step] [-bg background_file] [-s region_size]" << endl << endl; + exit(0); +} + +void main_output_single_matrix(vector<matrix> *Mat, vector<sequence> *Seq) +{ + outfile = regfile; + outfile += "."; + outfile += Mat->at(0).id(); + + string outfile2 = outfile; + + outfile += ".ris"; + outfile2 += ".all.ris"; + + if(!GALAXY_OUTPUT.empty()) + outfile = GALAXY_OUTPUT; + + multimap <double, string> outmap; + + ofstream out(outfile.c_str(), ios::out); + ofstream out2(outfile2.c_str(), ios::out); + + if(!out) + { + if(!QUIET) cerr << "\nError! Can't open output file: " << outfile << " for writing..." << endl; + exit(1); + } + + out << "CHR\tREG_START\tREG_END\tREG_STRAND\tABS_SITE_START\tABS_SITE_END\tREL_SITE_START\tREL_SITE_END\tSITE_STRAND\tSCORE\tSITE"; + out2 << "CHR\tREG_START\tREG_END\tREG_STRAND\tABS_SITE_START\tABS_SITE_END\tREL_SITE_START\tREL_SITE_END\tSITE_STRAND\tSCORE\tSITE"; + +// if(SPEARMAN) +// out << "\tE_RANK"; + + if(SECOND_BEST_POS) + { + out << "\tSB_POS\tSB_SCORE\tSB_OLIGO\tSB_STRAND" << endl; + out2 << "\tSB_POS\tSB_SCORE\tSB_OLIGO\tSB_STRAND" << endl; + } + else + { + out << endl; + out2 << endl; + } + + matrix *M = &Mat->at(0); + +/* if(SINGLE_MATRIX_BG_AVG == 0) //QUESTO SERVE PER QUANDO NON C'E' IL VALORE DI BACKGROUND! + { + double loc_avg = 0, loc_stdev = 0; + + for(unsigned int i = 0; i < Seq->size(); i++) + loc_avg += M->get_score_at(i); + + loc_avg /= Seq->size(); + + for(unsigned int i = 0; i < Seq->size(); i++) + loc_stdev += pow(M->get_score_at(i) - loc_avg, 2); + + loc_stdev /= (Seq->size() - 1); + + loc_stdev = sqrt(loc_stdev); + + SINGLE_MATRIX_BG_AVG = loc_avg - (2*loc_stdev); + + if(!QUIET) cerr << "\nSMBG = " << SINGLE_MATRIX_BG_AVG; + } +*/ + char Oligo[M->length() + 1]; + + Oligo[M->length()] = '\0'; + + for(unsigned int i = 0; i < Seq->size(); i++) + { +// if(!QUIET) cerr << endl << M->get_score_at(i) << '\t' << M->get_bg_Avg() << '\t'; + +// if(M->get_score_at(i) < SINGLE_MATRIX_BG_AVG) //ONLY REPORT OCCURRENCES WITH SCORE >= BG_AVG +// continue; + +// if(!QUIET) cerr << "passed"; + + ostringstream outbuf; + + sequence *S = &Seq->at(i); + + strncpy(Oligo, S->seq() + RSIZE/2 + (M->get_pos_at(i) - M->length()/2), M->length()); + + outbuf << S->chr() << '\t' << S->start() << '\t' << S->end() << '\t' << S->strand() << '\t' + << S->start() + M->get_pos_at(i) - M->length()/2 << '\t' << S->start() + M->get_pos_at(i) + M->length()/2 << '\t' + << (int)((M->get_pos_at(i) - M->length()/2) - RSIZE/2) << '\t' << (int)((M->get_pos_at(i) + M->length()/2 - 1) - RSIZE/2)<< '\t'; + + if(S->strand() == '+') + { + if(!M->get_strand_at(i)) + outbuf << S->strand(); + else + outbuf << S->antistrand(); + } + else + { + if(!M->get_strand_at(i)) + outbuf << S->antistrand(); + else + outbuf << S->strand(); + } + + outbuf << '\t' << M->get_score_at(i) << '\t' << Oligo; + +// if(SPEARMAN) +// outbuf << '\t' << S->e_rank(); + + if(SECOND_BEST_POS) + { + if(SINGLE_STRAND) + { + bool s; + unsigned int P; + double score; + + score = M->scan_seq_ss_second_best(S->seq() + (RSIZE/2), RSIZE, &P, &s,M->get_pos_at(i), S->strand()); + + strncpy(Oligo, S->seq() + RSIZE/2 + P, M->length()); + + outbuf << '\t' << P << '\t' << score << '\t' << Oligo; + } + else + { + bool s; + unsigned int P; + double score; + + score = M->scan_seq_ds_second_best(S->seq() + (RSIZE/2), RSIZE, &P, &s, (int)M->get_pos_at(i)); + + strncpy(Oligo, S->seq() + RSIZE/2 + P, M->length()); + + outbuf << '\t' << (int)P - (int)RSIZE/2 << '\t' << score << '\t' << Oligo << '\t'; + + if(s) + outbuf << '-'; + else + outbuf << '+'; + } + + outbuf << endl; + } + else + outbuf << endl; + + outmap.insert(make_pair(M->get_score_at(i),outbuf.str())); + } + + multimap<double,string>::reverse_iterator mi = outmap.rbegin(); + +// unsigned int count = 0; + + while(mi != outmap.rend()) + { +// if(count >= outmap.size()/2 && outmap.size() > 1) //SOLO LA MIGLIORE META' DELLE OCCORRENZE!!! +// break; + if(mi->first >= SINGLE_MATRIX_BG_AVG || mi == outmap.rbegin()) + out << mi->second; + + out2 << mi->second; + + mi++; +// count++; + } + + out.close(); + out2.close(); + return; +} + +void main_output(vector<matrix> *Mat, vector<sequence> *Seq, unsigned int SIZE) +{ + multimap <double, vector<matrix>::iterator> S_map; + outfile = regfile; + + outfile += ".res"; + + if(!GALAXY_OUTPUT.empty()) + outfile = GALAXY_OUTPUT; + + ofstream out(outfile.c_str(),ios::out); + + if(!out) + { + if(!QUIET) cerr << "\nError! Can't open output file: " << outfile << " for writing..." << endl; + exit(1); + } + + out << "NAME\tID\tREG_N\tL_PVALUE O/U\tG_PVALUE O/U\tR_AVG\tR_STDEV\tF_AVG\tBG_AVG"; + + if(SPEARMAN) + out << "\tSP_COR"; + + if(POSITIONAL) + out << "\tPREF_POS\tPREF_POS_PV\n"; + else + out << endl; + + vector<matrix>::iterator vi = Mat->begin(); + + while(vi != Mat->end()) + { + if(S_map.find(vi->get_pvalue()) != S_map.end()) //GFI WORKAROUND + { + if(S_map.find(vi->get_pvalue())->second->get_pvalue_bg() < vi->get_pvalue_bg()) + { + S_map.insert(make_pair(vi->get_pvalue() + pow(10,-240), vi)); + vi++; + continue; + } + else if((S_map.find(vi->get_pvalue())->second->get_pvalue_bg() > vi->get_pvalue_bg())) + { + S_map.insert(make_pair(vi->get_pvalue() - pow(10,-240), vi)); + vi++; + continue; + } + else + { + if(S_map.find(vi->get_pvalue())->second->get_spearman() > vi->get_spearman()) + { + S_map.insert(make_pair(vi->get_pvalue() + pow(10,-240), vi)); + vi++; + continue; + } + else if(S_map.find(vi->get_pvalue())->second->get_spearman() <= vi->get_spearman()) + { + S_map.insert(make_pair(vi->get_pvalue() - pow(10,-240), vi)); + vi++; + continue; + } + } + + } + else + S_map.insert(make_pair(vi->get_pvalue(), vi)); + + vi++; + } + + multimap <double, vector<matrix>::iterator>::iterator mi = S_map.begin(); + + while(mi != S_map.end()) + { + double pv1, pv2; + + pv1 = mi->second->get_pvalue() * Mat->size(); + + if(pv1 > 1) + pv1 = 1; + + pv2 = mi->second->get_pvalue_bg() * Mat->size(); + + if(pv2 > 1) + pv2 = 1; + + out << mi->second->name() << '\t' << mi->second->id() << '\t' << SIZE << '\t' << pv1 << ' ' << mi->second->o_or_u() << '\t'; + + if(mi->second->have_bg()) + out << pv2 << ' ' << mi->second->o_or_u_bg() << '\t'; + else + out << "N/A N/A\t"; + + out << mi->second->get_R_Avg() << '\t' << pow(mi->second->get_R_Var(), 0.5) << '\t' << mi->second->get_F_Avg() << '\t'; + + if(mi->second->have_bg()) + out << mi->second->get_bg_Avg() << '\t'; + else + out << "N/A" << '\t'; + + if(SPEARMAN) + out << mi->second->get_spearman() << '\t'; + + if(POSITIONAL) + { + double min_pv = 1, min_pv_c; + unsigned int min_pos; + + for(unsigned int i = 0; i < mi->second->bin_num() - 1; i++) + { + if(mi->second->get_W_pvalue(i) <= min_pv) //&& mi->second->W_o_or_u(i) == 1) + { + min_pv = mi->second->get_W_pvalue(i); + min_pos = i; + } + } + + min_pv_c = min_pv * (mi->second->bin_num() * Seq->size()); + + if(min_pv_c > 1) + min_pv_c = 1; + + out << '[' << (int)(((W_STEP * min_pos) + mi->second->length()/2) - RSIZE/2) << ',' << (int)(((W_STEP * min_pos) + W_SIZE) + mi->second->length()/2 - RSIZE/2) << ']' << '\t' << min_pv_c;// << '\t' << mi->second->get_W_R_T_pvalue(min_pos) << '\t' << min_pv_c; + } + + out << endl; + + mi++; + } + + if(POSITIONAL) + { + out << endl << endl << "POSITIONAL ANALYSIS: W_SIZE = " << W_SIZE << " W_STEP = " << W_STEP << endl << endl; + + out << "NAME\tID\t"; + + mi = S_map.begin(); + + for(int i = -(int)(RSIZE/2); i <= (int)((RSIZE/2) - W_SIZE); i += W_STEP) + out << i /*<< '|' << i + (int)(W_SIZE - 1)*/ << '\t'; + + out << endl; + + while(mi != S_map.end()) + { +/* bool gflag = false; + + for(unsigned int i = 0; i < mi->second->bin_num() - 1; i++) + if(mi->second->get_W_pvalue(i) <= POS_VIS_THRESHOLD) + { + gflag = true; + break; + } + + if(!gflag) + { + mi++; + continue; + } +*/ + out << mi->second->name() << '\t' << mi->second->id() << '\t'; + + for(unsigned int i = 0; i < mi->second->bin_num() - 1; i++) + { +// out << (-log(mi->second->get_W_pvalue(i))/log(10)) * mi->second->W_o_or_u(i) << '\t';//<< " [" << mi->second->W_o_or_u(i) << "]\t"; +// + double bpv = mi->second->get_W_pvalue(i) * (mi->second->bin_num() * Seq->size()); + + if(bpv > 1) + bpv = 1; + + out << bpv << '\t'; + } + + out << endl; + + mi++; + } + + if(!obstf.empty()) + distance_analysis(Mat, Seq, SIZE, &S_map); + } + + out.close(); + + return; +} + +void distance_analysis(vector<matrix> *Mat, vector<sequence> *Seq, unsigned int SIZE, multimap <double, vector<matrix>::iterator> *S_map) +{ + string dist_outfile = regfile, dist2_outfile = regfile, dist3_outfile = regfile; + + dist_outfile += ".dist"; + dist2_outfile += ".exp_dist"; + dist3_outfile += ".pv_dist"; + + ofstream out(dist_outfile.c_str()), out2, out3; + + if(!obstf.empty()) + { + out2.open(dist2_outfile.c_str()); + out3.open(dist3_outfile.c_str()); + + out2 << "TF/EXP_HITS\t"; + out3 << "TF/PVALUE\t"; + } + + out << "TF1|TF2/HITS\t"; + + for(int p = -(int)(RSIZE - 1); p < (int)RSIZE; p++) + out << p << '\t'; + + out << endl; + + if(!obstf.empty()) + { + for(int p = -(int)(RSIZE - 1); p < (int)RSIZE; p++) + { + out2 << p << '\t'; + out3 << p << '\t'; + } + + out2 << endl; + out3 << endl; + } + + multimap <double, vector<matrix>::iterator>::iterator MRI = S_map->begin(); + unsigned int LCOUNT = 0; + + while(MRI != S_map->end()) + { + if((MRI->second->get_pvalue_bg() > DIST_PV_THRESHOLD || MRI->second->o_or_u_bg() == "UNDER") && + (MRI->second->get_pvalue() > DIST_PV_THRESHOLD || MRI->second->o_or_u() == "UNDER") ) + { + MRI++; + continue; + } + +// if(!QUIET) cerr << endl << MRI->second->name() << endl; + + LCOUNT++; + + map<int, double> R_dist_map; + + if(!obstf.empty() && obstf == MRI->second->id()) + { + map<int, unsigned int> M_hit_distr; + + for(unsigned int a = 0; a < SIZE; a++) + { + map<int, unsigned int>::iterator mi; + + if(!MRI->second->get_strand_at(a)) + mi = M_hit_distr.find(MRI->second->get_pos_at(a)); + else + mi = M_hit_distr.find(RSIZE - MRI->second->get_pos_at(a)); + + if(mi == M_hit_distr.end()) + { + if(!MRI->second->get_strand_at(a)) + M_hit_distr[MRI->second->get_pos_at(a)] = 1; + else + M_hit_distr[RSIZE - MRI->second->get_pos_at(a)] = 1; + } + else + mi->second++; + } + + map<int, unsigned int>::iterator mi = M_hit_distr.begin(); + + while(mi != M_hit_distr.end()) + { + for(int b = 0; b < RSIZE; b++) + { + int dpos = mi->first - b; + + map<int, double>::iterator ri = R_dist_map.find(dpos); + + if(ri == R_dist_map.end()) + R_dist_map[dpos] = mi->second; + else + ri->second += mi->second; + + } + + mi++; + } + + map<int, double>::iterator ri = R_dist_map.begin(); + double risum = SIZE * RSIZE; + + out2 << MRI->second->name() << '\t'; + + while(ri != R_dist_map.end()) + { + ri->second /= risum; + out2 << ri->second*SIZE << '\t'; + ri++; + } + + out2 << endl; + } + +// multimap <double, vector<matrix>::iterator>::iterator MRI2 = MRI; + multimap <double, vector<matrix>::iterator>::iterator MRI2 = S_map->begin(); + + while(MRI2 != S_map->end()) + { + vector<unsigned int> hit_v((RSIZE*2) - 1, 0); + + if(MRI2 == MRI) + { + vector<unsigned int> SBPOS; +// vector<bool> SBSTRAND; + + if(SINGLE_STRAND) + { + for(unsigned int i = 0; i < Seq->size(); i++) + { + bool s; + unsigned int P; + double score; + + score = MRI->second->scan_seq_ss_second_best(Seq->at(i).seq() + (RSIZE/2), RSIZE, &P, &s, MRI->second->get_pos_at(i), Seq->at(i).strand()); + SBPOS.push_back(P); + // SBSTRAND.push_back(s); + } + + } + else + { + for(unsigned int i = 0; i < Seq->size(); i++) + { + bool s; + unsigned int P; + double score; + + score = MRI->second->scan_seq_ds_second_best(Seq->at(i).seq() + (RSIZE/2), RSIZE, &P, &s, (int)MRI->second->get_pos_at(i)); + + SBPOS.push_back(P + MRI->second->length()/2); + // SBSTRAND.push_back(s); + + } + } + + for(unsigned int k = 0; k < SIZE; k++) + { + int dist; + + if(!MRI->second->get_strand_at(k)) + dist = (int)SBPOS.at(k) - (int)MRI->second->get_pos_at(k); + else + dist = ((int)(RSIZE-1) - (int)SBPOS.at(k)) - ((int)(RSIZE - 1) - (int)MRI->second->get_pos_at(k)); + + hit_v.at((RSIZE-1) + dist)++; + } + + } + else + { + for(unsigned int k = 0; k < SIZE; k++) + { + int dist; + + if(!MRI->second->get_strand_at(k)) + dist = (int)MRI2->second->get_pos_at(k) - (int)MRI->second->get_pos_at(k); + else + dist = ((int)(RSIZE - 1 ) - (int)MRI2->second->get_pos_at(k)) - ((int)(RSIZE - 1 ) - (int)MRI->second->get_pos_at(k)); + +// if(!QUIET) cerr << endl << (int)((RSIZE-1) + dist) << endl; + + hit_v.at((RSIZE-1) + dist)++; + } + } + + if((!obstf.empty() && obstf == MRI->second->id()) || obstf.empty()) + out << MRI->second->name() << "|" << MRI2->second->name() << '\t'; + + if(!obstf.empty() && obstf == MRI->second->id()) + out3 << MRI->second->name() << "|" << MRI2->second->name() << '\t'; + + for(unsigned int p = 0; p < (RSIZE*2) - 1; p++) + { + if((!obstf.empty() && obstf == MRI->second->id()) || obstf.empty()) + out << hit_v[p] << '\t'; + + if(!obstf.empty() && obstf == MRI->second->id()) + { + map<int, double>::iterator ri = R_dist_map.find(p - (RSIZE - 1)); + + if(hit_v[p] >= DIST_HIT_THRESHOLD && hit_v[p]/(ri->second * SIZE) >= DIST_RATIO_THRESHOLD) + { + double pv = gsl_cdf_binomial_Q(hit_v[p], ri->second, SIZE); + + if(pv <= DIST_PV_THRESHOLD) + out3 << pv << '\t'; + else + out3 << "NA" << '\t'; + } + else + out3 << "NA" << '\t'; + } + } + + if((!obstf.empty() && obstf == MRI->second->id()) || obstf.empty()) + out << endl; + + if(!obstf.empty() && obstf == MRI->second->id()) + out3 << endl; + + MRI2++; + } + + MRI++; + } + + out.close(); + + if(!obstf.empty()) + { + out2.close(); + out3.close(); + } + + if(!LCOUNT) + { + remove(dist_outfile.c_str()); + + if(!obstf.empty()) + { + remove(dist2_outfile.c_str()); + remove(dist3_outfile.c_str()); + } + } + + if(!QUIET) cerr << "\nDistance analysis written in file: " << dist_outfile << endl; + + return; +} + +void scanner(vector<matrix> *Mat, vector<sequence> *Seq) +{ + for(unsigned int i = 0; i < Mat->size(); i++) + { + if(VERBOSE) + { + if(!QUIET) cerr << "\nScanning sequences for " + << Mat->at(i).name() << " (" << Mat->at(i).id() << ")"<< endl; + } + + Mat->at(i).scan(Seq); + Mat->at(i).calc_var((double)Seq->size()); + + if(POSITIONAL) + Mat->at(i).calc_W_param((double)Seq->size()); + + if(VERBOSE) + { + if(!QUIET) cerr << "\nRegions avg = " << Mat->at(i).R_Avg << '\t' << "Flanks avg = " << Mat->at(i).F_Avg << '\t' + << "Regions devstd = " << pow(Mat->at(i).R_Var,0.5) << '\t' << "Flanks devstd = " << pow(Mat->at(i).F_Var,0.5); + if(!QUIET) cerr << "\nDoing Welch's t test... "; + } + + Mat->at(i).welch_test((double)Seq->size()); + + if(Mat->at(i).have_bg()) + Mat->at(i).welch_test_bg((double)Seq->size()); + + if(POSITIONAL) + { +// Mat->at(i).welch_test_pos((double)Seq->size()); + Mat->at(i).z_test_pos((double)Seq->size()); + } + + if(VERBOSE) + { + if(!QUIET) cerr << "done. Pvalue = " << Mat->at(i).get_pvalue() << ' ' << Mat->at(i).o_or_u() << endl; + if(!QUIET) cerr << endl; + } + } + + return; +} + +void acquire_matrices(vector<matrix> *Mat, map<string,bg> *Bg, string *matfile) +{ + ifstream in(matfile->c_str()); + string line; + + if(!in) + { + if(!QUIET) cerr << "Can't find " << *matfile << endl; + exit(1); + } + + while(getline(in,line)) + { + if(line.empty()) + continue; + + vector<string> mbuf; + mbuf.push_back(line); + + for(unsigned int i = 0; i < ALPHABET_SIZE; i++) + { + getline(in,line); + + if(!line.empty()) + mbuf.push_back(line); + } + + if(mbuf.size() == ALPHABET_SIZE + 1) + Mat->push_back(matrix(&mbuf,Bg)); + } + + in.close(); + + vector<matrix>::iterator vi = Mat->begin(); + + while(vi != Mat->end()) + { + if(!vi->is_good()) + { + vi = Mat->erase(vi); + continue; + } + + vi++; + } + + return; +} + +void acquire_background(map<string, bg> *Bg) +{ + ifstream in(bgfile.c_str()); + + if(!in) + { + if(!QUIET) cerr << "\nWarning: background file " << bgfile << " not found..." << endl; + return; + } + + string line; + + getline(in,line); + + while(getline(in, line)) + { + istringstream str(line); + string id, trash; + bg nbg; + + str >> trash >> id >> nbg.Size >> trash >> trash >> trash >> trash >> nbg.Avg >> nbg.Var; + + nbg.Var = pow(nbg.Var,2); + + Bg->insert(make_pair(id,nbg)); + } + + in.close(); + return; +} + +void acquire_regions(vector<sequence> *Seq) +{ + ifstream in(regfile.c_str()); +// map<string, vector<string> > Regs; + map<string, map<unsigned int, string> > Regs; //PREVENT REG DUPLICATES + map<string, map<unsigned int, unsigned int > > M_RANK; + + if(!in) + { + if(!QUIET) cerr << "Can't find " << regfile << endl; + exit(1); + } + + string line; + + unsigned int r_count = 0; + + while(getline(in,line)) + { + if(line.empty()) + continue; + + if(line[0] == '#') + continue; + + for(unsigned int i = 0; i < 3; i++) + line[i] = tolower(line[i]); + + for(unsigned int i = 3; i < line.size(); i++) + line[i] = toupper(line[i]); + + istringstream str(line); + string chr; + unsigned int start, end, mp; + + str >> chr >> start >> end; + +// Regs[chr].push_back(line); + mp = start + ((end - start)/2); + + if(mp > (RSIZE + RSIZE/2) + 1) + { + Regs[chr][mp] = line; + +// if(!QUIET) cerr << endl << line; + + if(M_RANK.find(chr) != M_RANK.end()) //PREVENT REGIONS PRESENT MORE THAN ONCE TO DO TOO MUCH MESS WITH SPEARMAN CORRELATION + if(M_RANK[chr].find(mp) != M_RANK[chr].end()) + continue; + + M_RANK[chr][mp] = r_count; + r_count++; + } + else + if(!QUIET) cerr << "\nSkipping region: too close to chromosome start..." << endl << line << endl; + } + + in.close(); + + map<string, map<unsigned int, string> >::iterator mi = Regs.begin(); + + if(!TWOBIT) + { + while(mi != Regs.end()) + { + string file = genome; + file += "/"; + file += mi->first; + file += ".raw"; + + in.open(file.c_str()); + + if(!in) + { + if(!QUIET) cerr << "\nCan't find " << file << ". Skipping chromosome:\n" << mi->first << endl; + + if(SPEARMAN) + { + if(!QUIET) cerr << "\nSpearman correlation disabled..." << endl; + SPEARMAN = false; + } + + mi++; + in.close(); + continue; + } + + map<unsigned int, string>::iterator si = mi->second.begin(); + + while(si != mi->second.end()) + { + Seq->push_back(sequence(&si->second, &in, M_RANK[mi->first][si->first])); + si++; + } + + in.close(); + mi++; + } + } + else + { + ostringstream str; + ofstream outfile; + str << ".pchip." << rand() << ".tmp"; + string tmp1 = str.str(); + outfile.open(tmp1.c_str(), ios::out); + vector<string> k1; + vector<unsigned int> k2; + + while(mi != Regs.end()) + { + map<unsigned int, string>::iterator si = mi->second.begin(); + + while(si != mi->second.end()) + { + unsigned int Start, End; + + Start = si->first - RSIZE; + End = si->first + RSIZE + 1 + MAX_L; + + outfile << mi->first << '\t' << Start << '\t' << End << '\t' << "." << endl; + + k1.push_back(mi->first); + k2.push_back(si->first); + + si++; + } + + mi++; + } + + string cline = "twoBitToFa -bedPos -bed="; + cline += str.str(); + str << ".fa"; + cline += " "; + cline += genome; +/* cline += "/"; + cline += genome; + cline += ".2bit "; */ + cline += " "; + cline += str.str(); + +// if(!QUIET) cerr << endl << "TwoBit command line:\n" << cline << endl; + + if(system(cline.c_str()) != 0) + { + if(!QUIET) cerr << endl << "twoBitToFa returned non 0 error code... aborting pscan_chip run!\n" << endl; + exit(1); + } + else + { + ifstream in(str.str().c_str()); + string seq; + seq.clear(); + + if(!in) + { + if(!QUIET) cerr << endl << "Can't access twoBitToFa output file! Aborting pscan_chip run!\n" << endl; + exit(1); + } + + unsigned int scount = 0; + + while(getline(in,line)) + { + if(line.empty()) + continue; + + if(line[0] == '>') + { + if(seq.empty()) + continue; + else + { + Seq->push_back(sequence(&seq, M_RANK[k1.at(scount)][k2.at(scount)], k1.at(scount), k2.at(scount))); + seq.clear(); + scount++; + continue; + } + } + else + seq += line; + } + + Seq->push_back(sequence(&seq, M_RANK[k1.at(scount)][k2.at(scount)], k1.at(scount), k2.at(scount))); //LAST SEQUENCE! + + in.close(); + remove(str.str().c_str()); + remove(tmp1.c_str()); + } + +// exit(1); + } + + vector<sequence>::iterator vi = Seq->begin(); + + while(vi != Seq->end()) + { + if(!vi->is_good()) + { + vi = Seq->erase(vi); + continue; + } + + vi++; + } + + return; +} + +void command_line_parser(int argc, char **argv) +{ + if(argc == 1) + return; + + for(int i = 1; i < argc; i++) + { + string buf = argv[i]; + + if(buf == "-r") + { + if(i < argc - 1) + regfile = argv[++i]; + continue; + } + + if(buf == "-g") + { + if(i < argc - 1) + genome = argv[++i]; + continue; + } + + if(buf == "-s") + { + if(i < argc - 1) + RSIZE = atoi(argv[++i]); + continue; + } + if(buf == "-bgavg") + { + if(i < argc - 1) + SINGLE_MATRIX_BG_AVG = atof(argv[++i]); + continue; + } + if(buf == "-M") + { + if(i < argc - 1) + matrixfile = argv[++i]; + continue; + } + if(buf == "-MADD") + { + if(i < argc - 1) + additional_matrixfile = argv[++i]; + continue; + } + if(buf == "-m") + { + if(i < argc - 1) + selected = argv[++i]; + continue; + } + + if(buf == "-bg") + { + if(i < argc - 1) + bgfile = argv[++i]; + continue; + } + if(buf == "-go") + { + if(i < argc - 1) + GALAXY_OUTPUT = argv[++i]; + continue; + } + if(buf == "-obs") + { + if(i < argc - 1) + obstf = argv[++i]; + + continue; + } + if(buf == "-ss") + { + SINGLE_STRAND = true; + continue; + } + if(buf == "-2bit") + { + TWOBIT = true; + continue; + } + if(buf == "-quiet") + { + QUIET = true; + continue; + } + if(buf == "-sbp") + { + SECOND_BEST_POS = true; + continue; + } + if(buf == "-fasta") + { + GET_SEQUENCES = true; + continue; + } + if(buf == "-pos") + { + POSITIONAL = true; + continue; + } + if(buf == "-spear") + { + SPEARMAN = true; + continue; + } + if(buf == "-wsize") + { + if(i < argc - 1) + W_SIZE = atoi(argv[++i]); + + W_STEP = W_SIZE / 2; + continue; + } +/* if(buf == "-wstep") + { + if(i < argc - 1) + W_STEP = atoi(argv[++i]); + continue; + } + + if(buf == "-fast") + { + FAST_SCAN = true; + continue; + }*/ +/* if(buf == "-naive") + { + NAIVE = true; + continue; + } +*/ + if(buf == "-v") + { + VERBOSE = true; + continue; + } + else + { + if(!QUIET) cerr << "\nUnknown option: " << buf << endl; + exit(1); + } + + /* if(buf == "-Q") + { + if(i < argc - 1) + idfile = argv[++i]; + continue; + } + if(buf == "-ql") + { + if(i < argc - 1) + q_idlist = argv[++i]; + continue; + } + if(buf == "-imgm") + { + if(i < argc - 1) + bg_for_img = argv[++i]; + continue; + } + if(buf == "-magic") + { + if(i < argc - 1) + magic = argv[++i]; + continue; + }*/ + } + + if(RSIZE > MAX_REG_SIZE) + { + if(!QUIET) cerr << "\nRegion size exceeds " << MAX_REG_SIZE << "..." << endl; + exit(1); + } + + if(RSIZE % W_SIZE != 0 || W_SIZE % 2 != 0 || RSIZE / W_SIZE < 5) + { + if(!QUIET) cerr << "\nWindow size must be even and both an exact divisor of region size and at least five times smaller of it: positional analysis disabled." << endl; + POSITIONAL = false; + } + + return; +} + +//SEQUENCE CLASS BEGIN + +sequence::sequence(string *seq, unsigned int R, string chr, unsigned int mp) //CONSTRUCTOR FOR 2BIT INPUT +{ + Chr = chr; + Strand = '+'; + AntiStrand = '-'; + E_Rank = R; + good = true; + + Start = mp - RSIZE/2; + End = (mp + RSIZE/2) - 1; + + for(unsigned int i = 0; i < seq->size(); i++) + seq->at(i) = toupper(seq->at(i)); + + Seq = new char[(RSIZE*2) + 1 + MAX_L]; + +// if(!QUIET) cerr << "\nseq.size = " << seq->size() << endl; +// if(!QUIET) cerr << "\narray.size = " << (RSIZE*2) + 1 + MAX_L << endl; + + strcpy(Seq, seq->c_str()); + + if(seq->size() != RSIZE*2 + MAX_L + 1) + { + good = false; + return; + } + + if(GET_SEQUENCES) + fasta_out << ">" << Chr << '\t' << Start + 1 << '\t' << End + 1 << endl << seq->substr(RSIZE/2, RSIZE) << endl; + + return; +} + +sequence::sequence(string *line, ifstream *in, unsigned int R) +{ + istringstream str(*line); + string trash; + int buf_s, buf_e, mp; + good = true; + + E_Rank = R; + + str >> Chr >> buf_s >> buf_e >> Name >> trash >> Strand; + + if(Strand == 'R') + Strand = '-'; + + if(Strand != '+' && Strand != '-') + Strand = '+'; + + if(Strand == '+') + AntiStrand = '-'; + else + AntiStrand = '+'; + + mp = buf_s + ((buf_e - buf_s)/2); + + Start = mp - RSIZE/2; + End = (mp + RSIZE/2) - 1; + +// Seq = new char[RSIZE + 1]; +// UF_Seq = new char[(RSIZE/2) + 1 + MAX_L/2]; +// DF_Seq = new char[(RSIZE/2) + 1 + MAX_L/2]; + + Seq = new char[(RSIZE*2) + 1 + MAX_L]; + + in->seekg((Start) - RSIZE/2); + + in->get(Seq, (RSIZE*2 + 1) + MAX_L); + + if(in->gcount() != RSIZE*2 + MAX_L) + { + good = false; + return; + } + + if(GET_SEQUENCES) + { + char *out_seq; + out_seq = new char[RSIZE + 1]; + + in->seekg(Start); + in->get(out_seq, (RSIZE + 1)); + + fasta_out << ">" << Chr << '\t' << Start + 1 << '\t' << End + 1 << '\t' << "---" << '\t' << buf_s << '\t' << buf_e << '\t' << endl << out_seq << endl; + + delete[] out_seq; + } + + /*in->seekg(((Start - 1) - RSIZE/2) - (MAX_L/2)); + + in->get(UF_Seq, RSIZE/2 + 1 + MAX_L /2); + + if(in->gcount() != RSIZE/2 + (MAX_L/2)) + { + good = false; + return; + } + + in->get(Seq, RSIZE + 1); + + if(in->gcount() != RSIZE) + { + good = false; + return; + } + + in->get(DF_Seq, RSIZE/2 + 1 + (MAX_L/2)); + + if(in->gcount() != RSIZE/2 + (MAX_L/2)) + good = false; +*/ + + return; +} + +unsigned int sequence::e_rank() +{ + return E_Rank; +} + +string sequence::chr() +{ + return Chr; +} + +/*string* sequence::seq() +{ + return &Seq; +}*/ + +char* sequence::seq() +{ + return Seq; +} + +char sequence::strand() +{ + return Strand; +} + +char sequence::antistrand() +{ + return AntiStrand; +} + +/*char* sequence::uf_seq() +{ + return UF_Seq; +} + +char* sequence::df_seq() +{ + return DF_Seq; +} +*/ +string sequence::name() +{ + return Name; +} + +unsigned int sequence::start() +{ + return Start; +} + +unsigned int sequence::end() +{ + return End; +} + +bool sequence::is_good() +{ + return good; +} + +// SEQUENCE CLASS END + +// MATRIX CLASS BEGIN + +matrix::matrix(vector<string>* mbuf, map<string, bg> *Bg) +{ + string buf; + istringstream bstr(mbuf->at(1)), nstr(mbuf->at(0)); + L = 0; + R_Avg = 0; + F_Avg = 0; + R_Var = 0; + F_Var = 0; + bg_Avg = 0; + bg_Var = 0; + + BIN_NUM = RSIZE / W_STEP; + + W_R_Avg.resize(BIN_NUM, 0); + W_R_Var.resize(BIN_NUM, 0); + W_R_pvalue.resize(BIN_NUM, 0); + W_R_T_pvalue.resize(BIN_NUM,0); + spearman = 42; + + O_U = false; + O_U_bg = false; + W_O_U.resize(BIN_NUM, 0); + + BG = false; + good = true; + norm_sec_step = false; + + while(bstr >> buf) + L++; + + if(L > RSIZE/2) + { + good = false; + return; + } + + if(L > MAX_L) + MAX_L = L; + + if(!mbuf->at(0).empty()) + { + if(mbuf->at(0).at(0) != '>') + { + good = false; + return; + } + } + else + { + good = false; + return; + } + +// Name = mbuf->at(0).substr(1); + nstr >> Id >> Name; + + Id = Id.substr(1); + + if(!selected.empty()) + { +// cerr << endl << selected << '\t' << Id << '\t' << Name << endl; + + if(selected != Id && selected != Name) + { + good = false; + return; + } + } + + if(!Bg->empty()) + { + map<string,bg>::iterator mi = Bg->find(Id); + + if(mi == Bg->end()) + mi = Bg->find(Name); + + if(mi != Bg->end()) + { + bg_Avg = mi->second.Avg; + bg_Var = mi->second.Var; + bg_Size = mi->second.Size; + BG = true; + } + else + if(!QUIET) cerr << "\nWarning: no background for matrix: " << Id << endl; + } + + M = new double*[L]; + rc_M = new double*[L]; + + for(unsigned int i = 1; i < ALPHABET_SIZE + 1; i++) + { + istringstream str(mbuf->at(i)); + + unsigned int count = 0; + + while(str >> buf) + { + if(i == 1) + { + M[count] = new double[ALPHABET_SIZE + 1]; + rc_M[count] = new double[ALPHABET_SIZE + 1]; + } + + M[count][i-1] = atof(buf.c_str()); + count++; + + if(count > L) + { + good = false; + return; + } + } + + if(count != L) + { + good = false; + return; + } + } + + normalizer(); + to_log(); + rev_c(); + avg(); + max_min(); +// set_top_pos(); + + if(min_score == max_score) + good = false; + + return; +} + +void matrix::display() +{ + if(!QUIET) cerr << "\n>" << Name << '\t' << good << endl; + + for(unsigned int i = 0; i < ALPHABET_SIZE; i++) + { + for(unsigned int j = 0; j < L; j++) + if(!QUIET) cerr << M[j][i] << '\t'; + + if(!QUIET) cerr << endl; + } + + if(!QUIET) cerr << endl; + +// for(unsigned int i = 0; i < TOP_P; i++) +// if(!QUIET) cerr << "TOP[" << i+1 << "] = " << top_pos[i] + 1 << "-" << top_char[i] << '\t'; + + if(!QUIET) cerr << "\n\n>" << Name << '\t' << good << '\t' << "RC" << endl; + + for(unsigned int i = 0; i < ALPHABET_SIZE; i++) + { + for(unsigned int j = 0; j < L; j++) + if(!QUIET) cerr << rc_M[j][i] << '\t'; + + if(!QUIET) cerr << endl; + } + + if(!QUIET) cerr << endl; + +// for(unsigned int i = 0; i < TOP_P; i++) + // if(!QUIET) cerr << "TOP[" << i+1 << "] = " << rc_top_pos[i] + 1 << "-" << rc_top_char[i] << '\t'; + + if(!QUIET) cerr << endl; + + return; +} + +bool matrix::is_good() +{ + return good; +} + + +void matrix::normalizer() +{ + double *sums = new double[L]; + + for(int x = 0; x < L; x++) + { + sums[x] = 0; + + for(int y = 0; y < ALPHABET_SIZE; y++) + sums[x] += M[x][y]; + } + + for(int x = 0; x < L; x++) + for(int y = 0; y < ALPHABET_SIZE; y++) + M[x][y] /= sums[x]; + + delete[] sums; + + if(!norm_sec_step) + { + for(int x = 0; x < L; x++) + for(int y = 0; y < ALPHABET_SIZE; y++) + M[x][y] += MIN_ADD_FREQ; + + norm_sec_step = true; + + normalizer(); + } + + return; + +} + +void matrix::to_log() +{ + for(int x = 0; x < L; x++) + for(int y = 0; y < ALPHABET_SIZE; y++) + M[x][y] = log(M[x][y]); + + return; +} + +void matrix::avg() +{ +// mat_avg = new double[L]; + + for(int x = 0; x < L; x++) + { + double buf = 0; + + for(int y = 0; y < ALPHABET_SIZE; y++) + buf += M[x][y]; + + // mat_avg[x] = buf/ALPHABET_SIZE; + M[x][ALPHABET_SIZE] = buf/ALPHABET_SIZE; + rc_M[(L-1) -x][ALPHABET_SIZE] = buf/ALPHABET_SIZE; + } + + return; +} + +void matrix::max_min() +{ + double min, max; + + max_score = 0; + min_score = 0; + + for(int x = 0; x < L; x++) + { + min = M[x][0]; + max = M[x][0]; + + for(int y = 1; y < ALPHABET_SIZE; y++) + { + if(M[x][y] < min) + min = M[x][y]; + + if(M[x][y] > max) + max = M[x][y]; + } + + min_score += min; + max_score += max; + } + + return; +} + +void matrix::rev_c() +{ + for(unsigned int x = 0; x < L; x++) + for(unsigned int y = 0; y < ALPHABET_SIZE; y++) + rc_M[(L - 1) - x][(ALPHABET_SIZE - 1) - y] = M[x][y]; + + return; +} + +void matrix::scan(vector<sequence> *Seq) +{ + v_score.reserve(Seq->size()); + vF_score.reserve(Seq->size()); + v_pos.reserve(Seq->size()); + v_strand.reserve(Seq->size()); + + if(POSITIONAL) + W_R_v_score.reserve(Seq->size()); + + unsigned int progress; + + if(!SINGLE_STRAND) + { + for(unsigned int i = 0; i < Seq->size(); i++) + { + if(VERBOSE) + { + progress = (unsigned int)(((double)(i+1)/(double)Seq->size()) * 100); + + if(!(progress%5)) + if(!QUIET) cerr << progress << '%'; + } + + double max_flank_u, max_flank_d; + vector<double> W_buf(BIN_NUM, 0); + + { + double r_score; + + max_flank_u = scan_seq_ds(Seq->at(i).seq() , true, RSIZE/2, &W_buf); + r_score = scan_seq_ds(Seq->at(i).seq() + (RSIZE/2) , false, RSIZE, &W_buf); + max_flank_d = scan_seq_ds(Seq->at(i).seq() + ((RSIZE/2)*3) , true, RSIZE/2, &W_buf); + + R_Avg += r_score; + + /* if(r_score < max_flank_u || r_score < max_flank_d) + { + if(max_flank_u > max_flank_d) + G_Avg += max_flank_u; + else + G_Avg += max_flank_d; + } + else + G_Avg += r_score;*/ + + if(POSITIONAL) + { + // for(unsigned int i = 0; i < BIN_NUM; i++) + // W_R_Avg[i] += W_buf[i]; + + W_R_v_score.push_back(W_buf); + } + + } + + if(max_flank_u > max_flank_d) + { + F_Avg += max_flank_u; + vF_score.push_back(max_flank_u); + } + else + { + F_Avg += max_flank_d; + vF_score.push_back(max_flank_d); + } + + + if(VERBOSE) + if(!(progress%5)) + if(!QUIET) cerr << (char)13; + } + } + else + { + for(unsigned int i = 0; i < Seq->size(); i++) + { + + if(VERBOSE) + if(!QUIET) cerr << (unsigned int)(((double)(i+1)/(double)Seq->size()) * 100) << '%'; + + double max_flank_u, max_flank_d; + vector<double> W_buf(BIN_NUM, 0); + + { + + double r_score; + + max_flank_u = scan_seq_ss(Seq->at(i).seq() , true, RSIZE/2, Seq->at(i).strand(), &W_buf); + r_score += scan_seq_ss(Seq->at(i).seq() + (RSIZE/2) , false, RSIZE, Seq->at(i).strand(), &W_buf); + max_flank_d = scan_seq_ss(Seq->at(i).seq() + ((RSIZE/2)*3), true, RSIZE/2, Seq->at(i).strand(), &W_buf); + + R_Avg += r_score; + + /* if(r_score < max_flank_u || r_score < max_flank_d) + { + if(max_flank_u > max_flank_d) + G_Avg += max_flank_u; + else + G_Avg += max_flank_d; + } + else + G_Avg += r_score;*/ + + if(POSITIONAL) + { + // for(unsigned int i = 0; i < BIN_NUM; i++) + // W_R_Avg[i] += W_buf[i]; + + W_R_v_score.push_back(W_buf); + } + } + + if(max_flank_u > max_flank_d) + { + F_Avg += max_flank_u; + vF_score.push_back(max_flank_u); + } + else + { + F_Avg += max_flank_d; + vF_score.push_back(max_flank_d); + } + + if(VERBOSE) + if(!QUIET) cerr << (char)13; + } + } + + R_Avg /= Seq->size(); + F_Avg /= Seq->size(); +// G_Avg /= Seq->size(); + +// for(unsigned int i = 0; i < BIN_NUM; i++) +// W_R_Avg[i] /= Seq->size(); + + if(SPEARMAN) + { + multimap<double, unsigned int> SRM; + + for(unsigned int i = 0; i < v_score.size(); i++) + SRM.insert(make_pair(v_score[i], i)); + + multimap<double, unsigned int>::reverse_iterator mmi = SRM.rbegin(); + int sr = 0; + double d = 0; + + while(mmi != SRM.rend()) + { +// if(!QUIET) cerr << endl << mmi->first << '\t' << sr << '\t' << Seq->at(mmi->second).e_rank() << '\t' << this->name(); + + d += pow((sr - (int)Seq->at(mmi->second).e_rank()), 2); + sr++; + mmi++; + } + + d *= 6; + +// if(!QUIET) cerr << endl << "d = " << d << "\tden = " << (Seq->size() * (pow(Seq->size(), 2) - 1)) << '\t' << "ssize = " << Seq->size(); + + d /= (Seq->size() * (pow(Seq->size(), 2) - 1)); + + spearman = 1 - d; + } + + return; +} + +void matrix::calc_var(double S) +{ + for(unsigned int i = 0; i < S; i++) + { + R_Var += pow(v_score[i] - R_Avg,2); + F_Var += pow(vF_score[i] - F_Avg,2); + +/* if(v_score[i] >= vF_score[i]) + G_Var += pow(v_score[i] - G_Avg,2); + else + G_Var += pow(vF_score[i] - G_Avg,2);*/ + } + + if(S > 1) + { + R_Var /= (S - 1); + F_Var /= (S - 1); +// G_Var /= (S - 1); + } + + if(R_Var == 0) + R_Var += EPSILON; + + if(F_Var == 0) + F_Var += EPSILON; + +// if(G_Var == 0) +// G_Var += EPSILON; + + return; +} + +void matrix::calc_W_param(double S) +{ + if(S > 1) + S = S/2; //SOLO PER LA PRIMA META' DELLE SEQUENZE!!! + + W_Avg = 0; + W_Var = 0; + + multimap<double, unsigned int> score_seq_map; + + for(unsigned int i = 0; i < v_score.size(); i++) + score_seq_map.insert(make_pair(v_score.at(i), i)); + + multimap<double, unsigned int>::reverse_iterator mi = score_seq_map.rbegin(); + + for(unsigned int i = 0; i < S; i++) + { + for(unsigned int j = 0; j < BIN_NUM; j++) + W_R_Avg[j] += W_R_v_score[mi->second][j]; + +// if(!QUIET) cerr << Name << '\t' << mi->first << endl; + + mi++; + } + + for(unsigned int j = 0; j < BIN_NUM; j++) + W_R_Avg[j] /= S; + + mi = score_seq_map.rbegin(); + + for(unsigned int i = 0; i < S; i++) + { + for(unsigned int j = 0; j < BIN_NUM; j++) + W_R_Var[j] += pow(W_R_v_score[mi->second][j] - W_R_Avg[j],2); + + mi++; + } + +/* for(unsigned int i = 0; i < S; i++) + for(unsigned int j = 0; j < BIN_NUM; j++) + W_R_Var[j] += pow(W_R_v_score[i][j] - W_R_Avg[j],2);*/ + + if(S > 1) + { + for(unsigned int j = 0; j < BIN_NUM; j++) + W_R_Var[j] /= (S - 1); + } + + for(unsigned int j = 0; j < BIN_NUM; j++) + if(W_R_Var[j] == 0) + W_R_Var[j] += EPSILON; + + for(unsigned int j = 0; j < BIN_NUM - 1; j++) //THE LAST BIN IS WASTED SINCE IT IS AN HALF BIN + W_Avg += W_R_Avg[j]; + + W_Avg /= (BIN_NUM - 1); + + for(unsigned int j = 0; j < BIN_NUM - 1; j++) + W_Var += pow(W_R_Avg[j] - W_Avg, 2); + + W_Var /= (BIN_NUM - 2); + + return; +} + +void matrix::welch_test(double S) +{ + t = R_Avg - F_Avg; + + if(t == 0) + t = EPSILON; + + t /= pow((R_Var/S) + (F_Var/S),0.5); + + dof = pow((R_Var/S) + (F_Var/S),2); + + dof /= (pow(R_Var,2)/((pow(S,2)) * (S-1))) + (pow(F_Var,2)/((pow(S,2)) * (S-1))); + + if(t > 0) + pvalue = gsl_cdf_tdist_Q(t,dof); + else + { + pvalue = gsl_cdf_tdist_P(t,dof); + O_U = true; + } + + return; +} + +void matrix::z_test_pos(double S) +{ +// if(!QUIET) cerr << endl << Name; + + for(unsigned int i = 0; i < BIN_NUM; i++) + { + double se = sqrt(W_R_Var.at(i)), z; + se /= sqrt(S/2); + + z = W_R_Avg.at(i) - W_Avg; + z /=se; + + if(z >= 0) + W_O_U.at(i) = true; + + W_R_pvalue.at(i) = gsl_cdf_ugaussian_Q(z); + +// if(!QUIET) cerr << endl << "W_R_Avg.at(i) " << W_R_Avg.at(i) << '\t' << W_Avg << '\t' << W_R_pvalue.at(i) << endl; + + } + + return; +} + +void matrix::welch_test_pos(double S) +{ + if(S > 1) + S = S/2; //SOLO LA PRIMA META' DELLE SEQUENZE!!! + +// if(!QUIET) cerr << "\nRAVG = " << W_Avg << endl; + for(unsigned int i = 0; i < BIN_NUM; i++) + { + double T, DOF; + + T = W_R_Avg.at(i) - W_Avg; + +// if(!QUIET) cerr << W_R_Avg.at(i) << '\t'; + + // if(!QUIET) cerr << endl << Name << endl << "T = " << T << '\t' << W_R_Avg.at(i) << '\t' << W_Avg; + + if(T == 0) + T = EPSILON; + + T /= pow((W_R_Var.at(i)/S) + (W_Var/S),0.5); + + DOF = pow((W_R_Var.at(i)/S) + (W_Var/S),2); + + DOF /= (pow(W_R_Var.at(i),2)/((pow(S,2)) * (S-1))) + (pow(W_Var,2)/((pow(S,2)) * (S-1))); + + if(T > 0) + W_R_T_pvalue.at(i) = gsl_cdf_tdist_Q(T,DOF); + else + { + W_R_T_pvalue.at(i) = gsl_cdf_tdist_P(T,DOF); + W_O_U.at(i) = true; + } + + // if(!QUIET) cerr << "\tPV= " << W_R_pvalue.at(i); + + } + +// if(!QUIET) cerr << endl; + + return; +} + +void matrix::welch_test_bg(double S1) +{ + double S2 = bg_Size; + +// if(!QUIET) cerr << Name << '\t' << R_Avg << '\t' << bg_Avg << '\t' << R_Var << '\t' << bg_Var << '\t' << S1 << '\t' << S2 << endl; + + t_bg = R_Avg - bg_Avg; + + if(t_bg == 0) + t_bg = EPSILON; + + t_bg /= pow((R_Var/S1) + (bg_Var/S2),0.5); + + dof_bg = pow((R_Var/S1) + (bg_Var/S2),2); + + dof_bg /= (pow(R_Var,2)/((pow(S1,2)) * (S1-1))) + (pow(bg_Var,2)/((pow(S2,2)) * (S2-1))); + + if(t_bg > 0) + pvalue_bg = gsl_cdf_tdist_Q(t_bg,dof_bg); + else + { + pvalue_bg = gsl_cdf_tdist_P(t_bg,dof_bg); + O_U_bg = true; + } + + return; +} + +/*void matrix::welch_test_bg_G(double S1) +{ + double S2 = bg_Size; + +// if(!QUIET) cerr << Name << '\t' << R_Avg << '\t' << bg_Avg << '\t' << R_Var << '\t' << bg_Var << '\t' << S1 << '\t' << S2 << endl; + + t_bg = G_Avg - bg_Avg; + + if(t_bg == 0) + t_bg = EPSILON; + + t_bg /= pow((G_Var/S1) + (bg_Var/S2),0.5); + + dof_bg = pow((G_Var/S1) + (bg_Var/S2),2); + + dof_bg /= (pow(G_Var,2)/((pow(S1,2)) * (S1-1))) + (pow(bg_Var,2)/((pow(S2,2)) * (S2-1))); + + if(t_bg > 0) + pvalue_bg = gsl_cdf_tdist_Q(t_bg,dof_bg); + else + { + pvalue_bg = gsl_cdf_tdist_P(t_bg,dof_bg); + O_U_bg = true; + } + + return; +}*/ + +double matrix::scan_seq_ds(char *S, bool flank, unsigned int l, vector<double> *W_buf) +{ + unsigned int pos = 0, max_pos = 0; + double seq_score; + bool strand = false; + + l -= L/2; + + while(pos < l) + { + + double pos_score[2] = {0,0}; + + for(unsigned int j = 0; j < L; j++) + { + pos_score[0] += get_M_value(j, S[pos+j]); + pos_score[1] += get_rc_M_value(j, S[pos+j]); + } + + if(seq_score < pos_score[0] || !pos) + { + seq_score = pos_score[0]; + max_pos = pos; + strand = false; + } + if(seq_score < pos_score[1]) + { + seq_score = pos_score[1]; + max_pos = pos; + strand = true; + } + + if(!flank && POSITIONAL) + { + unsigned int vpos = pos / W_STEP; + + if(W_buf->at(vpos) < pos_score[0] || !(pos % W_STEP)) + W_buf->at(vpos) = pos_score[0]; + + if(W_buf->at(vpos) < pos_score[1]) + W_buf->at(vpos) = pos_score[1]; + + if(vpos) + { + if(W_buf->at(vpos - 1) < pos_score[0]) + W_buf->at(vpos - 1) = pos_score[0]; + + if(W_buf->at(vpos - 1) < pos_score[1]) + W_buf->at(vpos - 1) = pos_score[1]; + } + } + + pos++; + } + + seq_score = 1 + (seq_score - max_score)/(max_score - min_score); + + if(!flank && POSITIONAL) + { +// if(!QUIET) cerr << endl << Name << endl; + + unsigned int binm = 0; + + for(unsigned int i = 0; i < BIN_NUM; i++) + { + if(W_buf->at(i) != 0) + W_buf->at(i) = 1 + (W_buf->at(i) - max_score)/(max_score - min_score); + else + binm++; + } + + BIN_NUM -= binm; + } + + if(!flank) + { + v_score.push_back(seq_score); + v_pos.push_back(max_pos + L/2); + v_strand.push_back(strand); + } + + return seq_score; +} + +double matrix::scan_seq_ss_second_best(char *S, unsigned int l, unsigned int *P, bool *s, unsigned int POS, bool seq_strand) +{ + unsigned int pos = 0, max_pos = 0; + double seq_score = min_score; + bool strand = false; + + while(pos < l) + { + if(pos == POS) + { + pos++; + continue; + } + + double pos_score[2] = {0,0}; + + if(seq_strand == '+') + { + for(unsigned int j = 0; j < L; j++) + pos_score[0] += get_M_value(j, S[pos+j]); + } + else + { + for(unsigned int j = 0; j < L; j++) + pos_score[0] += get_rc_M_value(j, S[pos+j]); + } + + if(seq_score < pos_score[0]) + { + seq_score = pos_score[0]; + max_pos = pos; + strand = false; + } + + + pos++; + } + + seq_score = 1 + (seq_score - max_score)/(max_score - min_score); + + *P = max_pos; + *s = strand; + + return seq_score; +} + +double matrix::scan_seq_ds_second_best(char *S, unsigned int l, unsigned int *P, bool *s, int POS) +{ + int pos = 0, max_pos = 0, count = 0; + double seq_score; + bool strand = false; + + POS -= L/2; + l -= L/2; + + while(pos < l) + { +// if(pos == POS) + if(pos >= (POS - L) && pos <= (POS + L)) + { + pos++; + continue; + } + + double pos_score[2] = {0,0}; + + for(unsigned int j = 0; j < L; j++) + { + pos_score[0] += get_M_value(j, S[pos+j]); + pos_score[1] += get_rc_M_value(j, S[pos+j]); + } + + if(seq_score < pos_score[0] || !count) + { + seq_score = pos_score[0]; + max_pos = pos; + strand = false; + } + if(seq_score < pos_score[1]) + { + seq_score = pos_score[1]; + max_pos = pos; + strand = true; + } + + pos++; + count++; + } + + seq_score = 1 + (seq_score - max_score)/(max_score - min_score); + + *P = max_pos ;//+ L/2; + *s = strand; + + return seq_score; +} + + + +/*double matrix::scan_seq_ds_quick(char *S, bool flank, unsigned int l) +{ + unsigned int pos = 0, max_pos = 0; + double seq_score = min_score; + bool strand = false; + + while(pos < l) + { + double pos_score[2] = {0,0}; + + for(unsigned int j = 0; j < L; j++) + { + pos_score[0] += get_M_value(p_pos[j], S[pos+p_pos[j]]); + + if((pos_score[0] + p_max[j+1]) <= seq_score) + { + pos_score[0] = min_score; + break; + } + + } + + for(unsigned int j = 0; j < L; j++) + { + pos_score[1] += get_rc_M_value(rc_p_pos[j], S[pos+rc_p_pos[j]]); + + if((pos_score[1] + p_max[j+1]) <= seq_score) + { + pos_score[1] = min_score; + break; + } + } + + if(seq_score < pos_score[0]) + { + seq_score = pos_score[0]; + max_pos = pos; + strand = false; + } + if(seq_score < pos_score[1]) + { + seq_score = pos_score[1]; + max_pos = pos; + strand = true; + } + + pos++; + } + + seq_score = 1 + (seq_score - max_score)/(max_score - min_score); + + if(!flank) + { + v_score.push_back(seq_score); + v_pos.push_back(max_pos); + v_strand.push_back(strand); + } + + + return seq_score; +} +*/ +double matrix::scan_seq_ss(char *S, bool flank, unsigned int l, char seq_strand, vector<double> *W_buf) +{ + unsigned int pos = 0, max_pos = 0; + double seq_score = min_score; + bool strand = false; + + while(pos < l) + { + double pos_score[2] = {0,0}; + + if(seq_strand == '+') + { + for(unsigned int j = 0; j < L; j++) + pos_score[0] += get_M_value(j, S[pos+j]); + } + else + { + for(unsigned int j = 0; j < L; j++) + pos_score[0] += get_rc_M_value(j, S[pos+j]); + } + + if(seq_score < pos_score[0]) + { + seq_score = pos_score[0]; + max_pos = pos; + strand = false; + } + + if(!flank && POSITIONAL) + { + unsigned int vpos = pos / W_STEP; + + if(W_buf->at(vpos) < pos_score[0]) + W_buf->at(vpos) = pos_score[0]; + + if(vpos) + { + if(W_buf->at(vpos - 1) < pos_score[0]) + W_buf->at(vpos - 1) = pos_score[0]; + } + } + + pos++; + } + + seq_score = 1 + (seq_score - max_score)/(max_score - min_score); + + if(POSITIONAL && !flank) + for(unsigned int i = 0; i < W_buf->size(); i++) + W_buf->at(i) = 1 + (W_buf->at(i) - max_score)/(max_score - min_score); + + if(!flank) + { + v_score.push_back(seq_score); + v_pos.push_back(max_pos); + v_strand.push_back(strand); + } + + return seq_score; +} + +/* double matrix::scan_seq_ss_quick(char *S, bool flank, unsigned int l, char seq_strand) +{ + unsigned int pos = 0, max_pos = 0; + double seq_score = min_score; + bool strand = false; + + while(pos < l) + { + double pos_score[2] = {0,0}; + + if(seq_strand == '+') + { + for(unsigned int j = 0; j < L; j++) + { + pos_score[0] += get_M_value(p_pos[j], S[pos+p_pos[j]]); + + if((pos_score[0] + p_max[j+1]) <= seq_score) + { + pos_score[0] = min_score; + break; + } + + + } + } + else + { + for(unsigned int j = 0; j < L; j++) + { + pos_score[0] += get_rc_M_value(rc_p_pos[j], S[pos+rc_p_pos[j]]); + + if((pos_score[0] + p_max[j+1]) <= seq_score) + { + pos_score[0] = min_score; + break; + } + + } + } + + if(seq_score < pos_score[0]) + { + seq_score = pos_score[0]; + max_pos = pos; + strand = false; + } + + pos++; + } + + seq_score = 1 + (seq_score - max_score)/(max_score - min_score); + + if(!flank) + { + v_score.push_back(seq_score); + v_pos.push_back(max_pos); + v_strand.push_back(strand); + } + + return seq_score; +}*/ + +double matrix::get_M_value(int pos, char C) +{ + return M[pos][VI[C]]; +} + +double matrix::get_rc_M_value(int pos, char C) +{ + return rc_M[pos][VI[C]]; +} + +/* +void matrix::set_top_pos() +{ + multimap<double, mpoint> Tmap; + vector<double> p_max_buf, rc_p_max_buf; + + for(unsigned int x = 0; x < L; x++) + { + for(unsigned int y = 0; y < ALPHABET_SIZE; y++) + { + mpoint p; + + p.x = x; + p.y = y; + + Tmap.insert(make_pair(M[x][y],p)); + + if(!y) + { + p_max_buf.push_back(M[x][y]); + rc_p_max_buf.push_back(rc_M[x][y]); + } + else + { + if(M[x][y] > p_max_buf.back()) + p_max_buf[p_max_buf.size() - 1] = M[x][y]; + + if(rc_M[x][y] > rc_p_max_buf.back()) + rc_p_max_buf[rc_p_max_buf.size() - 1] = rc_M[x][y]; + } + + } + + // if(!QUIET) cerr << endl << p_max_buf.back(); + } + + multimap<double, mpoint>::reverse_iterator ri = Tmap.rbegin(); + + for(unsigned int i = 0; i < TOP_P; i++) + { + top_pos[i] = ri->second.x; + top_char[i] = ALPHABET[ri->second.y]; + + rc_top_pos[i] = (L - 1) - ri->second.x; + rc_top_char[i] = RC_ALPHABET[ri->second.y]; + + ri++; + } + +// if(!QUIET) cerr << endl << name() << '\t' << max_score << '\t' << min_score << endl; + + while(p_max.size() != p_max_buf.size()) + { + double buf = -INF; + unsigned int pos; + + for(unsigned int i = 0; i < p_max_buf.size(); i++) + if(p_max_buf.at(i) > buf) + { + pos = i; + buf = p_max_buf.at(i); + } + + p_pos.push_back(pos); + rc_p_pos.push_back((L - 1) - pos); + + p_max.push_back(buf); + + p_max_buf[pos] = -INF; + } + + for(unsigned int i = 0; i < L; i++) + { + //if(!QUIET) cerr << p_max[i] << "//"<< '\t'; + + for(unsigned int j = i + 1; j < L; j++) + p_max[i] += p_max[j]; + + // for(unsigned int j = i + 1; j < L; j++) + // rc_p_max[i] += rc_p_max[j]; + + // if(!QUIET) cerr << p_max[i] << "(" << p_pos[i] << ")" << rc_p_max[i] << '\t'; + } + + p_max.push_back(0); //WE NEED A 0 AT THE END OF THE VECTOR! +// rc_p_max.push_back(0); + +// if(!QUIET) cerr << endl; + + return; +} +*/ +double matrix::get_score_at(unsigned int a) +{ + return v_score.at(a); +} +unsigned int matrix::get_pos_at(unsigned int a) +{ + return v_pos.at(a); +} +bool matrix::get_strand_at(unsigned int a) +{ + return v_strand.at(a); +} + +double matrix::max() +{ + return max_score; +} + +double matrix::min() +{ + return min_score; +} + +string matrix::name() +{ + if(!Name.empty()) + return Name; + else + return Id; +} + +string matrix::id() +{ + return Id; +} + +double matrix::get_pvalue() +{ + return pvalue; +} + +double matrix::get_pvalue_bg() +{ + return pvalue_bg; +} + +double matrix::get_W_pvalue(unsigned int pos) +{ + return W_R_pvalue.at(pos); +} + +double matrix::get_W_R_Avg_at_pos(unsigned int pos) +{ + return W_R_Avg.at(pos); +} + +string matrix::o_or_u() +{ + if(O_U) + return "UNDER"; + else + return "OVER"; +} + +string matrix::o_or_u_bg() +{ + if(O_U_bg) + return "UNDER"; + else + return "OVER"; +} + +int matrix::W_o_or_u(unsigned int pos) +{ + if(W_O_U[pos]) + return -1; + else + return 1; +} + +double matrix::get_R_Avg() +{ + return R_Avg; +} +double matrix::get_F_Avg() +{ + return F_Avg; +} +double matrix::get_R_Var() +{ + return R_Var; +} +double matrix::get_F_Var() +{ + return F_Var; +} +double matrix::get_bg_Avg() +{ + return bg_Avg; +} + +double matrix::get_t() +{ + return t; +} + +double matrix::get_dof() +{ + return dof; +} + +bool matrix::have_bg() +{ + return BG; +} + +unsigned int matrix::bin_num() +{ + return BIN_NUM; +} + +unsigned int matrix::length() +{ + return L; +} + +double matrix::get_spearman() +{ + double s = spearman; + + s *= 10000; + + if(s > 0) + s = floor(s); + else if(s < 0) + s = ceil(s); + + s /= 10000; + + return s; +} + +double matrix::get_W_Avg() +{ + return W_Avg; +} + +double matrix::get_W_R_T_pvalue(unsigned int i) +{ + return W_R_T_pvalue.at(i); +} +// MATRIX CLASS END