annotate gene_fraction/src/alignment_util.h @ 0:0fd352f62446 draft default tip

planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
author chrisd
date Sun, 21 Feb 2016 06:05:24 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
1 #ifndef ALIGNMENT_UTIL_H
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
2 #define ALIGNMENT_UTIL_H
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
3
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
4 #include <string>
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
5 #include <vector>
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
6
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
7 #include <boost/algorithm/string.hpp>
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
8
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
9 #include "int_util.h"
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
10
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
11 // bit mask tested against an alignment's bit flag; when set, the read is unmapped
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
12 #define READ_UNMAPPED 4
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
13
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
14 /**
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
15 * Splits alignment into separate parts
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
16 */
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
17 std::vector<std::string> split_alignment(std::string &alignment) {
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
18 std::vector<std::string> parts;
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
19
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
20 boost::trim_if(alignment, boost::is_any_of("\t "));
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
21 // split on tab delimeter
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
22 boost::split(parts, alignment, boost::is_any_of("\t "), boost::token_compress_on);
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
23
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
24 return parts;
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
25 }
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
26
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
27 /**
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
28 * Validates bit flag
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
29 */
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
30 bool is_good_flag(const int &bit_flag) {
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
31 if( (bit_flag & READ_UNMAPPED) > 0) return false;
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
32 return true;
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
33 }
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
34
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
/**
 * Validates the rname field of an alignment.
 *
 * Declared inline because the definition lives in a header.
 *
 * @param rname rname field of the alignment
 * @return false if rname is exactly "*", true for any other value
 */
inline bool is_good_rname(const std::string &rname) {
    return rname != "*";
}
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
41
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
/**
 * Validates the pos field of an alignment.
 *
 * Declared inline because the definition lives in a header.
 *
 * @param pos pos value of the alignment
 * @return true if pos is strictly positive, false otherwise
 */
inline bool is_good_pos(const int &pos) {
    return pos > 0;
}
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
48
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
/**
 * Validates the cigar field of an alignment.
 *
 * Declared inline because the definition lives in a header.
 *
 * @param cigar cigar field of the alignment
 * @return false if cigar is exactly "*", true for any other value
 */
inline bool is_good_cigar(const std::string &cigar) {
    return cigar != "*";
}
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
55
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
/**
 * Validates the seq field of an alignment.
 *
 * Declared inline because the definition lives in a header.
 *
 * @param seq seq field of the alignment
 * @return false if seq is exactly "*", true for any other value
 */
inline bool is_good_seq(const std::string &seq) {
    return seq != "*";
}
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
62
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
63 /**
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
64 * Validates alignment fields
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
65 */
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
66 bool fields_are_good(std::vector<std::string> &parts) {
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
67 int bit_flag = s_to_i(parts[1]);
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
68 int pos = s_to_i(parts[3]);
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
69
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
70 std::string rname = parts[2];
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
71 std::string cigar = parts[5];
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
72 std::string seq = parts[9];
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
73
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
74 if(!(is_good_flag(bit_flag))) return false;
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
75 if(!(is_good_pos(pos))) return false;
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
76 if(!(is_good_rname(rname))) return false;
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
77 if(!(is_good_cigar(cigar))) return false;
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
78 if(!(is_good_seq(seq))) return false;
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
79
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
80 return true;
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
81 }
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
82
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
83 /**
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
84 * Stores alignments that pass validity checks
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
85 */
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
86 bool is_good_alignment(std::string &alignment) {
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
87 std::vector<std::string> alignment_parts;
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
88
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
89 alignment_parts = split_alignment(alignment);
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
90
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
91 if(!(fields_are_good(alignment_parts)))
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
92 return false;
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
93 return true;
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
94 }
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
95
0fd352f62446 planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
chrisd
parents:
diff changeset
96 #endif /* ALIGNMENT_UTIL_H */