annotate gene_fraction/src/alignment_util.h @ 0:2aad8349d84b draft

planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
author chrisd
date Sun, 21 Feb 2016 23:20:22 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
1 #ifndef ALIGNMENT_UTIL_H
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
2 #define ALIGNMENT_UTIL_H
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
3
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
4 #include <string>
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
5 #include <vector>
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
6
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
7 #include <boost/algorithm/string.hpp>
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
8
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
9 #include "int_util.h"
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
10
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
11 // macro to check if read mapped
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
12 #define READ_UNMAPPED 4
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
13
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
14 /**
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
15 * Splits alignment into separate parts
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
16 */
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
17 std::vector<std::string> split_alignment(std::string &alignment) {
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
18 std::vector<std::string> parts;
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
19
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
20 boost::trim_if(alignment, boost::is_any_of("\t "));
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
21 // split on tab delimeter
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
22 boost::split(parts, alignment, boost::is_any_of("\t "), boost::token_compress_on);
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
23
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
24 return parts;
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
25 }
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
26
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
27 /**
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
28 * Validates bit flag
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
29 */
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
30 bool is_good_flag(const int &bit_flag) {
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
31 if( (bit_flag & READ_UNMAPPED) > 0) return false;
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
32 return true;
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
33 }
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
34
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
35 /**
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
36 * Validates rname
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
37 */
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
38 bool is_good_rname(const std::string &rname) {
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
39 return rname.compare("*") != 0;
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
40 }
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
41
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
42 /**
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
43 * Validates pos
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
44 */
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
45 bool is_good_pos(const int &pos) {
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
46 return pos > 0;
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
47 }
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
48
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
49 /**
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
50 * Validates cigar
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
51 */
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
52 bool is_good_cigar(const std::string &cigar) {
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
53 return cigar.compare("*") != 0;
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
54 }
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
55
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
56 /**
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
57 * Validates seq
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
58 */
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
59 bool is_good_seq(const std::string &seq) {
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
60 return seq.compare("*") != 0;
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
61 }
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
62
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
63 /**
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
64 * Validates alignment fields
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
65 */
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
66 bool fields_are_good(std::vector<std::string> &parts) {
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
67 int bit_flag = s_to_i(parts[1]);
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
68 int pos = s_to_i(parts[3]);
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
69
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
70 std::string rname = parts[2];
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
71 std::string cigar = parts[5];
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
72 std::string seq = parts[9];
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
73
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
74 if(!(is_good_flag(bit_flag))) return false;
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
75 if(!(is_good_pos(pos))) return false;
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
76 if(!(is_good_rname(rname))) return false;
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
77 if(!(is_good_cigar(cigar))) return false;
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
78 if(!(is_good_seq(seq))) return false;
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
79
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
80 return true;
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
81 }
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
82
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
83 /**
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
84 * Stores alignments that pass validity checks
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
85 */
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
86 bool is_good_alignment(std::string &alignment) {
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
87 std::vector<std::string> alignment_parts;
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
88
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
89 alignment_parts = split_alignment(alignment);
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
90
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
91 if(!(fields_are_good(alignment_parts)))
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
92 return false;
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
93 return true;
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
94 }
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
95
2aad8349d84b planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
96 #endif /* ALIGNMENT_UTIL_H */