Mercurial > repos > chrisd > testshed
diff snp_caller/src/alignment_util.h @ 0:0fd352f62446 draft default tip
planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
author | chrisd |
---|---|
date | Sun, 21 Feb 2016 06:05:24 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/snp_caller/src/alignment_util.h Sun Feb 21 06:05:24 2016 -0500 @@ -0,0 +1,130 @@ +#ifndef ALIGNMENT_UTIL_H +#define ALIGNMENT_UTIL_H + +#include "int_util.h" + +#include <string> +#include <vector> + +#include <boost/algorithm/string.hpp> + +#define READ_PAIRED 1 +#define PROPER_PAIR 2 +#define READ_UNMAPPED 4 +#define MATE_UNMAPPED 8 + +std::vector<std::string> split_alignment(std::string &alignment) { + std::vector<std::string> parts; + boost::trim_if(alignment, boost::is_any_of("\t ")); + boost::split(parts, alignment, boost::is_any_of("\t "), boost::token_compress_on); + return parts; +} + +bool is_good_se_flag(const int &flag) { + if( (flag & READ_UNMAPPED) > 0) { + return false; + } + else { + return true; + } +} + +bool is_good_pe_flag(const int &flag) { + if( (flag & READ_UNMAPPED) > 0 || (flag & MATE_UNMAPPED) > 0 || + (flag & READ_PAIRED) == 0 || (flag & PROPER_PAIR == 0) ) { + return false; + } + else { + return true; + } +} + +bool is_good_rname(const std::string &rname) { + return rname.compare("*") != 0; +} + +bool is_good_pos(const int &pos) { + return pos > 0; +} + +bool is_good_pnext(const int &pnext) { + return pnext > 0; +} + +bool is_good_cigar(const std::string &cigar) { + return cigar.compare("*") != 0; +} + +bool is_good_seq(const std::string &seq) { + return seq.compare("*") != 0; +} + +bool is_alignment_unique(const std::string &alignment) { + if(alignment.find("XT:A:U") != std::string::npos) { + return true; + } + else { + return false; + } +} + +/*bool is_good_alignment(std::string &alignment) { + std::vector<std::string> alignment_parts; + + alignment_parts = split_alignment(alignment); + + if(!(fields_are_good(alignment_parts))) + return false; + else + return true; +}*/ + +bool se_fields_are_good(std::string &alignment, bool best) { + std::vector<std::string> parts = split_alignment(alignment); + int flag = s_to_i(parts[1]); + int pos = s_to_i(parts[3]); + + std::string rname = parts[2]; + std::string cigar = parts[5]; + std::string seq = parts[9]; + + if(!(is_good_se_flag(flag))) return false; + if(!(is_good_pos(pos))) return false; + if(!(is_good_rname(rname))) return false; + if(!(is_good_cigar(cigar))) return false; + if(!(is_good_seq(seq))) return false; + if(best) { + if(!(is_alignment_unique(alignment))) { + return false; + } + } + + return true; +} + +bool pe_fields_are_good(std::string &alignment, bool best) { + std::vector<std::string> parts = split_alignment(alignment); + int flag = s_to_i(parts[1]); + int pos = s_to_i(parts[3]); + int pnext = s_to_i(parts[7]); + + std::string rname = parts[2]; + std::string cigar = parts[5]; + std::string seq = parts[9]; + + if(!(is_good_pe_flag(flag))) return false; + if(!(is_good_pos(pos))) return false; + if(!(is_good_rname(rname))) return false; + if(!(is_good_cigar(cigar))) return false; + if(!(is_good_pnext(pnext))) return false; + if(!(is_good_seq(seq))) return false; + if(best) { + if(!(is_alignment_unique(alignment))) { + return false; + } + } + + return true; +} + +#endif // ALIGNMENT_UTIL_H