diff snp_caller/src/alignment_util.h @ 0:0fd352f62446 draft default tip

planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
author chrisd
date Sun, 21 Feb 2016 06:05:24 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/snp_caller/src/alignment_util.h	Sun Feb 21 06:05:24 2016 -0500
@@ -0,0 +1,130 @@
+#ifndef ALIGNMENT_UTIL_H
+#define ALIGNMENT_UTIL_H
+
+#include "int_util.h"
+
+#include <string>
+#include <vector>
+
+#include <boost/algorithm/string.hpp>
+
+// Bit masks that the flag predicates below test against the integer flag
+// field of an alignment record.
+// NOTE(review): the names and values look like the SAM FLAG bits
+// 0x1 / 0x2 / 0x4 / 0x8 -- confirm against the format the caller feeds in.
+#define READ_PAIRED   1
+#define PROPER_PAIR   2
+#define READ_UNMAPPED 4
+#define MATE_UNMAPPED 8
+
+/**
+ * Break one alignment line into its whitespace-separated fields.
+ *
+ * Leading and trailing tabs/spaces are removed from `alignment` itself
+ * (the argument is modified in place), then the line is cut on tabs and
+ * spaces; runs of adjacent delimiters are treated as a single delimiter.
+ *
+ * @param alignment the line to tokenise; trimmed in place.
+ * @return the fields in the order they appear on the line.
+ */
+std::vector<std::string> split_alignment(std::string &alignment) {
+	// Strip the edges first so the split below does not yield empty
+	// leading/trailing tokens.
+	boost::trim_if(alignment, boost::is_any_of("\t "));
+
+	std::vector<std::string> fields;
+	boost::split(fields,
+	             alignment,
+	             boost::is_any_of("\t "),
+	             boost::token_compress_on);
+	return fields;
+}
+
+/**
+ * Single-end flag check.
+ *
+ * @param flag integer flag field of an alignment record.
+ * @return true when the READ_UNMAPPED bit is clear, false when it is set.
+ */
+bool is_good_se_flag(const int &flag) {
+	const bool read_unmapped = (flag & READ_UNMAPPED) != 0;
+	return !read_unmapped;
+}
+
+/**
+ * Paired-end flag check.
+ *
+ * @param flag integer flag field of an alignment record.
+ * @return true only when READ_PAIRED and PROPER_PAIR are both set and
+ *         neither READ_UNMAPPED nor MATE_UNMAPPED is set.
+ */
+bool is_good_pe_flag(const int &flag) {
+	// Bits that must all be present, and bits that must all be absent.
+	const int required = READ_PAIRED | PROPER_PAIR;
+	const int forbidden = READ_UNMAPPED | MATE_UNMAPPED;
+
+	// Each mask is applied inside parentheses before comparing: `==` binds
+	// tighter than `&`, so `flag & PROPER_PAIR == 0` reduces to `flag & 0`
+	// and the proper-pair requirement would never be enforced.
+	return (flag & required) == required && (flag & forbidden) == 0;
+}
+
+/**
+ * Reference-name check.
+ *
+ * @param rname reference-name field of an alignment record.
+ * @return false when the field is exactly "*", true for any other value.
+ */
+bool is_good_rname(const std::string &rname) {
+	return rname != "*";
+}
+
+/**
+ * Position check.
+ *
+ * @param pos position field of an alignment record, already parsed to int.
+ * @return true when the position is strictly positive.
+ */
+bool is_good_pos(const int &pos) {
+	return 0 < pos;
+}
+
+/**
+ * Mate-position check.
+ *
+ * @param pnext next/mate position field of an alignment record, already
+ *              parsed to int.
+ * @return true when the value is strictly positive.
+ */
+bool is_good_pnext(const int &pnext) {
+	return 0 < pnext;
+}
+
+/**
+ * CIGAR check.
+ *
+ * @param cigar CIGAR field of an alignment record.
+ * @return false when the field is exactly "*", true for any other value.
+ */
+bool is_good_cigar(const std::string &cigar) {
+	return cigar != "*";
+}
+
+/**
+ * Sequence check.
+ *
+ * @param seq sequence field of an alignment record.
+ * @return false when the field is exactly "*", true for any other value.
+ */
+bool is_good_seq(const std::string &seq) {
+	return seq != "*";
+}
+
+/**
+ * Report whether an alignment line carries the "XT:A:U" marker.
+ *
+ * The search is a plain substring match over the whole line, not over a
+ * particular field.
+ * NOTE(review): presumably this is the aligner's "unique hit" optional
+ * tag -- confirm which aligner produces the input.
+ *
+ * @param alignment the full alignment line.
+ * @return true when "XT:A:U" occurs anywhere in the line.
+ */
+bool is_alignment_unique(const std::string &alignment) {
+	const std::string::size_type where = alignment.find("XT:A:U");
+	return where != std::string::npos;
+}
+
+/*bool is_good_alignment(std::string &alignment) {
+	std::vector<std::string> alignment_parts;
+
+	alignment_parts = split_alignment(alignment);
+
+	if(!(fields_are_good(alignment_parts)))
+		return false;
+	else
+		return true;
+}*/
+
+/**
+ * Decide whether a single-end alignment line passes every field check.
+ *
+ * The line is tokenised with split_alignment(), which also trims leading
+ * and trailing tabs/spaces from `alignment` in place. Tokens 1, 2, 3, 5
+ * and 9 are read as flag, reference name, position, CIGAR and sequence
+ * (presumably the SAM column layout -- confirm against the producer).
+ *
+ * @param alignment one alignment record; modified in place (trimmed).
+ * @param best      when true, the record must additionally contain the
+ *                  "XT:A:U" marker (see is_alignment_unique()).
+ * @return true if every check passes; false otherwise, including when
+ *         the record has fewer than 10 fields.
+ */
+bool se_fields_are_good(std::string &alignment, bool best) {
+	std::vector<std::string> parts = split_alignment(alignment);
+
+	// The highest index read below is 9. Without this guard a short or
+	// empty record would index past the end of the vector, which is
+	// undefined behaviour.
+	if(parts.size() < 10) return false;
+
+	const int flag = s_to_i(parts[1]);
+	const int pos = s_to_i(parts[3]);
+
+	// The text fields are only inspected, so bind them by reference
+	// instead of copying each string.
+	const std::string &rname = parts[2];
+	const std::string &cigar = parts[5];
+	const std::string &seq = parts[9];
+
+	if(!is_good_se_flag(flag)) return false;
+	if(!is_good_pos(pos)) return false;
+	if(!is_good_rname(rname)) return false;
+	if(!is_good_cigar(cigar)) return false;
+	if(!is_good_seq(seq)) return false;
+
+	// The uniqueness marker is only required when the caller asks for it.
+	if(best && !is_alignment_unique(alignment)) return false;
+
+	return true;
+}
+
+/**
+ * Decide whether a paired-end alignment line passes every field check.
+ *
+ * The line is tokenised with split_alignment(), which also trims leading
+ * and trailing tabs/spaces from `alignment` in place. Tokens 1, 2, 3, 5,
+ * 7 and 9 are read as flag, reference name, position, CIGAR, mate
+ * position and sequence (presumably the SAM column layout -- confirm
+ * against the producer).
+ *
+ * @param alignment one alignment record; modified in place (trimmed).
+ * @param best      when true, the record must additionally contain the
+ *                  "XT:A:U" marker (see is_alignment_unique()).
+ * @return true if every check passes; false otherwise, including when
+ *         the record has fewer than 10 fields.
+ */
+bool pe_fields_are_good(std::string &alignment, bool best) {
+	std::vector<std::string> parts = split_alignment(alignment);
+
+	// The highest index read below is 9. Without this guard a short or
+	// empty record would index past the end of the vector, which is
+	// undefined behaviour.
+	if(parts.size() < 10) return false;
+
+	const int flag = s_to_i(parts[1]);
+	const int pos = s_to_i(parts[3]);
+	const int pnext = s_to_i(parts[7]);
+
+	// The text fields are only inspected, so bind them by reference
+	// instead of copying each string.
+	const std::string &rname = parts[2];
+	const std::string &cigar = parts[5];
+	const std::string &seq = parts[9];
+
+	if(!is_good_pe_flag(flag)) return false;
+	if(!is_good_pos(pos)) return false;
+	if(!is_good_rname(rname)) return false;
+	if(!is_good_cigar(cigar)) return false;
+	if(!is_good_pnext(pnext)) return false;
+	if(!is_good_seq(seq)) return false;
+
+	// The uniqueness marker is only required when the caller asks for it.
+	if(best && !is_alignment_unique(alignment)) return false;
+
+	return true;
+}
+
+#endif // ALIGNMENT_UTIL_H