view snp_caller/src/alignment_util.h @ 0:0fd352f62446 draft default tip

planemo upload for repository https://github.com/ChrisD11/Duplicon commit 3ee0594c692faac542ffa58f4339d79b9b8aefbd-dirty
author chrisd
date Sun, 21 Feb 2016 06:05:24 -0500
parents
children
line wrap: on
line source

#ifndef ALIGNMENT_UTIL_H
#define ALIGNMENT_UTIL_H

#include "int_util.h"

#include <string>
#include <vector>

#include <boost/algorithm/string.hpp>

// Bit masks that is_good_se_flag() / is_good_pe_flag() test against the
// integer flag of an alignment record.
// NOTE(review): names and values look like the SAM FLAG bits 0x1, 0x2, 0x4
// and 0x8 -- confirm against the SAM specification.
#define READ_PAIRED   1
#define PROPER_PAIR   2
#define READ_UNMAPPED 4
#define MATE_UNMAPPED 8

/**
 * Breaks an alignment line into its tab/space-delimited fields.
 *
 * Side effect: `alignment` is modified in place -- tabs and spaces at either
 * end of the string are stripped before the split.
 *
 * @param alignment  line to tokenize (trimmed in place)
 * @return the fields in order of appearance; runs of adjacent delimiters are
 *         compressed (boost::token_compress_on)
 */
std::vector<std::string> split_alignment(std::string &alignment) {
	// Strip delimiters from both ends of the caller's string first.
	boost::trim_if(alignment, boost::is_any_of("\t "));

	std::vector<std::string> fields;
	boost::split(fields, alignment, boost::is_any_of("\t "), boost::token_compress_on);
	return fields;
}

/**
 * Flag check for single-end data.
 *
 * @param flag  integer flag value of an alignment record
 * @return true when the READ_UNMAPPED bit is clear, false when it is set
 */
bool is_good_se_flag(const int &flag) {
	const bool read_unmapped = (flag & READ_UNMAPPED) != 0;
	return !read_unmapped;
}

/**
 * Flag check for paired-end data.
 *
 * A flag is accepted only when all four conditions hold:
 *   - the READ_UNMAPPED bit is clear,
 *   - the MATE_UNMAPPED bit is clear,
 *   - the READ_PAIRED bit is set,
 *   - the PROPER_PAIR bit is set.
 *
 * @param flag  integer flag value of an alignment record
 * @return true when every condition above holds, false otherwise
 */
bool is_good_pe_flag(const int &flag) {
	// Every mask test is parenthesized on purpose: `==` binds tighter than
	// `&`, so an unparenthesized `flag & PROPER_PAIR == 0` evaluates as
	// `flag & (PROPER_PAIR == 0)`, which is always 0 and would silently
	// disable the proper-pair requirement.
	const bool read_mapped = (flag & READ_UNMAPPED) == 0;
	const bool mate_mapped = (flag & MATE_UNMAPPED) == 0;
	const bool is_paired   = (flag & READ_PAIRED) != 0;
	const bool proper_pair = (flag & PROPER_PAIR) != 0;

	return read_mapped && mate_mapped && is_paired && proper_pair;
}

/**
 * Reference-name check.
 *
 * @param rname  reference-name field of an alignment record
 * @return false only when the field is exactly "*", true for any other value
 */
bool is_good_rname(const std::string &rname) {
	return rname != "*";
}

/**
 * Position check.
 *
 * @param pos  position field of an alignment record, already parsed to int
 * @return true when the position is strictly positive (1 or greater)
 */
bool is_good_pos(const int &pos) {
	return pos >= 1;
}

/**
 * Mate-position check.
 *
 * @param pnext  pnext field of an alignment record, already parsed to int
 * @return true when the value is strictly positive (1 or greater)
 */
bool is_good_pnext(const int &pnext) {
	return pnext >= 1;
}

/**
 * CIGAR check.
 *
 * @param cigar  cigar field of an alignment record
 * @return false only when the field is exactly "*", true for any other value
 */
bool is_good_cigar(const std::string &cigar) {
	return cigar != "*";
}

/**
 * Sequence check.
 *
 * @param seq  seq field of an alignment record
 * @return false only when the field is exactly "*", true for any other value
 */
bool is_good_seq(const std::string &seq) {
	return seq != "*";
}

/**
 * Uniqueness check based on the "XT:A:U" marker.
 *
 * The marker is searched for as a plain substring of the whole line, so a
 * match anywhere in the text counts.
 * NOTE(review): "XT:A:U" is presumably the aligner's optional tag for a
 * uniquely mapped read -- confirm against the aligner's documentation.
 *
 * @param alignment  full alignment line
 * @return true when "XT:A:U" occurs somewhere in `alignment`
 */
bool is_alignment_unique(const std::string &alignment) {
	const std::string::size_type tag_at = alignment.find("XT:A:U");
	return tag_at != std::string::npos;
}

/*bool is_good_alignment(std::string &alignment) {
	std::vector<std::string> alignment_parts;

	alignment_parts = split_alignment(alignment);

	if(!(fields_are_good(alignment_parts)))
		return false;
	else
		return true;
}*/

/**
 * Validates one single-end alignment line.
 *
 * The line is tokenized with split_alignment() (which trims `alignment` in
 * place) and the fields at index 1 (flag), 2 (rname), 3 (pos), 5 (cigar) and
 * 9 (seq) are each run through the matching is_good_* predicate.
 *
 * @param alignment  alignment line; tabs/spaces at its ends are stripped in place
 * @param best       when true, the line must also pass is_alignment_unique()
 * @return true only when every check passes; false on the first failing
 *         check, and also when the line has fewer than 10 fields
 */
bool se_fields_are_good(std::string &alignment, bool best) {
	std::vector<std::string> parts = split_alignment(alignment);

	// Index 9 is the highest field read below. A truncated or empty line must
	// be rejected here; indexing past the end of the vector is undefined.
	if(parts.size() < 10) return false;

	const int flag = s_to_i(parts[1]);
	const int pos = s_to_i(parts[3]);

	// Bind by reference: the predicates only read the fields, no copy needed.
	const std::string &rname = parts[2];
	const std::string &cigar = parts[5];
	const std::string &seq = parts[9];

	if(!is_good_se_flag(flag)) return false;
	if(!is_good_pos(pos)) return false;
	if(!is_good_rname(rname)) return false;
	if(!is_good_cigar(cigar)) return false;
	if(!is_good_seq(seq)) return false;

	// The uniqueness requirement only applies when the caller asks for it.
	if(best && !is_alignment_unique(alignment)) return false;

	return true;
}

/**
 * Validates one paired-end alignment line.
 *
 * The line is tokenized with split_alignment() (which trims `alignment` in
 * place) and the fields at index 1 (flag), 2 (rname), 3 (pos), 5 (cigar),
 * 7 (pnext) and 9 (seq) are each run through the matching is_good_*
 * predicate.
 *
 * @param alignment  alignment line; tabs/spaces at its ends are stripped in place
 * @param best       when true, the line must also pass is_alignment_unique()
 * @return true only when every check passes; false on the first failing
 *         check, and also when the line has fewer than 10 fields
 */
bool pe_fields_are_good(std::string &alignment, bool best) {
	std::vector<std::string> parts = split_alignment(alignment);

	// Index 9 is the highest field read below. A truncated or empty line must
	// be rejected here; indexing past the end of the vector is undefined.
	if(parts.size() < 10) return false;

	const int flag = s_to_i(parts[1]);
	const int pos = s_to_i(parts[3]);
	const int pnext = s_to_i(parts[7]);

	// Bind by reference: the predicates only read the fields, no copy needed.
	const std::string &rname = parts[2];
	const std::string &cigar = parts[5];
	const std::string &seq = parts[9];

	if(!is_good_pe_flag(flag)) return false;
	if(!is_good_pos(pos)) return false;
	if(!is_good_rname(rname)) return false;
	if(!is_good_cigar(cigar)) return false;
	if(!is_good_pnext(pnext)) return false;
	if(!is_good_seq(seq)) return false;

	// The uniqueness requirement only applies when the caller asks for it.
	if(best && !is_alignment_unique(alignment)) return false;

	return true;
}

#endif // ALIGNMENT_UTIL_H