Mercurial > repos > chrisd > testing
changeset 1:20e03d65fc25 draft
planemo upload for repository https://github.com/ChrisD11/Tools commit 7ad4de2f3fdb2e49cefcfc465cc36c96a1602441
author | chrisd |
---|---|
date | Mon, 22 Feb 2016 04:52:09 -0500 |
parents | 2aad8349d84b |
children | ed4a3695d6e8 |
files | README csa gene_fraction/src/Alignments.cpp gene_fraction/src/Alignments.h gene_fraction/src/Alignments.o gene_fraction/src/Fasta.cpp gene_fraction/src/Fasta.h gene_fraction/src/Fasta.o gene_fraction/src/FastaRecord.cpp gene_fraction/src/FastaRecord.h gene_fraction/src/FastaRecord.o gene_fraction/src/Makefile gene_fraction/src/Sam.cpp gene_fraction/src/Sam.h gene_fraction/src/Sam.o gene_fraction/src/SamRatio.h gene_fraction/src/alignment_util.h gene_fraction/src/args.h gene_fraction/src/dir_util.h gene_fraction/src/gene_fraction.xml gene_fraction/src/int_util.h gene_fraction/src/main.cpp gene_fraction/src/main.o gene_fraction/src/ref.fa gene_fraction/src/res gene_fraction/src/test.sam tool_dependencies.xml |
diffstat | 26 files changed, 19 insertions(+), 1027 deletions(-) [+] |
line wrap: on
line diff
--- a/gene_fraction/src/Alignments.cpp Sun Feb 21 23:20:22 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,35 +0,0 @@ -#include "Alignments.h" -#include "int_util.h" - -#include <boost/algorithm/string.hpp> - -#include <iostream> -#include <sstream> - -Alignments::Alignments(std::string alignment) : _alignment(alignment) { - fill_alignment_fields(alignment); -} - -void Alignments::fill_alignment_fields(const std::string &alignment) { - std::istringstream ss(alignment); - ss >> field.QNAME >> field.FLAG >> field.RNAME >> field.POS >> - field.MAPQ >> field.CIGAR >> field.RNEXT >> field.PNEXT >> - field.TLEN >> field.SEQ >> field.QUAL; -} - -std::vector<std::pair<int,char>> Alignments::cigar() { - return get_cigar_operations(field.CIGAR); -} - -std::vector<std::pair<int,char>> Alignments::get_cigar_operations(const std::string &cigar) { - std::vector<std::pair<int,char>> p; - int count; - char operation; - - std::istringstream ss(cigar); - while(ss >> count >> operation) { - p.push_back(std::make_pair(count, operation)); - } - - return p; -}
--- a/gene_fraction/src/Alignments.h Sun Feb 21 23:20:22 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,68 +0,0 @@ -#ifndef ALIGNMENTS_H -#define ALIGNMENTS_H - -#include <string> -#include <vector> - -/** - * Stores information about an alignment - */ -struct alignment_fields { - std::string QNAME; - int FLAG; - std::string RNAME; - int POS; - int MAPQ; - std::string CIGAR; - std::string RNEXT; - int PNEXT; - int TLEN; - std::string SEQ; - std::string QUAL; -}; - -/** - * Class for dealing with alignments - */ -class Alignments { -public: - /** - * Ctor that initializes alignment - */ - Alignments(std::string alignment); - - /** - * Stores information about each of the eleven - * required alignment fields - */ - void fill_alignment_fields(const std::string &alignment); - - std::vector<std::pair<int,char>> cigar(); - - inline std::string alignment() const { return _alignment; }; - - inline std::string qname() const { return field.QNAME; }; - inline std::string rname() const { return field.RNAME; }; - inline std::string cigar() const { return field.CIGAR; }; - inline std::string rnext() const { return field.RNEXT; }; - inline std::string seq() const { return field.SEQ; }; - inline std::string qual() const { return field.QUAL; }; - - inline int flag() const { return field.FLAG; }; - inline int pos() const { return field.POS; }; - inline int mapq() const { return field.MAPQ; }; - inline int pnext() const { return field.PNEXT; }; - inline int tlen() const { return field.TLEN; }; - -private: - /** - * Returns a pair of cigar operations as (occurrence, operation) - * Ex: 10M5I -> (10, M), (5, I) - */ - std::vector<std::pair<int,char>> get_cigar_operations(const std::string &cigar); - - std::string _alignment; - alignment_fields field; -}; - -#endif /* ALIGNMENTS_H */
--- a/gene_fraction/src/Fasta.cpp Sun Feb 21 23:20:22 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -#include "Fasta.h" -#include "args.h" - -#include <iostream> -#include <fstream> -#include <vector> -#include <string> - -Fasta::Fasta(std::string amr_fp) : _amr_fp(amr_fp) {} - -void Fasta::read_fasta(const std::string &amr_fp) { - std::ifstream in(amr_fp.c_str()); - if(!in) { - usage(); - exit(EXIT_FAILURE); - } - - std::string gene_id, gene, line; - while(std::getline(in, line)) { - std::size_t gene_idx = line.find(" "); - - if(gene_idx != std::string::npos) - gene_id = line.substr(1, gene_idx-1); - else - gene_id = line.substr(1, line.length()); - - std::getline(in, gene); - records.push_back(FastaRecord(gene_id, gene)); - } - in.close(); - - FastaRecord::sort_by_gene_id(records); -} - - - - - -
--- a/gene_fraction/src/Fasta.h Sun Feb 21 23:20:22 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -#ifndef FASTA_H -#define FASTA_H - -#include <string> -#include <vector> -#include "FastaRecord.h" - -/** - * Class for dealing with fasta files - */ -class Fasta { -public: - /** - * Constructor that sets amr file path - */ - Fasta(std::string amr_fp); - - /** - * Reads fasta file from file path - */ - void read_fasta(const std::string &amr_fp); - - std::vector<FastaRecord> records; -private: - std::string _amr_fp; -}; - -#endif /* FASTA_H */
--- a/gene_fraction/src/FastaRecord.cpp Sun Feb 21 23:20:22 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,62 +0,0 @@ -#include "FastaRecord.h" - -#include <algorithm> - -FastaRecord::FastaRecord(std::string gene_id, std::string gene) : - _gene_id(gene_id), _gene(gene), _base_hits(_gene.length(), 0), - _gene_hits(0) {} - -std::string FastaRecord::gene_id() const { return _gene_id; } - -std::string FastaRecord::gene() const { return _gene; } - -void FastaRecord::update_gene_hits() { - _gene_hits++; -} - -int FastaRecord::gene_hits() const { - return _gene_hits; -} - -int FastaRecord::get_base_hits() const { - return static_cast<int>(count(_base_hits.begin(), _base_hits.end(), 1)); -} - -int FastaRecord::find_gene(const std::vector<FastaRecord> &records, - const std::string &gene_id, std::string seq) { - int gene_index; - - std::vector<FastaRecord>::const_iterator low; - // binary search for fasta record index - low = std::lower_bound(records.begin(), records.end(), FastaRecord(gene_id, seq), - [](const FastaRecord &a, const FastaRecord &b) - { return a.gene_id() < b.gene_id(); }); - gene_index = (low - records.begin()); - - return gene_index; -} - -void FastaRecord::sort_by_gene_id(std::vector<FastaRecord> &records) { - // sort records by gene id - sort(records.begin(), records.end(), [](const FastaRecord &a, const FastaRecord &b) { return a.gene_id() < b.gene_id(); }); -} - -void FastaRecord::reset_base_hits(std::vector<FastaRecord> &records) { - for_each(records.begin(), records.end(), [](FastaRecord &a) { std::fill(a.base_hits().begin(), a.base_hits().end(), 0); }); -} - -void FastaRecord::reset_gene_hits(std::vector<FastaRecord> &records) { - for_each(records.begin(), records.end(), [](FastaRecord &a) { a._gene_hits = 0; }); -} - -std::vector<int> &FastaRecord::base_hits() { - return _base_hits; -} - - - - - - - -
--- a/gene_fraction/src/FastaRecord.h Sun Feb 21 23:20:22 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,86 +0,0 @@ -#ifndef FASTA_RECORD_H -#define FASTA_RECORD_H - -#include <string> -#include <vector> - -/** - * Class for dealing with fasta records - */ -class FastaRecord { -public: - /** - * Ctor that initializes gene id and gene - */ - FastaRecord(std::string gene_id, std::string gene); - - /** - * Returns a string gene id - */ - std::string gene_id() const; - - /** - * Returns the gene associated with gene id - */ - std::string gene() const; - - /** - * Returns the total base hits for a gene - */ - int get_base_hits() const; - - /** - * Returns the amount of genes that were hit - * during the gene fraction calculation - */ - int gene_hits() const; - - /** - * - */ - void update_base_hits(const int &index); - - /** - * - */ - void update_gene_hits(); - - /** - * Searches for a fasta record corresponding - * to gene id - */ - static int find_gene(const std::vector<FastaRecord> &records, - const std::string &gene_id, - std::string seq = ""); - - /** - * Sorts fasta records by gene id - */ - static void sort_by_gene_id(std::vector<FastaRecord> &records); - - /** - * Resets base hits vector to 0's. - * This occurs after each sample is processed - */ - static void reset_base_hits(std::vector<FastaRecord> &records); - - /** - * Resets gene hits primitive to 0. - * This happens after each sample is processed - */ - static void reset_gene_hits(std::vector<FastaRecord> &records); - - std::vector<int> &base_hits(); - - std::string _gene_id; - std::string _gene; - std::vector<int> _base_hits; - -private: - int _gene_hits; -}; - - - - -#endif /* FASTA_RECORD_H */
--- a/gene_fraction/src/Makefile Sun Feb 21 23:20:22 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,14 +0,0 @@ -output: main.o Sam.o Alignments.o Fasta.o FastaRecord.o - g++ -std=c++11 main.o Sam.o Alignments.o Fasta.o FastaRecord.o -o csa -main.o: main.cpp - g++ -c -std=c++11 main.cpp -Sam.o: Sam.cpp - g++ -c -std=c++11 Sam.cpp -Alignments.o: Alignments.cpp - g++ -c -std=c++11 Alignments.cpp -Fasta.o: Fasta.cpp - g++ -c -std=c++11 Fasta.cpp -FastaRecord.o: FastaRecord.cpp - g++ -c -std=c++11 FastaRecord.cpp -clean: - rm *.o csa
--- a/gene_fraction/src/Sam.cpp Sun Feb 21 23:20:22 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -#include "Sam.h" -#include "args.h" -#include "dir_util.h" -#include "alignment_util.h" - -#include <iostream> -#include <fstream> - -Sam::Sam(std::string sam_fp) : _sam_fp(sam_fp) {} - -void Sam::read_sam(cmd_args args) { - if(args.bam_stream) read_from_stdin(); - else read_from_file(args.sam_fp); -} - -void Sam::read_from_stdin() { - std::string line; - while(std::getline(std::cin, line)) { - if(line[0] == '@') continue; - alignment.push_back(line); - } -} - -void Sam::read_from_file(const std::string &sam_fp) { - std::ifstream in(sam_fp.c_str()); - if(!in) { - usage(); - exit(EXIT_FAILURE); - } - - std::string line; - while(getline(in, line)) { - if(line[0] == '@') continue; - if(is_good_alignment(line)) - alignment.push_back(line); - } - - in.close(); -} - -
--- a/gene_fraction/src/Sam.h Sun Feb 21 23:20:22 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,48 +0,0 @@ -#ifndef SAM_H -#define SAM_H - -#include <string> -#include <vector> - -#include "args.h" -#include "Alignments.h" - -/** - * Class for dealing with sam files - */ - -class Sam { -public: - /** - * Ctor initializes sam file path - */ - Sam(std::string sam_fp); - void read_sam(cmd_args args); - - /** - * Reads sam file from stdin - */ - void read_from_stdin(); - - /** - * Reads sam file from directory or file path - */ - void read_from_file(const std::string &sam_fp); - - /** - * - */ - void read_from_dir(const std::string &sam_dir_fp); - - std::vector<Alignments> alignment; - -private: - std::string _sam_fp; -}; - - - - - -#endif /* SAM_H */ -
--- a/gene_fraction/src/SamRatio.h Sun Feb 21 23:20:22 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,248 +0,0 @@ -#ifndef SAM_RATIO_H -#define SAM_RATIO_H - -#include <iostream> -#include <fstream> -#include <algorithm> - -#include "FastaRecord.h" -#include "Alignments.h" -#include "args.h" - -typedef std::vector<std::pair<int,char>> cigar_str; - -/** - * - */ -struct header { - std::string level = "Level"; - std::string iteration = "Iteration"; - std::string gene_id = "Gene id"; - std::string gene_fraction = "Gene Fraction"; - std::string hits = "Hits"; -}; - -/** - * Reports the total number of bases that were touched for each - * gene. This is largely calculated using the positional and seq - * information found in fields four and ten of each alignment - */ -void analyze_coverage(FastaRecord &record, Alignments &alignment) { - record.update_gene_hits(); - - cigar_str cigar = alignment.cigar(); - - int len; - char op; - - int occurrence; - int pos_in_gene = alignment.pos(); - - int start, stop; - int base_hits = record._base_hits.size(); // func this - int read_length = alignment.seq().length(); //func this - - if(pos_in_gene == 1) { - occurrence = 0; - for(int i = 0; i < cigar.size(); i++) { - len = cigar[i].first; - op = cigar[i].second; - - switch(op) { - case 'M': - occurrence += len; - break; - case 'I': - occurrence += len; - break; - default: - break; - } - } - - start = read_length - occurrence; - stop = start + read_length; - - for(int i = start; i < base_hits; i++) { - if(i == stop) break; - record._base_hits[i] = 1; - } - } - else { - start = pos_in_gene - 1; - stop = start + read_length; - - for(int i = start; i < base_hits; i++) { - if(i == stop) break; - record._base_hits[i] = 1; - } - } -} - -/** - * Returns gene fraction of fasta record - * Returns -1 if gene fraction is not greater than threshold - */ -double coverage(const FastaRecord &record, const int &threshold) { - double gene_coverage; - - int base_hits, gene_size; - - base_hits = record.get_base_hits(); - gene_size = record.gene().length(); - - gene_coverage = (static_cast<double>(base_hits)/static_cast<double>(gene_size))*100; - - if(gene_coverage > threshold) - return gene_coverage; - return -1; -} - -/** - * Writes header to output file when - * reading from stdin - */ -void bam_stream_header() { - header h; - char sep = ','; - - std::cout << h.level << sep << h.iteration << sep - << h.gene_id << sep << h.gene_fraction << sep - << h.hits << '\n'; -} - -/** - * Writes header to output file when - * reading from sam file - */ -void file_header(const std::string &out_fp, const std::string &sam_fp) { - header h; - std::ofstream out(out_fp.c_str(), std::ofstream::app ); - char sep = ','; - - //out << "@" << sam_fp << '\n'; - out << h.level << sep << h.iteration << sep - << h.gene_id << sep << h.gene_fraction << sep - << h.hits << '\n'; - out.close(); -} - -/** - * - */ -void create_header(cmd_args &args) { - if(args.bam_stream) { - bam_stream_header(); - } - else { - file_header(args.out_fp, args.sam_fp); - } -} - -/** - * Writes results to output file when reading from - * stdin - */ -void bam_stream_results(std::vector<FastaRecord> &records, - const int &level, const int &iteration, - cmd_args &args) { - - double gene_fraction; - int hits_seen; - std::string id; - char sep = ','; - - for(auto &rec : records) { - gene_fraction = coverage(rec, args.threshold); - hits_seen = rec.gene_hits(); - id = rec.gene_id(); - - if(gene_fraction > 0) { - std::cout << level << sep << iteration << sep - << id << sep << gene_fraction << sep - << hits_seen << '\n'; - } - } -} - -/** - * Write results when reading sam file from - * path - */ -void file_results(std::vector<FastaRecord> &records, - const int level, const int &iteration, - cmd_args &args) { - - std::ofstream out(args.out_fp.c_str(), std::ofstream::app); - - double gene_fraction; - int hits_seen; - std::string id; - char sep = ','; - - for(auto &rec : records) { - gene_fraction = coverage(rec, args.threshold); - hits_seen = rec.gene_hits(); - id = rec.gene_id(); - - if(gene_fraction > 0) { - out << level << sep << iteration << sep - << id << sep << gene_fraction << sep - << hits_seen << '\n'; - } - } - out.close(); -} - -/** - * - */ -void report_results(std::vector<FastaRecord> &records, - const int level, const int &iteration, - cmd_args &args) { - - if(args.bam_stream) { - bam_stream_results(records,level,iteration,args); - } - else { - file_results(records,level,iteration,args); - } -} - -/** - * Generates a sequence of samples from sam file specified - * by the starting level, max level, and skip pattern - */ -void generate_samples(std::vector<FastaRecord> &records, - std::vector<Alignments> &alignments, - cmd_args &args) { - - int read_count = alignments.size(); - int sample_size; - - srand(unsigned(time(0))); - - std::vector<int> sequence(read_count); - iota(sequence.begin(), sequence.end(), 0); - - create_header(args); - - for(int level = args.min; level <= args.max; level += args.skip) { - for(int sample = 0; sample < args.s_per_lev; sample++) { - random_shuffle(sequence.begin(), sequence.end(), randomize); - sample_size = round(((static_cast<double>(level)/100)*read_count)); - std::vector<int> chosen(sequence.begin(), sequence.begin()+sample_size); - - for(int a_idx = 0; a_idx < chosen.size(); a_idx++) { - std::string rname = alignments[chosen[a_idx]].rname(); - int gene_idx = FastaRecord::find_gene(records, rname); - analyze_coverage(records[gene_idx], alignments[chosen[a_idx]]); - } - report_results(records,level,sample+1,args); - FastaRecord::reset_base_hits(records); - FastaRecord::reset_gene_hits(records); - } - } -} - -#endif /* SAM_RATIO_H */
--- a/gene_fraction/src/alignment_util.h Sun Feb 21 23:20:22 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,96 +0,0 @@ -#ifndef ALIGNMENT_UTIL_H -#define ALIGNMENT_UTIL_H - -#include <string> -#include <vector> - -#include <boost/algorithm/string.hpp> - -#include "int_util.h" - -// macro to check if read mapped -#define READ_UNMAPPED 4 - -/** - * Splits alignment into separate parts - */ -std::vector<std::string> split_alignment(std::string &alignment) { - std::vector<std::string> parts; - - boost::trim_if(alignment, boost::is_any_of("\t ")); - // split on tab delimeter - boost::split(parts, alignment, boost::is_any_of("\t "), boost::token_compress_on); - - return parts; -} - -/** - * Validates bit flag - */ -bool is_good_flag(const int &bit_flag) { - if( (bit_flag & READ_UNMAPPED) > 0) return false; - return true; -} - -/** - * Validates rname - */ -bool is_good_rname(const std::string &rname) { - return rname.compare("*") != 0; -} - -/** - * Validates pos - */ -bool is_good_pos(const int &pos) { - return pos > 0; -} - -/** - * Validates cigar - */ -bool is_good_cigar(const std::string &cigar) { - return cigar.compare("*") != 0; -} - -/** - * Validates seq - */ -bool is_good_seq(const std::string &seq) { - return seq.compare("*") != 0; -} - -/** - * Validates alignment fields - */ -bool fields_are_good(std::vector<std::string> &parts) { - int bit_flag = s_to_i(parts[1]); - int pos = s_to_i(parts[3]); - - std::string rname = parts[2]; - std::string cigar = parts[5]; - std::string seq = parts[9]; - - if(!(is_good_flag(bit_flag))) return false; - if(!(is_good_pos(pos))) return false; - if(!(is_good_rname(rname))) return false; - if(!(is_good_cigar(cigar))) return false; - if(!(is_good_seq(seq))) return false; - - return true; -} - -/** - * Stores alignments that pass validity checks - */ -bool is_good_alignment(std::string &alignment) { - std::vector<std::string> alignment_parts; - - alignment_parts = split_alignment(alignment); - - if(!(fields_are_good(alignment_parts))) - return false; - return true; -} - -#endif /* ALIGNMENT_UTIL_H */
--- a/gene_fraction/src/args.h Sun Feb 21 23:20:22 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,90 +0,0 @@ -#ifndef ARGS_H -#define ARGS_H - -#include "int_util.h" - -#include <string> -#include <vector> -#include <list> - -static void usage() { - fprintf(stderr, "\n"); - fprintf(stderr, "Program: Coverage Sampler \n"); - fprintf(stderr, "Contact: Chris Dean <cdean11@rams.colostate.edu\n\n"); - fprintf(stderr, "Usage: csa [options]\n\n"); - fprintf(stderr, "Options:\n\n"); - fprintf(stderr, " -amr_fp amr database path\n"); - fprintf(stderr, " -sam_fp sam file path\n"); - fprintf(stderr, " -min starting level\n"); - fprintf(stderr, " -max ending level\n"); - fprintf(stderr, " -skip amount of levels to skip\n"); - fprintf(stderr, " -t gene fraction threshold\n"); - fprintf(stderr, " -samples amount of samples per level\n"); - fprintf(stderr, " -d directory parsing\n"); - fprintf(stderr, " -bam bam file parsing\n"); - fprintf(stderr, " -out_fp output file path\n\n"); -} - -/** - * Encapsulates information input - * from the command line. - */ -struct cmd_args { - std::string amr_fp; - std::string sam_fp; - std::list<std::string> sam_dir_fp; - std::string out_fp; - - int threshold; - int min; - int max; - int skip; - int s_per_lev; - - bool sam_dir = false; /* This will be set to true when parsing a - directory of sam files. */ - bool bam_stream = false; /* This will be set to true when executing - samtools view -h example.bam | csa > output - from the command line. */ -}; - -/** - * Returns a struct of command line arguments. - */ -static inline cmd_args -parse_command_line(int argc, char *argv[]) { - std::vector<std::string> args(argv, argv + argc); - - cmd_args arg; - - for(int i = 1; i < argc; i++) { - if(args[i].compare("-amr_fp") == 0) - arg.amr_fp = args[++i]; - else if(args[i].compare("-sam_fp") == 0) - arg.sam_fp = args[++i]; - else if(args[i].compare("-out_fp") == 0) - arg.out_fp = args[++i]; - else if(args[i].compare("-t") == 0) - arg.threshold = s_to_i(args[++i]); - else if(args[i].compare("-min") == 0) - arg.min = s_to_i(args[++i]); - else if(args[i].compare("-max") == 0) - arg.max = s_to_i(args[++i]); - else if(args[i].compare("-skip") == 0) - arg.skip = s_to_i(args[++i]); - else if(args[i].compare("-samples") == 0) - arg.s_per_lev = s_to_i(args[++i]); - else if(args[i].compare("-d") == 0) - arg.sam_dir = true; - else if(args[i].compare("-bam") == 0) - arg.bam_stream = true; - else { - usage(); - exit(EXIT_FAILURE); - } - } - - return arg; -} - -#endif /* ARGS_H */
--- a/gene_fraction/src/dir_util.h Sun Feb 21 23:20:22 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,43 +0,0 @@ -#ifndef DIR_UTIL_H -#define DIR_UTIL_H - -#include <list> -#include <iostream> -#include <string> -#include <dirent.h> - -/* - * Parses a directory of sam files - */ -static inline std::list<std::string> -parse_sam_dir(const std::string &directory) { - DIR *dir; - struct dirent *ent; - std::string dir_path = directory; - - dir = opendir(dir_path.c_str()); - // is dir open/valid? - if(dir == NULL) { - std::cout << "Not a valid directory" << std::endl; - exit(EXIT_FAILURE); - } - - std::list<std::string> sam_files; - std::string fn; - std::string ext; - std::string file_type = ".sam"; - - // get all files with a .sam file extension - while((ent = readdir(dir)) != NULL) { - fn = dir_path + std::string(ent->d_name); - ext = fn.substr(fn.length()-4, fn.length()); - if(ext.compare(file_type) == 0) { - sam_files.push_back(fn); - } - } - closedir(dir); - - return sam_files; -} - -#endif /* DIR_UTIL_H */
--- a/gene_fraction/src/gene_fraction.xml Sun Feb 21 23:20:22 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,57 +0,0 @@ -<tool id="gene_fraction" name="Coverage sampler" version="0.1.0"> - <requirements> - </requirements> - <stdio> - <exit_code range="1:" /> - </stdio> - <command><![CDATA[ - csa - -amr_fp $input1 - -sam_fp $input2 - -min $min - -max $max - -skip $skip - -t $threshold - -samples $samples - -out_fp $output1 - ]]></command> - <inputs> - <param type="data" name="input1" format="fasta" /> - <param type="data" name="input2" format="sam" /> - <param name="min" type="integer" label="Minimum sample value" - value="1" min="1" max="100" help="(-min)" /> - <param name="max" type="integer" label="Maximum sample value" - value="1" min="1" max="100" help="(-max)" /> - <param name="skip" type="integer" label="Amount of levels to skip" - value="1" min="1" max="100" help="(-skip)" /> - <param name="threshold" type="integer" label="Gene fraction threshold" - value="1" min="1" max="100" help="(-t)" /> - <param name="samples" type="integer" label="Amount of samples per level" - value="1" min="1" max="100" help="(-samples)" /> - </inputs> - <outputs> - <data name="output1" format="tabular" /> - </outputs> - <help><![CDATA[ - -Program: Coverage Sampler -Contact: Chris Dean <cdean11@rams.colostate.edu - -Usage: csa [options] - -Options: - - -amr_fp amr database path - -sam_fp sam file path - -min starting level - -max ending level - -skip amount of levels to skip - -t gene fraction threshold - -samples amount of samples per level - -d directory parsing - -bam bam file parsing - -out_fp output file path - - - ]]></help> -</tool>
--- a/gene_fraction/src/int_util.h Sun Feb 21 23:20:22 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,29 +0,0 @@ -#ifndef INT_UTIL_H -#define INT_UTIL_H - -#include <string> -#include <sstream> - -/** - * Given a string, return its integer. - */ -static inline int -s_to_i(const std::string &s) { - std::istringstream ss(s); - int i; - ss >> i; - return i; -} - -/** - * Given an integer, return a random number - * between 0 and i. - */ -static inline int -randomize(const int &i) { - return rand() % i; -} - -#endif /*INT_UTIL_H */ - -
--- a/gene_fraction/src/main.cpp Sun Feb 21 23:20:22 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,37 +0,0 @@ -#include <string> -#include <iostream> -#include <vector> - -#include "int_util.h" -#include "dir_util.h" -#include "args.h" -#include "Fasta.h" -#include "Sam.h" -#include "SamRatio.h" - -using namespace std; - -int main(int argc, char *argv[]) { - cmd_args args; - args = parse_command_line(argc, argv); - - Fasta f(args.amr_fp); - f.read_fasta(args.amr_fp); - - if(args.sam_dir) { - list<string> sam_files = parse_sam_dir(args.sam_fp); - for(auto &fn : sam_files) { - args.sam_fp = fn; - Sam s(args.sam_fp); - s.read_sam(args); - generate_samples(f.records, s.alignment, args); - } - } - else { - Sam s(args.sam_fp); - s.read_sam(args); - generate_samples(f.records, s.alignment, args); - } - - return 0; -}
--- a/gene_fraction/src/ref.fa Sun Feb 21 23:20:22 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ ->seq1 -AAAAAAAAAA
--- a/gene_fraction/src/res Sun Feb 21 23:20:22 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ -@test.sam -Level,Iteration,Gene id,Gene Fraction,Hits -100,1,seq1,50,1
--- a/gene_fraction/src/test.sam Sun Feb 21 23:20:22 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -QNAME 0 seq1 3 0 5M 0 0 0 AAAAA ;;;;;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Mon Feb 22 04:52:09 2016 -0500 @@ -0,0 +1,19 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="csa" version="0.1"> + <install version="1.0"> + <actions> + <action type="shell_command">git clone --recursive https://github.com/ChrisD11/csa.git</action> + <action type="shell_command">make</action> + <action type="move_file"> + <source>csa</source> + <destination>$INSTALL_DIR/bin</destination> + </action> + <action type="set_environment"> + <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable> + </action> + </actions> + </install> + <readme>Compiling csa requires a C++ compiler</readme> + </package> +</tool_dependency>