changeset 1:20e03d65fc25 draft

planemo upload for repository https://github.com/ChrisD11/Tools commit 7ad4de2f3fdb2e49cefcfc465cc36c96a1602441
author chrisd
date Mon, 22 Feb 2016 04:52:09 -0500
parents 2aad8349d84b
children ed4a3695d6e8
files README csa gene_fraction/src/Alignments.cpp gene_fraction/src/Alignments.h gene_fraction/src/Alignments.o gene_fraction/src/Fasta.cpp gene_fraction/src/Fasta.h gene_fraction/src/Fasta.o gene_fraction/src/FastaRecord.cpp gene_fraction/src/FastaRecord.h gene_fraction/src/FastaRecord.o gene_fraction/src/Makefile gene_fraction/src/Sam.cpp gene_fraction/src/Sam.h gene_fraction/src/Sam.o gene_fraction/src/SamRatio.h gene_fraction/src/alignment_util.h gene_fraction/src/args.h gene_fraction/src/dir_util.h gene_fraction/src/gene_fraction.xml gene_fraction/src/int_util.h gene_fraction/src/main.cpp gene_fraction/src/main.o gene_fraction/src/ref.fa gene_fraction/src/res gene_fraction/src/test.sam tool_dependencies.xml
diffstat 26 files changed, 19 insertions(+), 1027 deletions(-) [+]
line wrap: on
line diff
Binary file csa has changed
--- a/gene_fraction/src/Alignments.cpp	Sun Feb 21 23:20:22 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,35 +0,0 @@
-#include "Alignments.h"
-#include "int_util.h"
-
-#include <boost/algorithm/string.hpp>
-
-#include <iostream>
-#include <sstream>
-
-Alignments::Alignments(std::string alignment) : _alignment(alignment) {
-	fill_alignment_fields(alignment);
-}
-
-void Alignments::fill_alignment_fields(const std::string &alignment) {
-	std::istringstream ss(alignment);
-	ss >> field.QNAME >> field.FLAG >> field.RNAME >> field.POS >>
-	      field.MAPQ >> field.CIGAR >> field.RNEXT >> field.PNEXT >>
-	      field.TLEN >> field.SEQ >> field.QUAL;
-}
-
-std::vector<std::pair<int,char>> Alignments::cigar() {
-	return get_cigar_operations(field.CIGAR);
-}
-
-std::vector<std::pair<int,char>> Alignments::get_cigar_operations(const std::string &cigar) {
-	std::vector<std::pair<int,char>> p;
-    	int count;
-    	char operation;
-
-    	std::istringstream ss(cigar);
-    	while(ss >> count >> operation) {
-        	p.push_back(std::make_pair(count, operation));
-    	}
-
-    	return p;
-}
--- a/gene_fraction/src/Alignments.h	Sun Feb 21 23:20:22 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,68 +0,0 @@
-#ifndef ALIGNMENTS_H
-#define ALIGNMENTS_H
-
-#include <string>
-#include <vector>
-
-/**
- * Stores information about an alignment
- */
-struct alignment_fields {
-    std::string QNAME;
-    int FLAG;
-    std::string RNAME;
-    int POS;
-    int MAPQ;
-    std::string CIGAR;
-    std::string RNEXT;
-    int PNEXT;
-    int TLEN;
-    std::string SEQ;
-    std::string QUAL;
-};
-
-/**
- * Class for dealing with alignments
- */
-class Alignments {
-public:
-	/**
- 	 * Ctor that initializes alignment
- 	 */ 
-	Alignments(std::string alignment);
-
-	/**
- 	 * Stores information about each of the eleven
- 	 * required alignment fields
- 	 */ 
-    	void fill_alignment_fields(const std::string &alignment);
-
-    	std::vector<std::pair<int,char>> cigar();
-
-	inline std::string alignment() const	    { return _alignment;  };
-
-    	inline std::string qname() const            { return field.QNAME; };
-    	inline std::string rname() const            { return field.RNAME; };
-    	inline std::string cigar() const            { return field.CIGAR; };
-    	inline std::string rnext() const            { return field.RNEXT; };
-    	inline std::string seq() const              { return field.SEQ;   };
-    	inline std::string qual() const             { return field.QUAL;  };
-
-    	inline int flag() const                     { return field.FLAG;  };
-    	inline int pos() const                      { return field.POS;   };
-    	inline int mapq() const                     { return field.MAPQ;  };
-    	inline int pnext() const                    { return field.PNEXT; };
-    	inline int tlen() const			    { return field.TLEN;  };
-
-private:
-	/**
-         * Returns a pair of cigar operations as (occurrence, operation)
-         * Ex: 10M5I -> (10, M), (5, I)
-         */
-    	std::vector<std::pair<int,char>> get_cigar_operations(const std::string &cigar);
-
-    	std::string _alignment;
-    	alignment_fields field;
-};
-
-#endif /* ALIGNMENTS_H */
Binary file gene_fraction/src/Alignments.o has changed
--- a/gene_fraction/src/Fasta.cpp	Sun Feb 21 23:20:22 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-#include "Fasta.h"
-#include "args.h"
-
-#include <iostream>
-#include <fstream>
-#include <vector>
-#include <string>
-
-Fasta::Fasta(std::string amr_fp) : _amr_fp(amr_fp) {}
-
-void Fasta::read_fasta(const std::string &amr_fp) {
-	std::ifstream in(amr_fp.c_str());
-	if(!in) {
-		usage();
-		exit(EXIT_FAILURE);
-	}
-
-	std::string gene_id, gene, line;
-	while(std::getline(in, line)) {
-		std::size_t gene_idx = line.find(" ");
-		
-		if(gene_idx != std::string::npos)
-			gene_id = line.substr(1, gene_idx-1);
-		else
-			gene_id = line.substr(1, line.length());
-
-		std::getline(in, gene);
-		records.push_back(FastaRecord(gene_id, gene));
-	}
-	in.close();
-	
-	FastaRecord::sort_by_gene_id(records);
-}
-
-
-
-
-
-
--- a/gene_fraction/src/Fasta.h	Sun Feb 21 23:20:22 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,28 +0,0 @@
-#ifndef FASTA_H
-#define FASTA_H
-
-#include <string>
-#include <vector>
-#include "FastaRecord.h"
-
-/**
- * Class for dealing with fasta files
- */
-class Fasta {
-public:
-	/** 
- 	 * Constructor that sets amr file path 
- 	 */
-	Fasta(std::string amr_fp);
-
-	/**
- 	 * Reads fasta file from file path 
- 	 */
-	void read_fasta(const std::string &amr_fp);
-	
-	std::vector<FastaRecord> records;
-private:
-	std::string _amr_fp;
-};
-
-#endif /* FASTA_H */
Binary file gene_fraction/src/Fasta.o has changed
--- a/gene_fraction/src/FastaRecord.cpp	Sun Feb 21 23:20:22 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-#include "FastaRecord.h"
-
-#include <algorithm>
-
-FastaRecord::FastaRecord(std::string gene_id, std::string gene) :
-	_gene_id(gene_id), _gene(gene), _base_hits(_gene.length(), 0),
-	_gene_hits(0) {}
-
-std::string FastaRecord::gene_id() const { return _gene_id; }
-
-std::string FastaRecord::gene() const { return _gene; }
-
-void FastaRecord::update_gene_hits() {
-	_gene_hits++;
-}
-
-int FastaRecord::gene_hits() const {
-	return _gene_hits;
-}
-
-int FastaRecord::get_base_hits() const {
-	return static_cast<int>(count(_base_hits.begin(), _base_hits.end(), 1));
-}
-
-int FastaRecord::find_gene(const std::vector<FastaRecord> &records,
-			   const std::string &gene_id, std::string seq) {
-	int gene_index;
-
-	std::vector<FastaRecord>::const_iterator low;
-	// binary search for fasta record index
-	low = std::lower_bound(records.begin(), records.end(), FastaRecord(gene_id, seq),
-                                   [](const FastaRecord &a, const FastaRecord &b)
-                                   { return a.gene_id() < b.gene_id(); });
-	gene_index = (low - records.begin());
-
-	return gene_index;
-}
-
-void FastaRecord::sort_by_gene_id(std::vector<FastaRecord> &records) {
-	// sort records by gene id
-	sort(records.begin(), records.end(), [](const FastaRecord &a, const FastaRecord &b) { return a.gene_id() < b.gene_id(); });
-}
-
-void FastaRecord::reset_base_hits(std::vector<FastaRecord> &records) {
-	for_each(records.begin(), records.end(), [](FastaRecord &a) { std::fill(a.base_hits().begin(), a.base_hits().end(), 0); });
-}
-
-void FastaRecord::reset_gene_hits(std::vector<FastaRecord> &records) {
-	for_each(records.begin(), records.end(), [](FastaRecord &a) { a._gene_hits = 0; });
-}
-
-std::vector<int> &FastaRecord::base_hits() {
-        return _base_hits;
-}
-
-
-
-
-
-
-	
-
--- a/gene_fraction/src/FastaRecord.h	Sun Feb 21 23:20:22 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,86 +0,0 @@
-#ifndef FASTA_RECORD_H
-#define FASTA_RECORD_H
-
-#include <string>
-#include <vector>
-
-/**
- * Class for dealing with fasta records
- */
-class FastaRecord {
-public:
-	/**
- 	 * Ctor that initializes gene id and gene
- 	 */ 
-	FastaRecord(std::string gene_id, std::string gene);
-
-	/**
- 	 * Returns a string gene id
- 	 */
-	std::string gene_id() const;
-
-	/**
- 	 * Returns the gene associated with gene id
- 	 */
-	std::string gene() const;
-
-	/**
- 	 * Returns the total base hits for a gene
- 	 */ 
-	int get_base_hits() const;
-
-	/**
- 	 * Returns the amount of genes that were hit
- 	 * during the gene fraction calculation
- 	 */ 
-	int gene_hits() const;
-	
-	/**
- 	 *
- 	 */ 
-	void update_base_hits(const int &index);
-
-	/**
- 	 *
- 	 */ 
-	void update_gene_hits();
-
-	/**
- 	 * Searches for a fasta record corresponding
- 	 * to gene id
- 	 */ 
-	static int find_gene(const std::vector<FastaRecord> &records, 
-			     const std::string &gene_id, 
-                             std::string seq = "");
-
-	/**
- 	 * Sorts fasta records by gene id
- 	 */ 
-	static void sort_by_gene_id(std::vector<FastaRecord> &records);
-
-	/**
- 	 * Resets base hits vector to 0's. 
- 	 * This occurs after each sample is processed
- 	 */ 
-	static void reset_base_hits(std::vector<FastaRecord> &records);
-
-	/**
- 	 * Resets gene hits primitive to 0.
- 	 * This happens after each sample is processed 
- 	 */
-	static void reset_gene_hits(std::vector<FastaRecord> &records);
-
-	std::vector<int> &base_hits();
-
-	std::string _gene_id;
-	std::string _gene;
-	std::vector<int> _base_hits;
-
-private:
-	int _gene_hits;
-};
-
-
-
-
-#endif /* FASTA_RECORD_H */
Binary file gene_fraction/src/FastaRecord.o has changed
--- a/gene_fraction/src/Makefile	Sun Feb 21 23:20:22 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-output: main.o Sam.o Alignments.o Fasta.o FastaRecord.o
-	g++ -std=c++11 main.o Sam.o Alignments.o Fasta.o FastaRecord.o -o csa
-main.o: main.cpp
-	g++ -c -std=c++11 main.cpp
-Sam.o: Sam.cpp
-	g++ -c -std=c++11 Sam.cpp
-Alignments.o: Alignments.cpp
-	g++ -c -std=c++11 Alignments.cpp
-Fasta.o: Fasta.cpp
-	g++ -c -std=c++11 Fasta.cpp
-FastaRecord.o: FastaRecord.cpp
-	g++ -c -std=c++11 FastaRecord.cpp
-clean:
-	rm *.o csa
--- a/gene_fraction/src/Sam.cpp	Sun Feb 21 23:20:22 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-#include "Sam.h"
-#include "args.h"
-#include "dir_util.h"
-#include "alignment_util.h"
-
-#include <iostream>
-#include <fstream>
-
-Sam::Sam(std::string sam_fp) : _sam_fp(sam_fp) {}
-
-void Sam::read_sam(cmd_args args) {
-	if(args.bam_stream) read_from_stdin();
-	else read_from_file(args.sam_fp);
-}
-
-void Sam::read_from_stdin() {
-	std::string line;
-	while(std::getline(std::cin, line)) {
-		if(line[0] == '@') continue;
-		alignment.push_back(line);
-	}
-}
-
-void Sam::read_from_file(const std::string &sam_fp) {
-	std::ifstream in(sam_fp.c_str());
-	if(!in) {
-		usage();
-		exit(EXIT_FAILURE);
-	}
-
-	std::string line;
-	while(getline(in, line)) {
-		if(line[0] == '@') continue;
-		if(is_good_alignment(line))
-			alignment.push_back(line);
-	}
-
-	in.close();
-}
-
-
--- a/gene_fraction/src/Sam.h	Sun Feb 21 23:20:22 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-#ifndef SAM_H
-#define SAM_H
-
-#include <string>
-#include <vector>
-
-#include "args.h"
-#include "Alignments.h"
-
-/**
- * Class for dealing with sam files
- */
-
-class Sam {
-public:
-	/**
- 	 * Ctor initializes sam file path
- 	 */ 
-	Sam(std::string sam_fp);
-	void read_sam(cmd_args args);
-
-	/**
- 	 * Reads sam file from stdin 
- 	 */ 
-	void read_from_stdin();
-
-	/**
-         * Reads sam file from directory or file path
-         */	 
-	void read_from_file(const std::string &sam_fp);
-
-	/**
- 	 *
- 	 */	 
-	void read_from_dir(const std::string &sam_dir_fp);
-
-	std::vector<Alignments> alignment;
-
-private:
-	std::string _sam_fp;
-};
-
-
-
-
-
-#endif /* SAM_H */
-
Binary file gene_fraction/src/Sam.o has changed
--- a/gene_fraction/src/SamRatio.h	Sun Feb 21 23:20:22 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,248 +0,0 @@
-#ifndef SAM_RATIO_H
-#define SAM_RATIO_H
-
-#include <iostream>
-#include <fstream>
-#include <algorithm>
-
-#include "FastaRecord.h"
-#include "Alignments.h"
-#include "args.h"
-
-typedef std::vector<std::pair<int,char>> cigar_str;
-
-/**
- *
- */
-struct header {
-	std::string level =             "Level";
-        std::string iteration =         "Iteration";
-        std::string gene_id =           "Gene id";
-        std::string gene_fraction =     "Gene Fraction";
-        std::string hits =              "Hits";
-};
-
-/**
- * Reports the total number of bases that were touched for each
- * gene. This is largely calculated using the positional and seq
- * information found in fields four and ten of each alignment
- */
-void analyze_coverage(FastaRecord &record, Alignments &alignment) {
-	record.update_gene_hits();
-
-	cigar_str cigar = alignment.cigar();
-
-	int len;
-	char op;
-
-	int occurrence;
-	int pos_in_gene = alignment.pos();
-
-	int start, stop;
-	int base_hits = record._base_hits.size(); // func this
-	int read_length = alignment.seq().length(); //func this
-
-	if(pos_in_gene == 1) {
-		occurrence = 0;
-		for(int i = 0; i < cigar.size(); i++) {
-			len = cigar[i].first;
-			op = cigar[i].second;
-
-			switch(op) {
-				case 'M':
-					occurrence += len;
-					break;
-				case 'I':
-					occurrence += len;
-					break;
-				default:
-					break;
-			}
-		}
-
-		start = read_length - occurrence;
-		stop = start + read_length;
-
-		for(int i = start; i < base_hits; i++) {
-			if(i == stop) break;
-			record._base_hits[i] = 1;
-		}
-	}
-	else {
-		start = pos_in_gene - 1;
-		stop = start + read_length;
-
-		for(int i = start; i < base_hits; i++) {
-			if(i == stop) break;
-			record._base_hits[i] = 1;
-		}
-	}
-}
-
-/**
- * Returns gene fraction of fasta record
- * Returns -1 if gene fraction is not greater than threshold
- */
-double coverage(const FastaRecord &record, const int &threshold) {
-        double gene_coverage;
-
-        int base_hits, gene_size;
-
-        base_hits = record.get_base_hits();
-        gene_size = record.gene().length();
-
-        gene_coverage = (static_cast<double>(base_hits)/static_cast<double>(gene_size))*100;
-
-        if(gene_coverage > threshold)
-                return gene_coverage;
-        return -1;
-}
-
-/**
- * Writes header to output file when
- * reading from stdin
- */
-void bam_stream_header() {
-	header h;
-	char sep = ',';
-
-	std::cout << h.level << sep << h.iteration << sep 
-                  << h.gene_id << sep << h.gene_fraction << sep 
-                  << h.hits << '\n';
-}
-
-/**
- * Writes header to output file when
- * reading from sam file
- */
-void file_header(const std::string &out_fp, const std::string &sam_fp) {
-	header h;
-	std::ofstream out(out_fp.c_str(), std::ofstream::app );
-	char sep = ',';
-
-	//out << "@" << sam_fp << '\n';
-	out << h.level << sep << h.iteration << sep 
-            << h.gene_id << sep << h.gene_fraction << sep 
-            << h.hits << '\n';
-	out.close();
-}
-
-/**
- *
- */
-void create_header(cmd_args &args) {
-	if(args.bam_stream) {
-		bam_stream_header();
-	}
-	else {
-		file_header(args.out_fp, args.sam_fp);
-	}
-}
-
-/**
- * Writes results to output file when reading from
- * stdin
- */
-void bam_stream_results(std::vector<FastaRecord> &records,
-                        const int &level, const int &iteration,
-                        cmd_args &args) {
-
-	double gene_fraction;
-	int hits_seen;
-	std::string id;
-	char sep = ',';
-
-	for(auto &rec : records) {
-		gene_fraction = coverage(rec, args.threshold);
-		hits_seen = rec.gene_hits();
-		id = rec.gene_id();
-
-		if(gene_fraction > 0) {
-			std::cout << level << sep << iteration << sep
-			          << id << sep << gene_fraction << sep
-			          << hits_seen << '\n';			
-		}
-	}
-}
-
-/**
- * Write results when reading sam file from
- * path
- */
-void file_results(std::vector<FastaRecord> &records,
-                  const int level, const int &iteration,
-                  cmd_args &args) {
-
-	std::ofstream out(args.out_fp.c_str(), std::ofstream::app);
-	
-	double gene_fraction;
-	int hits_seen;
-	std::string id;
-	char sep = ',';
-
-	for(auto &rec : records) {
-		gene_fraction = coverage(rec, args.threshold);
-		hits_seen = rec.gene_hits();
-		id = rec.gene_id();
-
-		if(gene_fraction > 0) {
-			out << level << sep << iteration << sep
-			    << id << sep << gene_fraction << sep
-                            << hits_seen << '\n';
-		}
-	}
-	out.close();
-}
-
-/**
- *
- */
-void report_results(std::vector<FastaRecord> &records,
-		    const int level, const int &iteration,
-		    cmd_args &args) {
-
-	if(args.bam_stream) {
-		bam_stream_results(records,level,iteration,args);
-	}
-	else {
-		file_results(records,level,iteration,args);
-	}
-}
-
-/**
- * Generates a sequence of samples from sam file specified
- * by the starting level, max level, and skip pattern
- */
-void generate_samples(std::vector<FastaRecord> &records,
-                      std::vector<Alignments> &alignments,
-		      cmd_args &args) {
-
-	int read_count = alignments.size();
-	int sample_size;
-
-	srand(unsigned(time(0)));
-
-	std::vector<int> sequence(read_count);
-	iota(sequence.begin(), sequence.end(), 0);
-
-	create_header(args);	
-
-	for(int level = args.min; level <= args.max; level += args.skip) {
-		for(int sample = 0; sample < args.s_per_lev; sample++) {
-			random_shuffle(sequence.begin(), sequence.end(), randomize);
-			sample_size = round(((static_cast<double>(level)/100)*read_count));
-			std::vector<int> chosen(sequence.begin(), sequence.begin()+sample_size);
-
-			for(int a_idx = 0; a_idx < chosen.size(); a_idx++) {
-				std::string rname = alignments[chosen[a_idx]].rname();
-				int gene_idx = FastaRecord::find_gene(records, rname);	
-				analyze_coverage(records[gene_idx], alignments[chosen[a_idx]]);
-			}
-			report_results(records,level,sample+1,args);
-			FastaRecord::reset_base_hits(records);
-			FastaRecord::reset_gene_hits(records);
-		}
-	}
-}
-
-#endif /* SAM_RATIO_H */
--- a/gene_fraction/src/alignment_util.h	Sun Feb 21 23:20:22 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,96 +0,0 @@
-#ifndef ALIGNMENT_UTIL_H
-#define ALIGNMENT_UTIL_H
-
-#include <string>
-#include <vector>
-
-#include <boost/algorithm/string.hpp>
-
-#include "int_util.h"
-
-// macro to check if read mapped
-#define READ_UNMAPPED 4
-
-/**
- * Splits alignment into separate parts
- */
-std::vector<std::string> split_alignment(std::string &alignment) {
-    	std::vector<std::string> parts;
-
-    	boost::trim_if(alignment, boost::is_any_of("\t "));
-	// split on tab delimeter
-    	boost::split(parts, alignment, boost::is_any_of("\t "), boost::token_compress_on);
-	
-    	return parts;
-}
-
-/**
- * Validates bit flag
- */
-bool is_good_flag(const int &bit_flag) {
-    	if( (bit_flag & READ_UNMAPPED) > 0) return false;
-    	return true;
-}
-
-/**
- * Validates rname
- */
-bool is_good_rname(const std::string &rname) {
-    	return rname.compare("*") != 0;
-}
-
-/**
- * Validates pos
- */
-bool is_good_pos(const int &pos) {
-    	return pos > 0;
-}
-
-/**
- * Validates cigar
- */
-bool is_good_cigar(const std::string &cigar) {
-    	return cigar.compare("*") != 0;
-}
-
-/**
- * Validates seq
- */
-bool is_good_seq(const std::string &seq) {
-    	return seq.compare("*") != 0;
-}
-
-/**
- * Validates alignment fields
- */
-bool fields_are_good(std::vector<std::string> &parts) {
-    	int bit_flag = s_to_i(parts[1]);
-    	int pos = s_to_i(parts[3]);
-
-    	std::string rname = parts[2];
-    	std::string cigar = parts[5];
-    	std::string seq = parts[9];
-
-    	if(!(is_good_flag(bit_flag))) return false;
-    	if(!(is_good_pos(pos))) return false;
-    	if(!(is_good_rname(rname))) return false;
-    	if(!(is_good_cigar(cigar))) return false;
-    	if(!(is_good_seq(seq))) return false;
-
-    	return true;
-}
-
-/**
- * Stores alignments that pass validity checks
- */
-bool is_good_alignment(std::string &alignment) {
-    	std::vector<std::string> alignment_parts;
-
-    	alignment_parts = split_alignment(alignment);
-
-    	if(!(fields_are_good(alignment_parts)))
-        	return false;
-    	return true;
-}
-
-#endif /* ALIGNMENT_UTIL_H */
--- a/gene_fraction/src/args.h	Sun Feb 21 23:20:22 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,90 +0,0 @@
-#ifndef ARGS_H
-#define ARGS_H
-
-#include "int_util.h"
-
-#include <string>
-#include <vector>
-#include <list>
-
-static void usage() {
-        fprintf(stderr, "\n");
-        fprintf(stderr, "Program: Coverage Sampler \n");
-        fprintf(stderr, "Contact: Chris Dean <cdean11@rams.colostate.edu\n\n");
-        fprintf(stderr, "Usage: csa [options]\n\n");
-        fprintf(stderr, "Options:\n\n");
-        fprintf(stderr, "    -amr_fp    amr database path\n");
-        fprintf(stderr, "    -sam_fp    sam file path\n");
-        fprintf(stderr, "    -min       starting level\n");
-        fprintf(stderr, "    -max       ending level\n");
-        fprintf(stderr, "    -skip      amount of levels to skip\n");
-        fprintf(stderr, "    -t         gene fraction threshold\n");
-        fprintf(stderr, "    -samples   amount of samples per level\n");
-        fprintf(stderr, "    -d         directory parsing\n");
-        fprintf(stderr, "    -bam       bam file parsing\n");
-        fprintf(stderr, "    -out_fp    output file path\n\n");
-}
-
-/**
- * Encapsulates information input
- * from the command line.
- */
-struct cmd_args {
-	std::string amr_fp;
-	std::string sam_fp;
-	std::list<std::string> sam_dir_fp;
-	std::string out_fp;
-	
-	int threshold;
-	int min;
-	int max;
-	int skip;
-	int s_per_lev;
-
-	bool sam_dir = false;		/* This will be set to true when parsing a
-                                           directory of sam files. */
-	bool bam_stream = false;	/* This will be set to true when executing
-                                           samtools view -h example.bam | csa > output
-                                           from the command line. */
-};
-
-/**
- * Returns a struct of command line arguments.
- */
-static inline cmd_args
-parse_command_line(int argc, char *argv[]) {
-	std::vector<std::string> args(argv, argv + argc);
-
-	cmd_args arg;
-
-	for(int i = 1; i < argc; i++) {
-		if(args[i].compare("-amr_fp") == 0)
-			arg.amr_fp = args[++i];
-		else if(args[i].compare("-sam_fp") == 0) 
-			arg.sam_fp = args[++i];
-		else if(args[i].compare("-out_fp") == 0)
-			arg.out_fp = args[++i];
-		else if(args[i].compare("-t") == 0)
-			arg.threshold = s_to_i(args[++i]);
-		else if(args[i].compare("-min") == 0)
-			arg.min = s_to_i(args[++i]);
-		else if(args[i].compare("-max") == 0)
-			arg.max = s_to_i(args[++i]);
-		else if(args[i].compare("-skip") == 0)
-			arg.skip = s_to_i(args[++i]);
-		else if(args[i].compare("-samples") == 0)
-			arg.s_per_lev = s_to_i(args[++i]);
-		else if(args[i].compare("-d") == 0)
-			arg.sam_dir = true;
-		else if(args[i].compare("-bam") == 0)
-			arg.bam_stream = true;
-		else {
-			usage();
-			exit(EXIT_FAILURE);
-		}
-	}
-
-	return arg;
-}
-
-#endif /* ARGS_H */
--- a/gene_fraction/src/dir_util.h	Sun Feb 21 23:20:22 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-#ifndef DIR_UTIL_H
-#define DIR_UTIL_H
-
-#include <list>
-#include <iostream>
-#include <string>
-#include <dirent.h>
-
-/*
- * Parses a directory of sam files
- */
-static inline std::list<std::string>
-parse_sam_dir(const std::string &directory) {
-        DIR *dir;
-        struct dirent *ent;
-        std::string dir_path = directory;
-
-        dir = opendir(dir_path.c_str());
-	// is dir open/valid?
-        if(dir == NULL) {
-                std::cout << "Not a valid directory" << std::endl;
-                exit(EXIT_FAILURE);
-        }
-
-        std::list<std::string> sam_files;
-        std::string fn;
-        std::string ext;
-        std::string file_type = ".sam";
-
-	// get all files with a .sam file extension
-        while((ent = readdir(dir)) != NULL) {
-                fn = dir_path + std::string(ent->d_name);
-                ext = fn.substr(fn.length()-4, fn.length());
-                if(ext.compare(file_type) == 0) {
-                        sam_files.push_back(fn);
-                }
-        }
-        closedir(dir);
-
-        return sam_files;
-}
-
-#endif /* DIR_UTIL_H */
--- a/gene_fraction/src/gene_fraction.xml	Sun Feb 21 23:20:22 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,57 +0,0 @@
-<tool id="gene_fraction" name="Coverage sampler" version="0.1.0">
-    <requirements>
-    </requirements>
-    <stdio>
-        <exit_code range="1:" />
-    </stdio>
-    <command><![CDATA[
-	csa
-	    -amr_fp $input1
-            -sam_fp $input2
-	    -min $min
-	    -max $max
-	    -skip $skip
-	    -t $threshold
-	    -samples $samples
-	    -out_fp $output1
-    ]]></command>
-    <inputs>
-	<param type="data" name="input1" format="fasta" />
-	<param type="data" name="input2" format="sam" />
-	<param name="min" type="integer" label="Minimum sample value"
-	       value="1" min="1" max="100" help="(-min)" />
-	<param name="max" type="integer" label="Maximum sample value"
-		value="1" min="1" max="100" help="(-max)" />
-	<param name="skip" type="integer" label="Amount of levels to skip"
-		value="1" min="1" max="100" help="(-skip)" />
-	<param name="threshold" type="integer" label="Gene fraction threshold"
-		value="1" min="1" max="100" help="(-t)" />
-	<param name="samples" type="integer" label="Amount of samples per level"
-		value="1" min="1" max="100" help="(-samples)" />
-    </inputs>
-    <outputs>
-	<data name="output1" format="tabular" />
-    </outputs>
-    <help><![CDATA[
-        
-Program: Coverage Sampler 
-Contact: Chris Dean <cdean11@rams.colostate.edu
-
-Usage: csa [options]
-
-Options:
-
-    -amr_fp    amr database path
-    -sam_fp    sam file path
-    -min       starting level
-    -max       ending level
-    -skip      amount of levels to skip
-    -t         gene fraction threshold
-    -samples   amount of samples per level
-    -d         directory parsing
-    -bam       bam file parsing
-    -out_fp    output file path
-
-
-    ]]></help>
-</tool>
--- a/gene_fraction/src/int_util.h	Sun Feb 21 23:20:22 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,29 +0,0 @@
-#ifndef INT_UTIL_H
-#define INT_UTIL_H
-
-#include <string>
-#include <sstream>
-
-/**
- * Given a string, return its integer.
- */
-static inline int
-s_to_i(const std::string &s) {
-	std::istringstream ss(s);
-	int i;
-	ss >> i;
-	return i;
-}
-
-/**
- * Given an integer, return a random number
- * between 0 and i.
- */
-static inline int
-randomize(const int &i) {
-	return rand() % i;
-}
-
-#endif /*INT_UTIL_H */
-
-
--- a/gene_fraction/src/main.cpp	Sun Feb 21 23:20:22 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,37 +0,0 @@
-#include <string>
-#include <iostream>
-#include <vector>
-
-#include "int_util.h"
-#include "dir_util.h"
-#include "args.h"
-#include "Fasta.h"
-#include "Sam.h"
-#include "SamRatio.h"
-
-using namespace std;
-
-int main(int argc, char *argv[]) {
-	cmd_args args;
-	args = parse_command_line(argc, argv);
-
-	Fasta f(args.amr_fp);
-	f.read_fasta(args.amr_fp);	
-
-	if(args.sam_dir) {
-		list<string> sam_files = parse_sam_dir(args.sam_fp);
-		for(auto &fn : sam_files) {
-			args.sam_fp = fn;
-			Sam s(args.sam_fp);
-			s.read_sam(args);
-			generate_samples(f.records, s.alignment, args);
-		}
-	}
-	else {
-		Sam s(args.sam_fp);
-		s.read_sam(args);
-		generate_samples(f.records, s.alignment, args);
-	}
-
-	return 0;
-}
Binary file gene_fraction/src/main.o has changed
--- a/gene_fraction/src/ref.fa	Sun Feb 21 23:20:22 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
->seq1
-AAAAAAAAAA
--- a/gene_fraction/src/res	Sun Feb 21 23:20:22 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-@test.sam
-Level,Iteration,Gene id,Gene Fraction,Hits
-100,1,seq1,50,1
--- a/gene_fraction/src/test.sam	Sun Feb 21 23:20:22 2016 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-QNAME	0	seq1	3	0	5M	0	0	0	AAAAA	;;;;;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Mon Feb 22 04:52:09 2016 -0500
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="csa" version="0.1">
+        <install version="1.0">
+            <actions>
+                <action type="shell_command">git clone --recursive https://github.com/ChrisD11/csa.git</action>
+                <action type="shell_command">make</action>
+                <action type="move_file">
+                    <source>csa</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <action type="set_environment">
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>Compiling csa requires a C++ compiler</readme>
+    </package>
+</tool_dependency>