Mercurial > repos > rnateam > bctools
changeset 50:0b9aab6aaebf draft
Uploaded 16cfcafe8b42055c5dd64e62c42b82b455027a40
author | rnateam |
---|---|
date | Tue, 26 Jan 2016 04:38:27 -0500 |
parents | 303f6402a035 |
children | c226eef33bab |
files | convert_bc_to_binary_RY.py convert_bc_to_binary_RY.xml coords2clnt.py coords2clnt.xml extract_aln_ends.py extract_aln_ends.xml extract_bcs.py extract_bcs.xml merge_pcr_duplicates.py merge_pcr_duplicates.xml remove_tail.py remove_tail.xml rm_spurious_events.pl rm_spurious_events.py rm_spurious_events.xml tool_dependencies.xml |
diffstat | 16 files changed, 295 insertions(+), 206 deletions(-) [+] |
line wrap: on
line diff
--- a/convert_bc_to_binary_RY.py Sat Dec 19 06:16:22 2015 -0500 +++ b/convert_bc_to_binary_RY.py Tue Jan 26 04:38:27 2016 -0500 @@ -1,5 +1,13 @@ #!/usr/bin/env python +import argparse +import logging +from string import maketrans +from sys import stdout +from Bio import SeqIO +from Bio.Seq import Seq +from Bio.Alphabet import IUPAC + tool_description = """ Convert standard nucleotides to IUPAC nucleotide codes used for binary barcodes. @@ -19,19 +27,6 @@ Status: Testing """ -import argparse -import logging -from string import maketrans -from sys import stdout -from Bio import SeqIO -from Bio.Seq import Seq -from Bio.Alphabet import IUPAC - -# # avoid ugly python IOError when stdout output is piped into another program -# # and then truncated (such as piping to head) -# from signal import signal, SIGPIPE, SIG_DFL -# signal(SIGPIPE, SIG_DFL) - # parse command line arguments parser = argparse.ArgumentParser(description=tool_description, epilog=epilog,
--- a/convert_bc_to_binary_RY.xml Sat Dec 19 06:16:22 2015 -0500 +++ b/convert_bc_to_binary_RY.xml Tue Jan 26 04:38:27 2016 -0500 @@ -5,7 +5,7 @@ </macros> <expand macro="requirements" /> <expand macro="stdio" /> - <version_command>python convert_bc_to_binary_RY.py --version</version_command> + <version_command>python $__tool_directory__/convert_bc_to_binary_RY.py --version</version_command> <command interpreter="python"><![CDATA[ convert_bc_to_binary_RY.py #if $positional_1 and $positional_1 is not None:
--- a/coords2clnt.py Sat Dec 19 06:16:22 2015 -0500 +++ b/coords2clnt.py Tue Jan 26 04:38:27 2016 -0500 @@ -1,5 +1,15 @@ #!/usr/bin/env python +import argparse +import logging +from sys import stdout +from pybedtools import BedTool +from pybedtools.featurefuncs import five_prime +# avoid ugly python IOError when stdout output is piped into another program +# and then truncated (such as piping to head) +from signal import signal, SIGPIPE, SIG_DFL +signal(SIGPIPE, SIG_DFL) + tool_description = """ Given coordinates of the aligned reads, calculate positions of the crosslinked nucleotides. Crosslinked nts are assumed to be one nt upstream of the 5'-end of @@ -25,16 +35,6 @@ Status: Testing """ -import argparse -import logging -from sys import stdout -from pybedtools import BedTool -from pybedtools.featurefuncs import five_prime -# avoid ugly python IOError when stdout output is piped into another program -# and then truncated (such as piping to head) -from signal import signal, SIGPIPE, SIG_DFL -signal(SIGPIPE, SIG_DFL) - # parse command line arguments parser = argparse.ArgumentParser(description=tool_description, epilog=epilog,
--- a/coords2clnt.xml Sat Dec 19 06:16:22 2015 -0500 +++ b/coords2clnt.xml Tue Jan 26 04:38:27 2016 -0500 @@ -8,7 +8,7 @@ <stdio> <exit_code level="fatal" range="1:"/> </stdio> - <version_command>python coords2clnt.py --version</version_command> + <version_command>python $__tool_directory__/coords2clnt.py --version</version_command> <command interpreter="python"><![CDATA[coords2clnt.py #if $positional_1 and $positional_1 is not None: $positional_1
--- a/extract_aln_ends.py Sat Dec 19 06:16:22 2015 -0500 +++ b/extract_aln_ends.py Tue Jan 26 04:38:27 2016 -0500 @@ -1,5 +1,17 @@ #!/usr/bin/env python +import argparse +import logging +from sys import stdout +from shutil import rmtree +from tempfile import mkdtemp +from pybedtools import BedTool +import pysam +# avoid ugly python IOError when stdout output is piped into another program +# and then truncated (such as piping to head) +from signal import signal, SIGPIPE, SIG_DFL +signal(SIGPIPE, SIG_DFL) + tool_description = """ Extract alignment ends from sam file. @@ -32,25 +44,12 @@ Status: Development """ -import argparse -import logging -from sys import stdout -from shutil import rmtree -from tempfile import mkdtemp -from pybedtools import BedTool -import pysam - class DefaultsRawDescriptionHelpFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter): # To join the behaviour of RawDescriptionHelpFormatter with that of ArgumentDefaultsHelpFormatter pass -# avoid ugly python IOError when stdout output is piped into another program -# and then truncated (such as piping to head) -from signal import signal, SIGPIPE, SIG_DFL -signal(SIGPIPE, SIG_DFL) - # parse command line arguments parser = argparse.ArgumentParser(description=tool_description, epilog=epilog,
--- a/extract_aln_ends.xml Sat Dec 19 06:16:22 2015 -0500 +++ b/extract_aln_ends.xml Tue Jan 26 04:38:27 2016 -0500 @@ -5,7 +5,7 @@ </macros> <expand macro="requirements" /> <expand macro="stdio" /> - <version_command>python extract_aln_ends.py --version</version_command> + <version_command>python $__tool_directory__/extract_aln_ends.py --version</version_command> <command interpreter="python"><![CDATA[ extract_aln_ends.py #if $positional_1 and $positional_1 is not None:
--- a/extract_bcs.py Sat Dec 19 06:16:22 2015 -0500 +++ b/extract_bcs.py Tue Jan 26 04:38:27 2016 -0500 @@ -1,5 +1,15 @@ #!/usr/bin/env python +import argparse +import logging +import re +from sys import stdout +from Bio.SeqIO.QualityIO import FastqGeneralIterator +# avoid ugly python IOError when stdout output is piped into another program +# and then truncated (such as piping to head) +from signal import signal, SIGPIPE, SIG_DFL +signal(SIGPIPE, SIG_DFL) + tool_description = """ Exract barcodes from a FASTQ file according to a user-specified pattern. @@ -19,17 +29,6 @@ Status: Testing """ -import argparse -import logging -import re -from sys import stdout -from Bio.SeqIO.QualityIO import FastqGeneralIterator - -# avoid ugly python IOError when stdout output is piped into another program -# and then truncated (such as piping to head) -from signal import signal, SIGPIPE, SIG_DFL -signal(SIGPIPE, SIG_DFL) - # parse command line arguments parser = argparse.ArgumentParser(description=tool_description, epilog=epilog,
--- a/extract_bcs.xml Sat Dec 19 06:16:22 2015 -0500 +++ b/extract_bcs.xml Tue Jan 26 04:38:27 2016 -0500 @@ -5,7 +5,7 @@ </macros> <expand macro="requirements" /> <expand macro="stdio" /> - <version_command>python extract_bcs.py --version</version_command> + <version_command>python $__tool_directory__/extract_bcs.py --version</version_command> <command interpreter="python"><![CDATA[ extract_bcs.py #if $positional_1 and $positional_1 is not None:
--- a/merge_pcr_duplicates.py Sat Dec 19 06:16:22 2015 -0500 +++ b/merge_pcr_duplicates.py Tue Jan 26 04:38:27 2016 -0500 @@ -1,5 +1,18 @@ #!/usr/bin/env python +import argparse +import logging +from sys import stdout +import pandas as pd +from subprocess import check_call +from shutil import rmtree +from tempfile import mkdtemp +from os.path import isfile +# avoid ugly python IOError when stdout output is piped into another program +# and then truncated (such as piping to head) +from signal import signal, SIGPIPE, SIG_DFL +signal(SIGPIPE, SIG_DFL) + tool_description = """ Merge PCR duplicates according to random barcode library. @@ -28,24 +41,6 @@ Status: Testing """ -import argparse -import logging -from sys import stdout -from Bio import SeqIO -import pandas as pd - -# avoid ugly python IOError when stdout output is piped into another program -# and then truncated (such as piping to head) -from signal import signal, SIGPIPE, SIG_DFL -signal(SIGPIPE, SIG_DFL) - - -def fasta_tuple_generator(fasta_iterator): - "Yields id, sequence tuples given an iterator over Biopython SeqIO objects." - for record in input_seq_iterator: - yield (record.id, str(record.seq)) - - # parse command line arguments parser = argparse.ArgumentParser(description=tool_description, epilog=epilog, @@ -56,16 +51,11 @@ help="Path to bed6 file containing alignments.") parser.add_argument( "bclib", - help="Path to fasta barcode library.") + help="Path to fastq barcode library.") # optional arguments parser.add_argument( "-o", "--outfile", help="Write results to this file.") -parser.add_argument( - "--fasta-library", - dest="fasta_library", - action="store_true", - help="Read random barcode library as fasta format.") # misc arguments parser.add_argument( "-v", "--verbose", @@ -78,7 +68,7 @@ parser.add_argument( '--version', action='version', - version='0.1.0') + version='0.2.0') args = parser.parse_args() @@ -96,50 +86,65 @@ logging.info(" outfile: '{}'".format(args.outfile)) logging.info("") -# load barcode library into dictionary -input_handle = open(args.bclib, "rU") -if args.fasta_library: - input_seq_iterator = SeqIO.parse(input_handle, "fasta") -else: - input_seq_iterator = SeqIO.parse(input_handle, "fastq") -bcs = pd.DataFrame.from_records( - data=fasta_tuple_generator(input_seq_iterator), - columns=["read_id", "bc"]) +# see if alignments are empty and the tool can quit +n_alns = sum(1 for line in open(args.alignments)) +if n_alns == 0: + logging.warning("WARNING: Working on empty set of alignments, writing empty output.") + eventalnout = (open(args.outfile, "w") if args.outfile is not None else stdout) + eventalnout.close() + exit(0) + +# check input filenames +if not isfile(args.bclib): + raise Exception("ERROR: barcode library '{}' not found.") +if not isfile(args.alignments): + raise Exception("ERROR: alignments '{}' not found.") + +try: + tmpdir = mkdtemp() + logging.debug("tmpdir: " + tmpdir) -# load alignments -alns = pd.read_csv( - args.alignments, - sep="\t", - names=["chrom", "start", "stop", "read_id", "score", "strand"]) + # prepare barcode library + syscall1 = "cat " + args.bclib + " | awk 'BEGIN{OFS=\"\\t\"}NR%4==1{gsub(/^@/,\"\"); id=$1}NR%4==2{bc=$1}NR%4==3{print id,bc}' | sort -k1,1 > " + tmpdir + "/bclib.csv" + check_call(syscall1, shell=True) + + # prepare alinments + syscall2 = "cat " + args.alignments + " | awk -F \"\\t\" 'BEGIN{OFS=\"\\t\"}{split($4, a, \" \"); $4 = a[1]; print}'| sort -k4,4 > " + tmpdir + "/alns.csv" + check_call(syscall2, shell=True) -# keep id parts up to first whitespace -alns["read_id"] = alns["read_id"].str.split(' ').str.get(0) + # join barcode library and alignments + syscall3 = "join -1 1 -2 4 " + tmpdir + "/bclib.csv " + tmpdir + "/alns.csv " + " | awk 'BEGIN{OFS=\"\\t\"}{print $3,$4,$5,$2,$6,$7}' > " + tmpdir + "/bcalib.csv" + check_call(syscall3, shell=True) -# combine barcode library and alignments -bcalib = pd.merge( - bcs, alns, - on="read_id", - how="inner", - sort=False) + # get alignments combined with barcodes + bcalib = pd.read_csv( + tmpdir + "/bcalib.csv", + sep="\t", + names=["chrom", "start", "stop", "bc", "score", "strand"]) +finally: + logging.debug("removed tmpdir: " + tmpdir) + rmtree(tmpdir) + +# fail if alignments given but combined library is empty if bcalib.empty: raise Exception("ERROR: no common entries for alignments and barcode library found. Please check your input files.") -n_alns = len(alns.index) + +# warn if not all alignments could be assigned a barcode n_bcalib = len(bcalib.index) if n_bcalib < n_alns: logging.warning( - "{} of {} alignments could not be associated with a random barcode.".format( - n_alns - n_bcalib, n_alns)) + "{} of {} alignments could not be associated with a random barcode.".format(n_alns - n_bcalib, n_alns)) # remove entries with barcodes that has uncalled base N bcalib_cleaned = bcalib.drop(bcalib[bcalib.bc.str.contains("N")].index) n_bcalib_cleaned = len(bcalib_cleaned) -if n_bcalib_cleaned < n_bcalib: - msg = "{} of {} alignments had random barcodes containing uncalled bases and were dropped.".format( - n_bcalib - n_bcalib_cleaned, n_bcalib) - if n_bcalib_cleaned < (0.8 * n_bcalib): - logging.warning(msg) - else: - logging.info(msg) +# if n_bcalib_cleaned < n_bcalib: +# msg = "{} of {} alignments had random barcodes containing uncalled bases and were dropped.".format( +# n_bcalib - n_bcalib_cleaned, n_bcalib) +# if n_bcalib_cleaned < (0.8 * n_bcalib): +# logging.warning(msg) +# else: +# logging.info(msg) # count and merge pcr duplicates # grouping sorts by keys, so the ouput will be properly sorted
--- a/merge_pcr_duplicates.xml Sat Dec 19 06:16:22 2015 -0500 +++ b/merge_pcr_duplicates.xml Tue Jan 26 04:38:27 2016 -0500 @@ -1,11 +1,15 @@ -<tool id="merge_pcr_duplicates.py" name="Merge PCR duplicates" version="0.1.0"> +<tool id="merge_pcr_duplicates.py" name="Merge PCR duplicates" version="0.2.0"> <description>according to random barcode library.</description> <macros> <import>macros.xml</import> </macros> <expand macro="requirements" /> <expand macro="stdio" /> - <version_command>python merge_pcr_duplicates.py --version</version_command> + <requirements> + <requirement type="package" version="4.1.0">gnu_awk</requirement> + <requirement type="package" version="8.22">gnu_coreutils</requirement> + </requirements> + <version_command>python $__tool_directory__/merge_pcr_duplicates.py --version</version_command> <command interpreter="python"><![CDATA[merge_pcr_duplicates.py #if $positional_1 and $positional_1 is not None: $positional_1 @@ -18,7 +22,7 @@ > $default]]></command> <inputs> <param area="false" label="bed6 file containing alignments." name="positional_1" type="data" format="bed"/> - <param area="false" label="fasta barcode library." name="positional_2" type="data" format="fastq"/> + <param area="false" label="fastaq barcode library." name="positional_2" type="data" format="fastq"/> </inputs> <outputs> <data format="bed" hidden="false" name="default"/>
--- a/remove_tail.py Sat Dec 19 06:16:22 2015 -0500 +++ b/remove_tail.py Tue Jan 26 04:38:27 2016 -0500 @@ -1,5 +1,14 @@ #!/usr/bin/env python +import argparse +import logging +from sys import stdout +from Bio.SeqIO.QualityIO import FastqGeneralIterator +# avoid ugly python IOError when stdout output is piped into another program +# and then truncated (such as piping to head) +from signal import signal, SIGPIPE, SIG_DFL +signal(SIGPIPE, SIG_DFL) + tool_description = """ Remove a certain number of nucleotides from the 3'-tails of sequences in fastq format. @@ -18,16 +27,6 @@ Status: Testing """ -import argparse -import logging -from sys import stdout -from Bio.SeqIO.QualityIO import FastqGeneralIterator - -# avoid ugly python IOError when stdout output is piped into another program -# and then truncated (such as piping to head) -from signal import signal, SIGPIPE, SIG_DFL -signal(SIGPIPE, SIG_DFL) - # parse command line arguments parser = argparse.ArgumentParser(description=tool_description, epilog=epilog,
--- a/remove_tail.xml Sat Dec 19 06:16:22 2015 -0500 +++ b/remove_tail.xml Tue Jan 26 04:38:27 2016 -0500 @@ -5,7 +5,7 @@ </macros> <expand macro="requirements" /> <expand macro="stdio" /> - <version_command>python remove_tail.py --version</version_command> + <version_command>python $__tool_directory__/remove_tail.py --version</version_command> <command interpreter="python"><![CDATA[remove_tail.py #if $positional_1 and $positional_1 is not None: $positional_1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rm_spurious_events.pl Tue Jan 26 04:38:27 2016 -0500 @@ -0,0 +1,87 @@ +#!/usr/local/perl/bin/perl +use feature ':5.10'; +use strict 'vars'; +use warnings; +use Getopt::Long; +use Pod::Usage; +use List::Util qw/ min max /; +use POSIX qw/ceil floor/; +use File::Temp qw(tempdir); +use File::Basename; + +=head1 NAME + +filterSquashedReads.pl --frac_max FLOAT + +=head1 SYNOPSIS + +this script reads sites after pcr duplicate removal. +for all crosslinking sites sharing start and stop coordinates, the maximum number +of squashed reads is determined. +alignments having less than frac_max * max reads are discarded + +assumes bed entries to be sorted chr,strand,start,stop,score with score descending + +Options: + + --frac_max filter out alignments supported by less reads than this fraction of the maximum number of reads per position + -debug enable debug output + -help brief help message + -man full documentation + +=head1 DESCRIPTION + +=cut + +############################################################################### +# parse command line options +############################################################################### +my $frac_max = 0.1; +my $help; +my $man; +my $debug; +my $result = GetOptions ( "frac_max=f" => \$frac_max, + "help" => \$help, + "man" => \$man, + "debug" => \$debug); +pod2usage(-exitstatus => 1, -verbose => 1) if $help; +pod2usage(-exitstatus => 0, -verbose => 2) if $man; +($result) or pod2usage(2); + +############################################################################### +# main +############################################################################### +my $current_chr = ''; +my $current_start = -1; +my $current_stop = -1; +my $current_strand = ''; +my $current_max = -1; +my $current_threshold = -1; + +while (<>) { + my ($chr, $start, $stop, $id, $count, $strand) = split("\t"); + # my ($count, undef, $start, $chr, $strand, $stop) = split("\t"); + + if ($current_start != $start or + $current_stop != $stop or + $current_chr ne $chr or + $current_strand ne $strand) { + # if this is the first occourence of these coordinates + # this must be the new maximum + # save new state + $current_start = $start; + $current_stop = $stop; + $current_chr = $chr; + $current_strand = $strand; + # print record and record maximum + $current_max = $count; + $current_threshold = $count*$frac_max; + print $_; + $debug and say "new threshold ${current_threshold} @ $chr $start $stop $strand $count"; + } elsif ($count >= $current_threshold) { + # if it is not the first occourence, evaluate threshold and print if valid + print $_; + } else { + $debug and say "below threshold @ $chr $start $stop $strand $count"; + } +}
--- a/rm_spurious_events.py Sat Dec 19 06:16:22 2015 -0500 +++ b/rm_spurious_events.py Tue Jan 26 04:38:27 2016 -0500 @@ -1,5 +1,10 @@ #!/usr/bin/env python +import argparse +import logging +from subprocess import check_call +import os + tool_description = """ Remove spurious events originating from errors in random sequence tags. @@ -7,8 +12,6 @@ of events the maximum number of PCR duplicates is determined. All events that are supported by less than 10 percent of this maximum count are removed. -By default output is written to stdout. - Input: * bed6 file containing crosslinking events with score field set to number of PCR duplicates @@ -19,7 +22,7 @@ Example usage: - remove spurious events from spurious.bed and write results to file cleaned.bed -rm_spurious_events.py spurious.bed --out cleaned.bed +rm_spurious_events.py spurious.bed --oufile cleaned.bed """ epilog = """ @@ -30,89 +33,74 @@ Status: Testing """ -import argparse -import logging -from sys import stdout -import pandas as pd - class DefaultsRawDescriptionHelpFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter): # To join the behaviour of RawDescriptionHelpFormatter with that of ArgumentDefaultsHelpFormatter pass -# avoid ugly python IOError when stdout output is piped into another program -# and then truncated (such as piping to head) -from signal import signal, SIGPIPE, SIG_DFL -signal(SIGPIPE, SIG_DFL) - -# parse command line arguments -parser = argparse.ArgumentParser(description=tool_description, - epilog=epilog, - formatter_class=DefaultsRawDescriptionHelpFormatter) -# positional arguments -parser.add_argument( - "events", - help="Path to bed6 file containing alignments.") -# optional arguments -parser.add_argument( - "-o", "--outfile", - help="Write results to this file.") -parser.add_argument( - "-t", "--threshold", - type=float, - default=0.1, - help="Threshold for spurious event removal." -) -# misc arguments -parser.add_argument( - "-v", "--verbose", - help="Be verbose.", - action="store_true") -parser.add_argument( - "-d", "--debug", - help="Print lots of debugging information", - action="store_true") -parser.add_argument( - '--version', - action='version', - version='0.1.0') -args = parser.parse_args() - -if args.debug: - logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(filename)s - %(levelname)s - %(message)s") -elif args.verbose: - logging.basicConfig(level=logging.INFO, format="%(filename)s - %(levelname)s - %(message)s") -else: - logging.basicConfig(format="%(filename)s - %(levelname)s - %(message)s") -logging.info("Parsed arguments:") -logging.info(" alignments: '{}'".format(args.events)) -logging.info(" threshold: '{}'".format(args.threshold)) -if args.outfile: - logging.info(" outfile: enabled writing to file") - logging.info(" outfile: '{}'".format(args.outfile)) -logging.info("") +def main(): + # parse command line arguments + parser = argparse.ArgumentParser(description=tool_description, + epilog=epilog, + formatter_class=DefaultsRawDescriptionHelpFormatter) + # positional arguments + parser.add_argument( + "events", + help="Path to bed6 file containing alignments.") + # optional arguments + parser.add_argument( + "-o", "--outfile", + required=True, + help="Write results to this file.") + parser.add_argument( + "-t", "--threshold", + type=float, + default=0.1, + help="Threshold for spurious event removal." + ) + # misc arguments + parser.add_argument( + "-v", "--verbose", + help="Be verbose.", + action="store_true") + parser.add_argument( + "-d", "--debug", + help="Print lots of debugging information", + action="store_true") + parser.add_argument( + '--version', + action='version', + version='0.1.0') -# check threshold parameter value -if args.threshold < 0 or args.threshold > 1: - raise ValueError("Threshold must be in [0,1].") - -# load alignments -alns = pd.read_csv( - args.events, - sep="\t", - names=["chrom", "start", "stop", "read_id", "score", "strand"]) + args = parser.parse_args() -# remove all alignments that not enough PCR duplicates with respect to -# the group maximum -grouped = alns.groupby(['chrom', 'start', 'stop', 'strand'], group_keys=False) -alns_cleaned = grouped.apply(lambda g: g[g["score"] >= args.threshold * g["score"].max()]) + if args.debug: + logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(filename)s - %(levelname)s - %(message)s") + elif args.verbose: + logging.basicConfig(level=logging.INFO, format="%(filename)s - %(levelname)s - %(message)s") + else: + logging.basicConfig(format="%(filename)s - %(levelname)s - %(message)s") + logging.info("Parsed arguments:") + logging.info(" alignments: '{}'".format(args.events)) + logging.info(" threshold: '{}'".format(args.threshold)) + if args.outfile: + logging.info(" outfile: enabled writing to file") + logging.info(" outfile: '{}'".format(args.outfile)) + logging.info("") -# write coordinates of crosslinking event alignments -alns_cleaned_out = (open(args.outfile, "w") if args.outfile is not None else stdout) -alns_cleaned.to_csv( - alns_cleaned_out, - columns=['chrom', 'start', 'stop', 'read_id', 'score', 'strand'], - sep="\t", index=False, header=False) -alns_cleaned_out.close() + # check threshold parameter value + if args.threshold < 0 or args.threshold > 1: + raise ValueError("Threshold must be in [0,1].") + + if not os.path.isfile(args.events): + raise Exception("ERROR: file '{}' not found.") + + # prepare barcode library + syscall = "cat " + args.events + " | sort -k1,1V -k6,6 -k2,2n -k3,3 -k5,5nr | perl " + os.path.dirname(os.path.realpath(__file__)) + "/rm_spurious_events.pl --frac_max " + str(args.threshold) + "| sort -k1,1V -k2,2n -k3,3n -k6,6 -k4,4 -k5,5nr > " + args.outfile + check_call(syscall, shell=True) + + +if __name__ == "__main__": + main()
--- a/rm_spurious_events.xml Sat Dec 19 06:16:22 2015 -0500 +++ b/rm_spurious_events.xml Tue Jan 26 04:38:27 2016 -0500 @@ -5,7 +5,11 @@ </macros> <expand macro="requirements" /> <expand macro="stdio" /> - <version_command>python rm_spurious_events.py --version</version_command> + <requirements> + <requirement type="package" version="5.18">perl</requirement> + <requirement type="package" version="8.22">gnu_coreutils</requirement> + </requirements> + <version_command>python $__tool_directory__/rm_spurious_events.py --version</version_command> <command interpreter="python"><![CDATA[rm_spurious_events.py #if $positional_1 and $positional_1 is not None: $positional_1 @@ -14,7 +18,7 @@ #if $threshold and $threshold is not None: --threshold $threshold #end if -> $default]]></command> +--outfile $default]]></command> <inputs> <param area="false" label="bed6 file containing alignments." name="positional_1" type="data" format="bed"/> <param help="(--threshold)" label="Threshold for spurious event removal." name="threshold" optional="true" type="float" value="0.1"/>
--- a/tool_dependencies.xml Sat Dec 19 06:16:22 2015 -0500 +++ b/tool_dependencies.xml Tue Jan 26 04:38:27 2016 -0500 @@ -7,18 +7,27 @@ <repository name="package_biopython_1_65" owner="biopython"/> </package> --> <package name="pandas" version="0.16"> - <repository changeset_revision="8766fb73e88f" name="package_python_2_7_pandas_0_16" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="67abd81bd3ac" name="package_python_2_7_pandas_0_16" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> <!-- <package name="pybedtools" version="0.7.4"> <repository name="package_python_2_7_pybedtools_0_7_4" owner="iuc"/> </package> --> <package name="pybedtools" version="0.6.9"> - <repository changeset_revision="7d07b5c08b8f" name="package_python_2_7_pybedtools_0_6_9" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="c4641c3a869f" name="package_python_2_7_pybedtools_0_6_9" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> <package name="pysam" version="0.8.3"> - <repository changeset_revision="f88a5f4ac6c1" name="package_python_2_7_pysam_0_8_3" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="7ac80143c68d" name="package_python_2_7_pysam_0_8_3" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> <package name="flexbar" version="2.5"> <repository changeset_revision="45d1b8373762" name="package_flexbar_2_5" owner="rnateam" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> + <package name="gnu_awk" version="4.1.0"> + <repository changeset_revision="0f0bdef2f686" name="package_gnu_awk_4_1_0" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + </package> + <package name="gnu_coreutils" version="8.22"> + <repository changeset_revision="8b60cf3e0c07" name="package_gnu_coreutils_8_22" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + </package> + <package name="perl" version="5.18"> + <repository changeset_revision="6f144bd786a8" name="package_perl_5_18" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + </package> </tool_dependency>