Mercurial > repos > peterjc > sample_seqs
changeset 8:4231c585b6dd draft
Uploaded v0.2.1, fixed missing test file, added more tests
author | peterjc |
---|---|
date | Mon, 16 Mar 2015 13:40:54 -0400 |
parents | 4099383e0ac8 |
children | a1141d5f1533 |
files | tools/sample_seqs/README.rst tools/sample_seqs/sample_seqs.py tools/sample_seqs/sample_seqs.xml |
diffstat | 3 files changed, 38 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- a/tools/sample_seqs/README.rst Fri Mar 06 05:02:33 2015 -0500 +++ b/tools/sample_seqs/README.rst Mon Mar 16 13:40:54 2015 -0400 @@ -1,7 +1,7 @@ Galaxy tool to sub-sample sequence files ======================================== -This tool is copyright 2014 by Peter Cock, The James Hutton Institute +This tool is copyright 2014-2014 by Peter Cock, The James Hutton Institute (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. See the licence text below (MIT licence). @@ -63,6 +63,9 @@ This works by first counting all your sequences, then calculates the percentage required in order to sample them uniformly (evenly). This makes two passes through the input and is therefore slower. +v0.2.1 - Was missing a file for the functional tests. + - Included testing of stdout messages. + - Includes testing of failure modes. ======= ======================================================================
--- a/tools/sample_seqs/sample_seqs.py Fri Mar 06 05:02:33 2015 -0500 +++ b/tools/sample_seqs/sample_seqs.py Mon Mar 16 13:40:54 2015 -0400 @@ -64,7 +64,7 @@ options, args = parser.parse_args() if options.version: - print("v0.2.0") + print("v0.2.1") sys.exit(0) in_file = options.input @@ -172,7 +172,7 @@ if N < 1: sys_exit("Bad -c count argument %r" % options.count) total = count_sequences(in_file, seq_format) - print("Input file has %i sequences" % total) + sys.stderr.write("Input file has %i sequences\n" % total) if interleaved: # Paired if total % 2:
--- a/tools/sample_seqs/sample_seqs.xml Fri Mar 06 05:02:33 2015 -0500 +++ b/tools/sample_seqs/sample_seqs.xml Mon Mar 16 13:40:54 2015 -0400 @@ -1,4 +1,4 @@ -<tool id="sample_seqs" name="Sub-sample sequences files" version="0.2.0"> +<tool id="sample_seqs" name="Sub-sample sequences files" version="0.2.1"> <description>e.g. to reduce coverage</description> <requirements> <requirement type="package" version="1.65">biopython</requirement> @@ -122,25 +122,52 @@ <param name="type" value="percentage" /> <param name="percent" value="20.0" /> <output name="output_file" file="MID4_GLZRM4E04_rnd30_frclip.sample_N5.sff" ftype="sff"/> + <assert_stderr> + <has_line line="Sampling 20.000% of sequences" /> + <has_line line="Selected 5 records" /> + </assert_stderr> </test> <test> <param name="input_file" value="MID4_GLZRM4E04_rnd30_frclip.sff" ftype="sff" /> <param name="type" value="everyNth" /> - <param name="percent" value="5" /> + <param name="every_n" value="5" /> <param name="interleaved" value="true" /> <output name="output_file" file="MID4_GLZRM4E04_rnd30_frclip.pair_sample_N5.sff" ftype="sff"/> + <assert_stderr> + <has_line line="Sampling every 5th sequence" /> + <has_line line="Selected 3 pairs" /> + </assert_stderr> </test> <test> - <param name="input_file" value="MID4_GLZRM4E04_rnd30.sff" ftype="sff" /> + <param name="input_file" value="MID4_GLZRM4E04_rnd30_frclip.sff" ftype="sff" /> <param name="type" value="desired_count" /> - <param name="count" value="30" /> - <output name="output_file" file="MID4_GLZRM4E04_rnd30.sff" ftype="sff"/> + <param name="count" value="25" /> + <output name="output_file" file="MID4_GLZRM4E04_rnd30_frclip.sff" ftype="sff"/> + <assert_stderr> + <has_line line="Input file has 25 sequences" /> + <has_line line="Taking all the sequences" /> + <has_line line="Selected 25 records" /> + </assert_stderr> </test> <test> <param name="input_file" value="MID4_GLZRM4E04_rnd30_frclip.sff" ftype="sff" /> <param name="type" value="desired_count" /> <param name="count" value="1" /> <output name="output_file" file="MID4_GLZRM4E04_rnd30_frclip.sample_C1.sff" ftype="sff"/> + <assert_stderr> + <has_line line="Input file has 25 sequences" /> + <has_line line="Sampling just first sequence!" /> + <has_line line="Selected 1 records" /> + </assert_stderr> + </test> + <test expect_failure="true" expect_exit_code="1"> + <param name="input_file" value="MID4_GLZRM4E04_rnd30_frclip.sff" ftype="sff" /> + <param name="type" value="desired_count" /> + <param name="count" value="30" /> + <assert_stderr> + <has_line line="Input file has 25 sequences" /> + <has_line line="Requested 30 sequences, but file only has 25." /> + </assert_stderr> </test> </tests> <help>