Mercurial > repos > peterjc > make_nr
changeset 5:c2c77e786cfc draft
"make_nr v0.0.2"
author | peterjc |
---|---|
date | Thu, 18 Mar 2021 12:48:20 +0000 |
parents | 4e8ccac06848 |
children | 566561a4e526 |
files | test-data/empty.fasta tools/make_nr/README.rst tools/make_nr/make_nr.py tools/make_nr/make_nr.xml |
diffstat | 3 files changed, 27 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/tools/make_nr/README.rst Thu Aug 20 20:16:45 2020 +0000
+++ b/tools/make_nr/README.rst Thu Mar 18 12:48:20 2021 +0000
@@ -83,7 +83,9 @@
 ======= ======================================================================
 Version Changes
 ------- ----------------------------------------------------------------------
-v0.0.0  - Initial version
+v0.0.2  - Fixed bug writing files when there were no duplicates
+v0.0.1  - Added option to sort merged IDs, and support for gzipped files
+v0.0.0  - Initial version (not published to main Galaxy Tool Shed)
 ======= ======================================================================
--- a/tools/make_nr/make_nr.py Thu Aug 20 20:16:45 2020 +0000
+++ b/tools/make_nr/make_nr.py Thu Mar 18 12:48:20 2021 +0000
@@ -12,13 +12,14 @@
 
 import gzip
 import os
+import shutil
 import sys
 
 from optparse import OptionParser
 
 
 if "-v" in sys.argv or "--version" in sys.argv:
-    print("v0.0.1")
+    print("v0.0.2")
     sys.exit(0)
 
 
@@ -139,9 +140,20 @@
             "leaving %i representative records\n"
             % (unique, len(duplicates), len(representatives))
         )
+    elif len(input_fasta) == 1:
+        # Single file, no need to even edit titles
+        shutil.copy(os.path.abspath(input_fasta[0]), output_fasta)
+        sys.stderr.write("No perfect duplicates in file, %i unique entries\n" % unique)
     else:
-        os.symlink(os.path.abspath(input_fasta), output_fasta)
-        sys.stderr.write("No perfect duplicates in file, %i unique entries\n" % unique)
+        with open(output_fasta, "w") as handle:
+            for f in input_fasta:
+                with gzip_open(f) as in_handle:
+                    for title, seq in SimpleFastaParser(in_handle):
+                        handle.write(">%s\n%s\n" % (title, seq))
+        sys.stderr.write(
+            "No perfect duplicates in %i files, %i unique entries\n"
+            % (len(input_fasta), unique)
+        )
 
 
 make_nr(args, options.output, options.sep, options.alphasort)
--- a/tools/make_nr/make_nr.xml Thu Aug 20 20:16:45 2020 +0000
+++ b/tools/make_nr/make_nr.xml Thu Mar 18 12:48:20 2021 +0000
@@ -1,4 +1,4 @@
-<tool id="make_nr" name="Make FASTA non-redundant" version="0.0.1">
+<tool id="make_nr" name="Make FASTA non-redundant" version="0.0.2">
     <description>by combining duplicated sequences</description>
     <requirements>
         <requirement type="package" version="1.67">biopython</requirement>
@@ -51,6 +51,14 @@
             <param name="alphasort" value="-a"/>
             <output name="output" file="deduplicate.sortids.fasta" ftype="fasta"/>
         </test>
+        <test>
+            <param name="input" value="empty.fasta" ftype="fasta"/>
+            <output name="output" file="empty.fasta" ftype="fasta"/>
+        </test>
+        <test>
+            <param name="input" value="empty.fasta,empty.fasta" ftype="fasta"/>
+            <output name="output" file="empty.fasta" ftype="fasta"/>
+        </test>
     </tests>
     <help>
 **What it does**