changeset 5:c2c77e786cfc draft

"make_nr v0.0.2"
author peterjc
date Thu, 18 Mar 2021 12:48:20 +0000
parents 4e8ccac06848
children 566561a4e526
files test-data/empty.fasta tools/make_nr/README.rst tools/make_nr/make_nr.py tools/make_nr/make_nr.xml
diffstat 3 files changed, 27 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/tools/make_nr/README.rst	Thu Aug 20 20:16:45 2020 +0000
+++ b/tools/make_nr/README.rst	Thu Mar 18 12:48:20 2021 +0000
@@ -83,7 +83,9 @@
 ======= ======================================================================
 Version Changes
 ------- ----------------------------------------------------------------------
-v0.0.0  - Initial version
+v0.0.2  - Fixed bug writing files when there were no duplicates
+v0.0.1  - Added option to sort merged IDs, and support for gzipped files
+v0.0.0  - Initial version (not published to main Galaxy Tool Shed)
 ======= ======================================================================
 
 
--- a/tools/make_nr/make_nr.py	Thu Aug 20 20:16:45 2020 +0000
+++ b/tools/make_nr/make_nr.py	Thu Mar 18 12:48:20 2021 +0000
@@ -12,13 +12,14 @@
 
 import gzip
 import os
+import shutil
 import sys
 
 from optparse import OptionParser
 
 
 if "-v" in sys.argv or "--version" in sys.argv:
-    print("v0.0.1")
+    print("v0.0.2")
     sys.exit(0)
 
 
@@ -139,9 +140,20 @@
             "leaving %i representative records\n"
             % (unique, len(duplicates), len(representatives))
         )
+    elif len(input_fasta) == 1:
+        # Single file, no need to even edit titles
+        shutil.copy(os.path.abspath(input_fasta[0]), output_fasta)
+        sys.stderr.write("No perfect duplicates in file, %i unique entries\n" % unique)
     else:
-        os.symlink(os.path.abspath(input_fasta), output_fasta)
-        sys.stderr.write("No perfect duplicates in file, %i unique entries\n" % unique)
+        with open(output_fasta, "w") as handle:
+            for f in input_fasta:
+                with gzip_open(f) as in_handle:
+                    for title, seq in SimpleFastaParser(in_handle):
+                        handle.write(">%s\n%s\n" % (title, seq))
+        sys.stderr.write(
+            "No perfect duplicates in %i files, %i unique entries\n"
+            % (len(input_fasta), unique)
+        )
 
 
 make_nr(args, options.output, options.sep, options.alphasort)
--- a/tools/make_nr/make_nr.xml	Thu Aug 20 20:16:45 2020 +0000
+++ b/tools/make_nr/make_nr.xml	Thu Mar 18 12:48:20 2021 +0000
@@ -1,4 +1,4 @@
-<tool id="make_nr" name="Make FASTA non-redundant" version="0.0.1">
+<tool id="make_nr" name="Make FASTA non-redundant" version="0.0.2">
     <description>by combining duplicated sequences</description>
     <requirements>
         <requirement type="package" version="1.67">biopython</requirement>
@@ -51,6 +51,14 @@
             <param name="alphasort" value="-a"/>
             <output name="output" file="deduplicate.sortids.fasta" ftype="fasta"/>
         </test>
+        <test>
+            <param name="input" value="empty.fasta" ftype="fasta"/>
+            <output name="output" file="empty.fasta" ftype="fasta"/>
+        </test>
+        <test>
+            <param name="input" value="empty.fasta,empty.fasta" ftype="fasta"/>
+            <output name="output" file="empty.fasta" ftype="fasta"/>
+        </test>
     </tests>
     <help>
 **What it does**