changeset 24:fe354f5dd0ee draft

planemo upload for repository https://github.com/pjbriggs/Amplicon_analysis-galaxy commit 34034189622f4cf14edd12a4de43739c37b50730
author pjbriggs
date Thu, 30 Aug 2018 08:13:55 -0400
parents 545f23776953
children fd65832d1074
files amplicon_analysis_pipeline.py amplicon_analysis_pipeline.xml relabel_fasta.py
diffstat 3 files changed, 97 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/amplicon_analysis_pipeline.py	Wed Aug 29 10:57:51 2018 -0400
+++ b/amplicon_analysis_pipeline.py	Thu Aug 30 08:13:55 2018 -0400
@@ -234,6 +234,15 @@
                                find_executable("single_rarefaction.py")))
     os.environ["QIIME_CONFIG_FP"] = qiime_config_file
     print "-- set QIIME_CONFIG_FP: %s" % os.environ["QIIME_CONFIG_FP"]
+    # Check for fasta_number.py
+    fasta_number = find_executable("fasta_number.py")
+    if not fasta_number:
+        # Link to the relabel_fasta.py next to this script (join onto
+        # the script's directory, not onto the script file path itself)
+        script_dir = os.path.dirname(os.path.realpath(__file__))
+        relabel_fasta_path = os.path.join(script_dir,"relabel_fasta.py")
+        fasta_number_path = os.path.join(os.path.abspath("bin"),"fasta_number.py")
+        os.symlink(relabel_fasta_path,fasta_number_path)
 
     # Construct the pipeline command
     print "Amplicon analysis: constructing pipeline command"
--- a/amplicon_analysis_pipeline.xml	Wed Aug 29 10:57:51 2018 -0400
+++ b/amplicon_analysis_pipeline.xml	Thu Aug 30 08:13:55 2018 -0400
@@ -5,7 +5,6 @@
     <requirement type="package" version="1.2.2">amplicon_analysis_pipeline</requirement>
     <requirement type="package" version="2010-04-29">microbiomeutil-chimeraslayer</requirement>
     <requirement type="package" version="1.2.22">uclust-qiime</requirement>
-    <requirement type="package">fasta_number</requirement>
     <!-- bioconda dependencies -->
     <requirement type="package" version="2.7">python</requirement>
     <requirement type="package" version="1.11">cutadapt</requirement>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/relabel_fasta.py	Thu Aug 30 08:13:55 2018 -0400
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+
+DESCRIPTION = \
+"""Replace FASTA labels with new labels <PREFIX>1, <PREFIX>2,
+<PREFIX>3 ... (<PREFIX> is provided by the user via the command
+line).
+
+Can be used to label OTUs as OTU_1, OTU_2 etc.
+
+This reimplements the functionality of the fasta_number.py utility
+from https://drive5.com/python/fasta_number_py.html
+"""
+
+import argparse
+
+def relabel_fasta(fp,prefix,include_size=False):
+    """
+    Relabel sequence records in a FASTA file
+
+    Arguments:
+      fp (File): file-like object opened for reading
+        input FASTA data from
+      prefix (str): prefix to use in new labels
+      include_size (bool): if True then copy
+        'size=...' records into new labels (default
+        is not to copy the size)
+
+    Yields: updated lines from the input FASTA (without
+      trailing newlines; blank lines are dropped).
+
+    Raises: Exception if include_size is True and a
+      label has no 'size=...' field.
+    """
+    # Number of sequence records seen so far
+    nlabel = 0
+    for line in fp:
+        # Strip trailing newlines
+        line = line.rstrip('\n')
+        if not line:
+            # Skip blank lines
+            continue
+        if not line.startswith('>'):
+            # Sequence data: echo the line to output
+            yield line
+            continue
+        # Start of a sequence record: build the new label from
+        # the 'prefix' argument (not the command-line globals)
+        nlabel += 1
+        new_label = ">%s%d" % (prefix,nlabel)
+        if include_size:
+            # Copy the first 'size=...' field from the old label
+            label = line[1:].strip()
+            sizes = [x for x in label.split(';')
+                     if x.startswith("size=")]
+            if not sizes:
+                raise Exception("Couldn't locate 'size' in "
+                                "label: %s" % label)
+            new_label = "%s;%s" % (new_label,sizes[0])
+        yield new_label
+
+if __name__ == "__main__":
+    # Build the command line parser
+    parser = argparse.ArgumentParser(description=DESCRIPTION)
+    parser.add_argument("--needsize",action="store_true",
+                        help="include the size as part of the "
+                        "output label ('size=...' must be present "
+                        "in the input FASTA labels). Output labels "
+                        "will be '<PREFIX><NUMBER>;size=<SIZE>'")
+    # The value of --nosize is not read anywhere below; omitting
+    # --needsize already gives labels without the size
+    parser.add_argument("--nosize",action="store_true",
+                        help="don't include the size as part of "
+                        "the output label (this is the default)")
+    parser.add_argument("fasta",metavar="FASTA",
+                        help="input FASTA file")
+    parser.add_argument("prefix",metavar="PREFIX",
+                        help="prefix to use for labels in output")
+    # Parse the command line
+    args = parser.parse_args()
+    # Read the input FASTA and write each relabelled line to
+    # stdout as it is produced
+    with open(args.fasta,'rU') as fasta_in:
+        relabelled = relabel_fasta(fasta_in,
+                                   args.prefix,
+                                   include_size=args.needsize)
+        for new_line in relabelled:
+            print new_line
+