Mercurial > repos > pjbriggs > amplicon_analysis_pipeline
changeset 24:fe354f5dd0ee draft
planemo upload for repository https://github.com/pjbriggs/Amplicon_analysis-galaxy commit 34034189622f4cf14edd12a4de43739c37b50730
author | pjbriggs |
---|---|
date | Thu, 30 Aug 2018 08:13:55 -0400 |
parents | 545f23776953 |
children | fd65832d1074 |
files | amplicon_analysis_pipeline.py amplicon_analysis_pipeline.xml relabel_fasta.py |
diffstat | 3 files changed, 97 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/amplicon_analysis_pipeline.py Wed Aug 29 10:57:51 2018 -0400 +++ b/amplicon_analysis_pipeline.py Thu Aug 30 08:13:55 2018 -0400 @@ -234,6 +234,15 @@ find_executable("single_rarefaction.py"))) os.environ["QIIME_CONFIG_FP"] = qiime_config_file print "-- set QIIME_CONFIG_FP: %s" % os.environ["QIIME_CONFIG_FP"] + # Check for fasta_number.py + fasta_number = find_executable("fasta_number.py") + if not fasta_number: + # Make a link to relabel_fasta.py + relabel_fasta_path = os.path.join(os.path.realpath(__file__), + "relabel_fasta.py") + fasta_number_path = os.path.join(os.path.abspath("bin"), + "fasta_number.py") + os.symlink(relabel_fasta_path,fasta_number_path) # Construct the pipeline command print "Amplicon analysis: constructing pipeline command"
--- a/amplicon_analysis_pipeline.xml Wed Aug 29 10:57:51 2018 -0400 +++ b/amplicon_analysis_pipeline.xml Thu Aug 30 08:13:55 2018 -0400 @@ -5,7 +5,6 @@ <requirement type="package" version="1.2.2">amplicon_analysis_pipeline</requirement> <requirement type="package" version="2010-04-29">microbiomeutil-chimeraslayer</requirement> <requirement type="package" version="1.2.22">uclust-qiime</requirement> - <requirement type="package">fasta_number</requirement> <!-- bioconda dependencies --> <requirement type="package" version="2.7">python</requirement> <requirement type="package" version="1.11">cutadapt</requirement>
#!/usr/bin/env python
# relabel_fasta.py: new file added by this changeset (previously /dev/null)

DESCRIPTION = \
"""Replace FASTA labels with new labels <PREFIX>1, <PREFIX>2,
<PREFIX>3 ... (<PREFIX> is provided by the user via the command
line).

Can be used to label OTUs as OTU_1, OTU_2 etc.

This reimplements the functionality of the fasta_number.py utility
from https://drive5.com/python/fasta_number_py.html
"""

import argparse
import sys

def relabel_fasta(fp,prefix,include_size=False):
    """
    Relabel sequence records in a FASTA file

    Header lines (those starting with '>') are replaced with
    '><PREFIX><N>', where N counts the records starting from 1.
    Blank lines are dropped; all other lines are passed through
    with the trailing newline removed.

    Arguments:
      fp (File): file-like object (or any iterable of lines)
        to read the input FASTA data from
      prefix (str): prefix to use in new labels
      include_size (bool): if True then copy the first
        'size=...' field of each ';'-separated input label
        into the new label (default is not to copy the size)

    Yields: updated lines from the input FASTA, without
      trailing newlines.

    Raises:
      ValueError: if include_size is True and a label has no
        'size=...' field.
    """
    nlabel = 0
    for line in fp:
        # Strip trailing newlines
        line = line.rstrip('\n')
        if not line:
            # Skip blank lines
            continue
        if not line.startswith('>'):
            # Sequence data: echo the line to output
            yield line
            continue
        # Deal with start of a sequence record
        nlabel += 1
        if not include_size:
            yield ">%s%d" % (prefix,nlabel)
            continue
        # Extract size from the label; next() on a generator
        # behaves the same on Python 2 and 3, unlike indexing
        # the result of filter()
        label = line[1:].strip()
        size = next((field for field in label.split(';')
                     if field.startswith("size=")),
                    None)
        if size is None:
            raise ValueError("Couldn't locate 'size' in "
                             "label: %s" % label)
        # Use the 'prefix' argument rather than a module-level
        # 'args' object, so the function also works when imported
        yield ">%s%d;%s" % (prefix,nlabel,size)

def main():
    """
    Command line entry point: relabel a FASTA file to stdout
    """
    # Set up command line parser
    p = argparse.ArgumentParser(description=DESCRIPTION)
    p.add_argument("--needsize",
                   action="store_true",
                   help="include the size as part of the "
                   "output label ('size=...' must be present "
                   "in the input FASTA labels). Output labels "
                   "will be '<PREFIX><NUMBER>;size=<SIZE>'")
    p.add_argument("--nosize",
                   action="store_true",
                   help="don't include the size as part of "
                   "the output label (this is the default)")
    p.add_argument("fasta",
                   metavar="FASTA",
                   help="input FASTA file")
    p.add_argument("prefix",
                   metavar="PREFIX",
                   help="prefix to use for labels in output")
    # Process command line
    args = p.parse_args()
    # 'U' requests universal newlines on Python 2; Python 3 text
    # mode does this by default and newer versions reject 'U'
    mode = 'rU' if sys.version_info[0] < 3 else 'r'
    # Relabel FASTA
    with open(args.fasta,mode) as fasta:
        for line in relabel_fasta(fasta,
                                  args.prefix,
                                  include_size=args.needsize):
            # Single-argument form prints identically on
            # Python 2 and Python 3
            print(line)

if __name__ == "__main__":
    main()