Mercurial > repos > pjbriggs > amplicon_analysis_pipeline
changeset 5:bbfc9638ba84 draft
First version with (partial) bioconda deps.
author | pjbriggs |
---|---|
date | Wed, 13 Jun 2018 08:39:26 -0400 |
parents | 013bf1e2cc8f |
children | 45db1f97795c |
files | README.rst amplicon_analysis_pipeline.xml relabel_fasta.py |
diffstat | 3 files changed, 84 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
--- a/README.rst Wed Jun 13 07:43:57 2018 -0400 +++ b/README.rst Wed Jun 13 08:39:26 2018 -0400 @@ -194,19 +194,16 @@ - bioawk 1.0 - fastqc 0.11.3 - R 3.2.0 - -Some dependencies are available but with the "wrong" versions: - - - spades (need 3.5.0) - - qiime (need 1.8.0) - - blast (need 2.2.26) - - vsearch (need 1.1.3) + - spades 3.5.0 + - qiime 1.8.0 + - blast-legacy 2.2.26 + - vsearch 1.1.3 + - fasta-splitter 0.2.4 + - rdp_classifier 2.2 The following dependencies are currently unavailable: - fasta_number (need 02jun2015) - - fasta-splitter (need 0.2.4) - - rdp_classifier (need 2.2) - microbiomeutil (need r20110519) (NB usearch 6.1.544 and 8.0.1623 are special cases which must be @@ -218,6 +215,7 @@ ========== ====================================================================== Version Changes ---------- ---------------------------------------------------------------------- +1.2.2.1 Update to get dependencies from bioconda 1.2.2.0 Updated to Amplicon_Analysis_Pipeline version 1.2.2 (removes jackknifed analysis which is not captured by Galaxy tool) 1.2.1.0 Updated to Amplicon_Analysis_Pipeline version 1.2.1 (adds
--- a/amplicon_analysis_pipeline.xml Wed Jun 13 07:43:57 2018 -0400 +++ b/amplicon_analysis_pipeline.xml Wed Jun 13 08:39:26 2018 -0400 @@ -1,20 +1,24 @@ -<tool id="amplicon_analysis_pipeline" name="Amplicon Analysis Pipeline" version="1.2.2.0"> +<tool id="amplicon_analysis_pipeline" name="Amplicon Analysis Pipeline" version="1.2.2.1"> <description>analyse 16S rRNA data from Illumina Miseq paired-end reads</description> <requirements> + <!-- amplicon_analysis_pipeline not available in bioconda --> <requirement type="package" version="1.2.2">amplicon_analysis_pipeline</requirement> + <requirement type="package" version="2.7">python</requirement> <requirement type="package" version="1.11">cutadapt</requirement> - <requirement type="package" version="1.33">sickle</requirement> - <requirement type="package" version="27-08-2013">bioawk</requirement> + <requirement type="package" version="1.33">sickle-trim</requirement> + <requirement type="package" version="1.0">bioawk</requirement> <requirement type="package" version="2.8.1">pandaseq</requirement> <requirement type="package" version="3.5.0">spades</requirement> <requirement type="package" version="0.11.3">fastqc</requirement> <requirement type="package" version="1.8.0">qiime</requirement> - <requirement type="package" version="2.2.26">blast</requirement> + <requirement type="package" version="2.2.26">blast-legacy</requirement> <requirement type="package" version="0.2.4">fasta-splitter</requirement> - <requirement type="package" version="2.2">rdp-classifier</requirement> + <requirement type="package" version="2.2">rdp_classifier</requirement> <requirement type="package" version="3.2.0">R</requirement> <requirement type="package" version="1.1.3">vsearch</requirement> + <!-- microbiomeutil not available in bioconda --> <requirement type="package" version="2010-04-29">microbiomeutil</requirement> + <!-- fasta_number not available in bioconda --> <requirement type="package">fasta_number</requirement> </requirements> <stdio>
#!/usr/bin/env python

"""
Replace FASTA labels with new labels <PREFIX>1, <PREFIX>2,
<PREFIX>3 etc (where <PREFIX> is a user-provided argument).

Can be used to label OTUs as OTU_1, OTU_2 etc.

This is a reimplementation of the fasta_number.py script from
https://drive5.com/python/fasta_number_py.html
"""

import argparse
import sys


def relabel_fasta(fp, prefix, include_size=False):
    """
    Yield the lines of a FASTA stream with the header labels renumbered.

    Header lines (those starting with '>') are replaced by
    ``><prefix><N>``, where N counts the headers seen so far starting
    from 1. All other non-blank lines are yielded unchanged, minus
    their trailing newline. Blank lines are dropped.

    Arguments:
      fp: iterable of text lines (for example an open file)
      prefix: string placed in front of the running number
      include_size: if True then the first ``size=...`` field found in
        the ';'-separated original label is appended to the new label
        as ``;size=...``; every header must then carry such a field

    Yields:
      str: output lines without trailing newlines

    Raises:
      ValueError: if ``include_size`` is set and a header has no field
        starting with ``size=``
    """
    nlabel = 0
    for line in fp:
        # Strip trailing newlines
        line = line.rstrip('\n')
        if not line:
            # Skip blank lines
            continue
        if not line.startswith('>'):
            # Sequence data: echo the line to output
            yield line
            continue
        # Header line: assign the next label number
        nlabel += 1
        if not include_size:
            yield ">%s%d" % (prefix, nlabel)
            continue
        # Extract the size annotation from the original label. next()
        # over a generator is used because indexing the result of
        # filter() only works on Python 2.
        label = line[1:].strip()
        size = next((field for field in label.split(';')
                     if field.startswith("size=")),
                    None)
        if size is None:
            raise ValueError("Couldn't locate 'size' in "
                             "label: %s" % label)
        yield ">%s%d;%s" % (prefix, nlabel, size)


def main(argv=None):
    """
    Command line driver: relabel a FASTA file and print it to stdout.

    Arguments:
      argv: list of command line arguments; if None then argparse
        falls back to ``sys.argv[1:]``
    """
    # Set up command line parser
    p = argparse.ArgumentParser()
    p.add_argument("--needsize", action="store_true")
    # NOTE: --nosize is accepted on the command line but is not acted
    # on anywhere in this script
    p.add_argument("--nosize", action="store_true")
    p.add_argument("fasta")
    p.add_argument("prefix")

    # Process command line
    args = p.parse_args(argv)

    # 'U' (universal newlines) is needed on Python 2 only; on Python 3
    # it is the default for text mode and the flag itself was removed
    # in 3.11
    mode = 'rU' if sys.version_info[0] < 3 else 'r'

    # Relabel FASTA
    with open(args.fasta, mode) as fasta:
        for line in relabel_fasta(fasta,
                                  args.prefix,
                                  include_size=args.needsize):
            # Single-argument form behaves identically on Python 2
            # and Python 3
            print(line)


if __name__ == "__main__":
    main()