Mercurial > repos > bcclaywell > microbiome_pplacer_suite
changeset 0:d4690e65afcd draft
Uploaded
author | bcclaywell |
---|---|
date | Thu, 26 Feb 2015 18:16:36 -0500 |
parents | |
children | c8cc6529038c |
files | bootstrap-wrapper.sh bootstrap.py bootstrap.xml classification-wrapper.sh classification.xml datatypes_conf.xml decorate-wrapper.sh decorate.xml filter-wrapper.sh filter.xml macros.xml pplacer-wrapper.sh pplacer.py pplacer.xml preclassification-wrapper.sh preclassification.xml preprocessing-wrapper.sh preprocessing.xml refpkgzip_to_refpkg.xml render_datatable-wrapper.sh render_datatable.py render_datatable.xml taxtastic.py usearch-wrapper.sh usearch.xml util.sh xlsx_to_csv.xml |
diffstat | 27 files changed, 1512 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/bin/bash
# Galaxy wrapper for bootstrap.py.
# $1 is a tool-generated config file defining PLATE_ID, JUNIOR, ZONE_ID,
# BARCODES, LABELS, METADATA and SAMPLE_INFO; the sample-information CSV
# is streamed to the script on stdin ("-" positional argument).

here=$(dirname $0)

source ${here}/util.sh
source $1

# JUNIOR is deliberately left unquoted: it is either empty or "--junior",
# and quoting it would pass an empty-string argument to argparse.
python ${here}/bootstrap.py \
    --plate ${PLATE_ID} \
    ${JUNIOR} \
    --zone ${ZONE_ID} \
    --barcodes ${BARCODES} \
    --labels ${LABELS} \
    --metadata ${METADATA} \
    - < ${SAMPLE_INFO}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bootstrap.py Thu Feb 26 18:16:36 2015 -0500 @@ -0,0 +1,84 @@ +#!/usr/bin/env python + +from __future__ import print_function +import csv +import sys +import os +import argparse + +def warning(*objs): + print("WARNING: ", *objs, file=sys.stderr) + +def main(arguments): + + parser = argparse.ArgumentParser(arguments, description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument('infile', help = "CSV input", + type = argparse.FileType('r'), default = sys.stdin) + parser.add_argument('--junior', help = "use junior run specimen naming convention", action = 'store_true') + parser.add_argument('--plate', help = "plate number", type = int, required = True) + parser.add_argument('--zone', help = "zone number", type = int, required = True) + parser.add_argument('--barcodes', help = "name of barcodes file", + type = argparse.FileType('w'), default = 'barcodes.csv') + parser.add_argument('--labels', help = "name of labels file", + type = argparse.FileType('w'), default = 'labels.csv') + parser.add_argument('--metadata', help = "name of metadata template file", + type = argparse.FileType('w'), default = 'metadata.csv') + + args = parser.parse_args(arguments) + + label_key = 'sampleid' + primer_key = 'reverse' + barcode_key = 'barcode' + zone_key = 'zone' + + fstr = "j{plate_id}{primer_id}" if args.junior else "p{plate_id}z{zone_id}{primer_id}" + + reader = csv.DictReader(sys.stdin) + + barcodes = csv.writer(args.barcodes) + labels = csv.writer(args.labels) + metadata = csv.writer(args.metadata) + + barcodes.writerow(['stub', 'barcode']) + labels.writerow(['specimen', 'label']) + metadata.writerow(['specimen', 'plate', 'zone', 'label', 'primer']) + + seen_labels = {} + seen_primers = {} + + # TODO: add checks for duplicates, empty fields, etc., and bail if something goes wrong + for i, d in enumerate(reader): + if not all (k in d for k in (label_key, primer_key, barcode_key)): + return 
"Expected columns not found" + + if zone_key in d and d[zone_key] != str(args.zone): + continue + + label = d[label_key] + primer = d[primer_key] + barcode = d[barcode_key] + zone = args.zone + + if not all((label, primer, barcode)): + # only print a warning if at least one of the fields is non-empty + if any((label, primer, barcode)): + warning("Missing required field on row {}, skipping".format(i+2)) + continue + + if label in seen_labels: + return "Duplicate label '{}' found on rows {} and {}".format(label, seen_labels[label]+2, i+2) + + if primer in seen_primers: + return "Duplicate primer '{}' found on rows {} and {}".format(primer, seen_primers[primer]+2, i+2) + + seen_labels[label] = i + seen_primers[primer] = i + + specimen = fstr.format(plate_id=args.plate, zone_id=zone, primer_id=primer.strip().lower().replace('-','')) + barcodes.writerow([specimen, barcode]) + labels.writerow([specimen, label]) + metadata.writerow([specimen, args.plate, zone, label, primer]) + +if __name__ == '__main__': + sys.exit(main(sys.argv[1:]))
<!-- Galaxy tool definition: runs bootstrap-wrapper.sh, which drives
     bootstrap.py to turn a sample-information CSV into barcode/label/metadata
     CSVs. The <configfile> below is a Cheetah template sourced by the wrapper
     as shell variable assignments. -->
<tool id="PHYLO_bootstrap" name="Prepare data" version="1.1.0">
  <description>for analysis</description>
  <requirements>
    <requirement type="package">yapp_env</requirement>
  </requirements>
  <macros>
    <import>macros.xml</import>
  </macros>
  <version_command>echo "bootstrap script 1.1.0"</version_command>
  <command interpreter="bash">
    bootstrap-wrapper.sh $config
  </command>
  <stdio>
    <expand macro="basic_errors"/>
  </stdio>
  <inputs>
    <param name="plate_id" type="integer" value="1" label="Plate number"/>
    <conditional name="run_type">
      <param name="run_type_select" type="select" label="Run type">
        <option value="junior">Junior</option>
        <option value="senior">Senior</option>
      </param>
      <!-- Junior runs have no zone; senior runs ask for one. -->
      <when value="junior"></when>
      <when value="senior">
        <param name="zone_id" type="integer" value="1" label="Zone number"/>
      </when>
    </conditional>
    <param name="sample_info" type="data" format="csv" label="Sample information"/>
  </inputs>
  <outputs>
    <data format="csv" name="barcodes" label="Specimen-to-barcode map"/>
    <data format="csv" name="labels" label="Specimen-to-label map"/>
    <data format="csv" name="metadata" label="Metadata template"/>
  </outputs>
  <configfiles>
    <!-- Shell-sourceable config consumed by bootstrap-wrapper.sh; for junior
         runs the zone defaults to 1 and the \-\-junior flag is passed. -->
    <configfile name="config">
PLATE_ID="${plate_id}"
#if $run_type.run_type_select == "senior"
JUNIOR=""
ZONE_ID="${run_type.zone_id}"
#else
JUNIOR="--junior"
ZONE_ID="1"
#end if
SAMPLE_INFO="${sample_info}"

BARCODES="${barcodes}"
LABELS="${labels}"
METADATA="${metadata}"
    </configfile>
  </configfiles>
  <!-- The contents of the help tag is parsed as reStructuredText. Please see
       help-template.rst for examples of commonly-used sections in other Galaxy
       tools. -->
  <help>

.. class:: infomark

**What it does**

This tool parses sample information and creates a mapping of samples to
barcodes. The sample information file must contain the columns ``sampleid``,
``barcode``, and ``reverse``, and can optionally contain a ``zone`` column
also.

  </help>
</tool>
#!/bin/bash
# Galaxy wrapper: export pplacer classifications from the placement database
# ($CLASS_DB) into tabular CSV files via classif_table.py. $1 is the
# tool-generated config file defining the variables used below.

source $(dirname $0)/util.sh
source $1

classif_table.py \
    --specimen-map ${SPLIT_MAP} \
    --metadata-map ${LABEL_MAP} \
    --rank ${WANT_RANK} \
    --tallies-wide ${TALLIES_WIDE} \
    --by-specimen ${BY_SPECIMEN} \
    ${CLASS_DB} \
    ${BY_TAXON}
<!-- Galaxy tool definition: runs classification-wrapper.sh (classif_table.py)
     to export classifications from the pplacer placement database as CSV. -->
<tool id="PHYLO_classification" name="Output classifications" version="2.1.0">
  <description>in tabular format</description>
  <requirements>
    <requirement type="package">yapp_env</requirement>
  </requirements>
  <macros>
    <import>macros.xml</import>
  </macros>
  <version_command>echo "guppy $(guppy --version)"</version_command>
  <command interpreter="bash">
    classification-wrapper.sh ${config}
  </command>
  <stdio>
    <expand macro="basic_errors"/>
  </stdio>
  <inputs>
    <param name="split_map" type="data" format="csv" label="Read-to-specimen map"/>
    <param name="label_map" type="data" format="csv" label="Specimen-to-label map"/>
    <param name="class_db" type="data" format="sqlite3" label="Placement database"/>
    <param name="want_rank" type="select" label="Desired classification rank">
      <option value="species" selected="true">Species</option>
      <option value="genus">Genus</option>
      <option value="family">Family</option>
      <option value="order">Order</option>
      <option value="class">Class</option>
      <option value="phylum">Phylum</option>
    </param>
  </inputs>
  <outputs>
    <data name="by_taxon" format="csv" label="By-taxon classification"/>
    <data name="by_specimen" format="csv" label="By-specimen classification"/>
    <data name="tallies_wide" format="csv" label="Tallies-wide classification"/>
  </outputs>
  <configfiles>
    <!-- Shell-sourceable config consumed by classification-wrapper.sh. -->
    <configfile name="config">
SPLIT_MAP="${split_map}"
LABEL_MAP="${label_map}"
CLASS_DB="${class_db}"
WANT_RANK="${want_rank}"

BY_TAXON="${by_taxon}"
BY_SPECIMEN="${by_specimen}"
TALLIES_WIDE="${tallies_wide}"
    </configfile>
  </configfiles>
  <!-- The contents of the help tag is parsed as reStructuredText. Please see
       help-template.rst for examples of commonly-used sections in other Galaxy
       tools. -->
  <help>

.. class:: infomark

**What it does**

This tool outputs the classifications made by ``pplacer`` to a tabular format
appropriate for use with R.

-----

**Example**

The classifications are simply done by containment. Say clade A of the
reference tree is the smallest such that contains a given placement. The most
specific classification for that read will be the lowest common ancestor of the
taxonomic classifications for the leaves of A. If the desired classification is
more specific than that, then we get a disconnect between the desired and the
actual classification. For example, if we try to classify at the species level
and the clade LCA is a genus, then we will get a genus name. If there is
uncertainty in read placement, then there is uncertainty in classification.

For example, here is a classification list made for one read using the tabular
output. The columns are as follows: read name, attempted rank for
classification, actual rank for classification, taxonomic identifier, and
confidence. You can see that in this example, there is some uncertainty at and
below species, but only one classification at the genus level::

  GLKT0ZE01CQ2BU root root 1 1
  GLKT0ZE01CQ2BU below_root below_root 131567 1
  GLKT0ZE01CQ2BU superkingdom superkingdom 2 1
  GLKT0ZE01CQ2BU below_superkingdom superkingdom 2 1
  GLKT0ZE01CQ2BU below_below_superkingdom superkingdom 2 1
  GLKT0ZE01CQ2BU superphylum superkingdom 2 1
  GLKT0ZE01CQ2BU phylum phylum 1239 1
  GLKT0ZE01CQ2BU subphylum phylum 1239 1
  GLKT0ZE01CQ2BU class class 186801 1
  GLKT0ZE01CQ2BU subclass class 186801 1
  GLKT0ZE01CQ2BU order order 186802 1
  GLKT0ZE01CQ2BU below_order order 186802 1
  GLKT0ZE01CQ2BU below_below_order order 186802 1
  GLKT0ZE01CQ2BU suborder order 186802 1
  GLKT0ZE01CQ2BU family family 186804 1
  GLKT0ZE01CQ2BU below_family family 186804 1
  GLKT0ZE01CQ2BU genus genus 1257 1
  GLKT0ZE01CQ2BU species_group genus 1257 1
  GLKT0ZE01CQ2BU species_subgroup genus 1257 1
  GLKT0ZE01CQ2BU species genus 1257 0.0732247
  GLKT0ZE01CQ2BU species species 1261 0.853561
  GLKT0ZE01CQ2BU species species 341694 0.073214
  GLKT0ZE01CQ2BU below_species genus 1257 0.0732247
  GLKT0ZE01CQ2BU below_species species 1261 0.853561
  GLKT0ZE01CQ2BU below_species species 341694 0.073214

  </help>
</tool>
<?xml version="1.0"?>
<!-- Registers the custom Galaxy datatypes used by this suite:
     jplace (pplacer placements, JSON), refpkg / refpkg.zip (taxtastic
     reference packages, with an unzip-based converter), and xlsx (converted
     to csv for downstream tools). Sniffers allow auto-detection on upload. -->
<datatypes>
  <datatype_files>
    <datatype_file name="pplacer.py"/>
    <datatype_file name="taxtastic.py"/>
  </datatype_files>
  <registration>
    <datatype extension="jplace" type="galaxy.datatypes.pplacer:Jplace" mimetype="application/json" display_in_upload="True"/>
    <!-- refpkg is a directory-backed (composite) type: not directly uploadable,
         only produced by converting a refpkg.zip. -->
    <datatype extension="refpkg" type="galaxy.datatypes.taxtastic:Refpkg" display_in_upload="False"/>
    <datatype extension="refpkg.zip" type="galaxy.datatypes.taxtastic:RefpkgZip" mimetype="application/zip" display_in_upload="True">
      <converter file="refpkgzip_to_refpkg.xml" target_datatype="refpkg" depends_on="unzip"/>
    </datatype>
    <datatype extension="xlsx" type="galaxy.datatypes.taxtastic:OfficeXlsx" mimetype="application/zip" display_in_upload="True">
      <converter file="xlsx_to_csv.xml" target_datatype="csv"/>
    </datatype>
  </registration>
  <sniffers>
    <sniffer type="galaxy.datatypes.pplacer:Jplace"/>
    <sniffer type="galaxy.datatypes.taxtastic:RefpkgZip"/>
    <sniffer type="galaxy.datatypes.taxtastic:OfficeXlsx"/>
  </sniffers>
</datatypes>
#!/bin/bash
# Galaxy wrapper: join the grouped-by-specimen classification with selected
# sample-metadata columns (csvkit). $1 is the tool-generated config file.

source $(dirname $0)/util.sh
source $1

# Select "specimen" plus the user-requested metadata columns, then join the
# classification table to them on the specimen column.
csvcut -c "specimen,${COLUMNS}" ${METADATA} | \
    csvjoin -c "specimen" ${GROUP_BY_SPECIMEN} - > ${DECORATED_GROUP_BY_SPECIMEN}

# drop duplicate columns (thanks, Erick!)
#csvcut -c $(head -n 1 addresses.csv | sed "s/,/\n/g" | sort |uniq | paste -s -d",")
<!-- Galaxy tool definition: runs decorate-wrapper.sh to join classification
     results with user-selected sample-metadata columns. -->
<tool id="PHYLO_decorate" name="Decorate" version="1.0.0">
  <description>classification results with sample metadata</description>
  <requirements>
    <requirement type="package">yapp_env</requirement>
  </requirements>
  <macros>
    <import>macros.xml</import>
  </macros>
  <version_command>echo "decorate script 1.0.0"</version_command>
  <command interpreter="bash">
    decorate-wrapper.sh ${config}
  </command>
  <stdio>
    <expand macro="basic_errors"/>
  </stdio>
  <inputs>
    <param name="group_by_specimen" type="data" format="csv" label="Grouped-by-specimen classification"/>
    <param name="metadata" type="data" format="csv" label="Sample metadata"/>
    <param name="columns" type="text" label="Comma-separated metadata columns" area="True" size="5x40"/>
  </inputs>
  <outputs>
    <data name="decorated_group_by_specimen" format="csv" label="Decorated grouped-by-specimen classification"/>
  </outputs>
  <configfiles>
    <!-- Shell-sourceable config consumed by decorate-wrapper.sh. -->
    <configfile name="config">
GROUP_BY_SPECIMEN="${group_by_specimen}"
METADATA="${metadata}"
COLUMNS="${columns}"

DECORATED_GROUP_BY_SPECIMEN="${decorated_group_by_specimen}"
    </configfile>
  </configfiles>
  <!-- The contents of the help tag is parsed as reStructuredText. Please see
       help-template.rst for examples of commonly-used sections in other Galaxy
       tools. -->
  <help>

.. class:: infomark

**What it does**

This tool joins the classifications made by ``pplacer`` with arbitrary sample
metadata.

  </help>
</tool>
#!/bin/bash
# Galaxy wrapper: quality-filter and trim raw reads with seqmagick, split
# reads to specimens by barcode, optionally reverse-complement, and build a
# sequencing quality report. $1 is the tool-generated config file.

source $(dirname $0)/util.sh
source $1

# Galaxy datasets have .dat names; extify gives them the extensions the
# downstream tools expect (helper from util.sh).
INPUT_QUAL=$(extify qual ${INPUT_QUAL})
BARCODES=$(extify csv ${BARCODES})
RAW_SEQS=$(extify fasta ${RAW_SEQS})

seqmagick quality-filter \
    --input-qual ${INPUT_QUAL} \
    --barcode-file ${BARCODES} \
    --primer "${PRIMER}" \
    --report-out ${FILTER_REPORT} \
    --details-out ${FILTER_DETAILS} \
    --map-out ${SPLIT_MAP} \
    --barcode-header \
    --min-length ${MIN_LENGTH} \
    --min-mean-quality ${MIN_QUALITY} \
    --quality-window 30 \
    --quality-window-prop 0.9 \
    --quality-window-mean-qual 15 \
    ${RAW_SEQS} \
    filtered.fasta

# Reverse-complement in place when the reads are all negative-strand.
if [[ ${REVERSE_COMPLEMENT} == "TRUE" ]]; then
    seqmagick mogrify \
        --reverse-complement \
        filtered.fasta
fi

mv filtered.fasta ${FILTERED_SEQS}

# TODO: separate tool for concatenating seq data (and reverse complementing them?)
#cat [12]*Reads.fasta | seqmagick convert --input-format fasta - combined.fasta --reverse-complement
#cat [12]*.map.csv > combined.map.csv

# Build the HTML quality report into the dataset's extra-files directory, then
# write a small landing page that links to it.
sequencing_quality_report.py ${PLATE_JSON} -t "Sequencing quality report" -o ${SQR_DIR}

cat <<EOF > ${SQR}
<!DOCTYPE HTML>
<html lang="en-US">
  <head/>
  <body>
    <a href="index.html">Sequencing quality report</a>
  </body>
</html>
EOF
<!-- Galaxy tool definition: runs filter-wrapper.sh (seqmagick quality-filter)
     to filter/trim reads and map barcodes to specimens. -->
<tool id="PHYLO_filter" name="Filter and trim" version="1.2.0">
  <description>sequences</description>
  <requirements>
    <requirement type="package">yapp_env</requirement>
  </requirements>
  <macros>
    <import>macros.xml</import>
  </macros>
  <version_command>seqmagick --version</version_command>
  <command interpreter="bash">
    filter-wrapper.sh ${config}
  </command>
  <stdio>
    <expand macro="basic_errors"/>
  </stdio>
  <inputs>
    <!-- TODO: can take either fasta+qual or fastq -->
    <param name="plate_id" type="integer" value="1" label="Plate number"/>
    <param name="zone_id" type="integer" value="1" label="Zone number"/>
    <param name="raw_seqs" type="data" format="fasta" label="Unfiltered sequences"/>
    <param name="input_qual" type="data" format="qual" label="Sequence quality data"/>
    <!-- TODO: handle MID format for multi-sample sequencing; see http://qiime.org/scripts/split_libraries.html -->
    <param name="barcodes" type="data" format="csv" label="Barcodes"/>
    <param name="primer" type="text" label="Primer" value="GCGGACTACCVGGGTATCTAAT" area="True" size="1x40"/>
    <param name="min_length" type="integer" min="100" max="1000" value="350" label="Minimum sequence length"/>
    <param name="min_quality" type="integer" min="0" max="63" value="35" label="Minimum mean sequence quality"/>
    <param name="reverse_complement" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Reads uniformly correspond to negative strands"/>
  </inputs>
  <outputs>
    <data name="filtered_seqs" format="fasta" label="Filtered sequences"/>
    <data name="filter_report" format="tabular" label="Filtering report"/>
    <data name="filter_details" format="data" label="Filtering details"/>
    <data name="split_map" format="csv" label="Read-to-specimen map"/>
    <data name="seq_qual_report" format="html" label="Sequence quality report"/>
  </outputs>
  <configfiles>
    <!-- JSON consumed by sequencing_quality_report.py in the wrapper. -->
    <configfile name="plate_json">
{
  "plate": ${plate_id},
  "name": "Plate ${plate_id}",
  "zones": [
    {
      "zone": ${zone_id},
      "cleaning_stats": "${filter_details}"
    }
  ]
}
    </configfile>
    <!-- Shell-sourceable config consumed by filter-wrapper.sh. -->
    <configfile name="config">
RAW_SEQS="${raw_seqs}"
INPUT_QUAL="${input_qual}"
BARCODES="${barcodes}"
PRIMER="${primer}"
MIN_LENGTH="${min_length}"
MIN_QUALITY="${min_quality}"
REVERSE_COMPLEMENT="${reverse_complement}"
PLATE_JSON="${plate_json}"

FILTERED_SEQS="${filtered_seqs}"
FILTER_REPORT="${filter_report}"
FILTER_DETAILS="${filter_details}"
SPLIT_MAP="${split_map}"
SQR="${seq_qual_report}"
SQR_DIR="${seq_qual_report.files_path}"
    </configfile>
  </configfiles>
  <!-- The contents of the help tag is parsed as reStructuredText. Please see
       help-template.rst for examples of commonly-used sections in other Galaxy
       tools. -->
  <help>

.. class:: infomark

**What it does**

This tool truncates and removes sequences that don’t match a set of quality
criteria, as well as mapping sequence barcodes to specimens. It takes input
sequences in FASTA format and a quality file, and outputs the filtered
sequences as well as a filtering summary and a sequence quality report.

The default quality filter settings are:

+---------------------------+------+
|parameter                  |value |
+===========================+======+
|--min-length               |350   |
+---------------------------+------+
|--min-mean-quality         |35    |
+---------------------------+------+
|--quality-window           |30    |
+---------------------------+------+
|--quality-window-prop      |0.9   |
+---------------------------+------+
|--quality-window-mean-qual |15    |
+---------------------------+------+

See seqmagick's `quality filter documentation`_ for full explanations of these
parameters.

.. _quality filter documentation: http://fhcrc.github.io/seqmagick/quality_filter.html

  </help>
</tool>
<!-- Shared Galaxy macros. "basic_errors" is expanded inside each tool's
     <stdio> block: any nonzero exit code or an "error"/"traceback" match in
     the tool output is fatal; "warning" matches are reported as warnings. -->
<macros>
  <macro name="basic_errors">
    <exit_code range="1:" level="fatal"/>
    <regex match="error" level="fatal"/>
    <regex match="traceback" level="fatal"/>
    <regex match="warning" level="warning"/>
  </macro>
</macros>
#!/bin/bash
# Galaxy wrapper: place query sequences on the reference tree with pplacer.
# $1 is the tool-generated config file defining REFPKG, QUERY_SEQS and
# PLACED_SEQS.

source $(dirname $0)/util.sh
source $1

# Give the Galaxy dataset a .fasta extension (helper from util.sh).
QUERY_SEQS=$(extify fasta ${QUERY_SEQS})

# -j: worker count from Galaxy, defaulting to 4.
PPLACER_DEFAULT_ARGS="-j ${GALAXY_SLOTS:-4} -p --inform-prior --prior-lower 0.01 --map-identity"

pplacer \
    ${PPLACER_DEFAULT_ARGS} \
    -c ${REFPKG} \
    -o ${PLACED_SEQS} \
    ${QUERY_SEQS}
"""Galaxy datatype definitions for pplacer outputs."""
import json
import os  # BUG FIX: os was used below but never imported

from galaxy.datatypes.data import Text
from galaxy.datatypes.images import Html

class Jplace(Text):
    """Datatype for pplacer .jplace placement files (a JSON document)."""
    file_ext = "jplace"

    def sniff(self, filename):
        """Return True if `filename` parses as JSON and has the four
        top-level keys required by the jplace format."""
        # BUG FIX: narrowed the bare `except:` — only parse/IO failures mean
        # "not a jplace file"; anything else should propagate.
        try:
            with open(filename, "r") as f:
                data = json.load(f)
            return all(k in data for k in ("version", "tree", "placements", "fields"))
        except (ValueError, IOError, OSError):
            return False

    def get_mime(self):
        return "application/json"

class AutoPrimaryComposite(Html):
    """Composite datatype whose primary (HTML index) file is regenerated
    from the dataset's extra files when metadata is set."""
    composite_type = "auto_primary_file"

    def __init__(self, **kwd):
        Html.__init__(self, **kwd)

    def regenerate_primary_file(self, dataset):
        """Rewrite the primary file as an HTML listing of the dataset's
        extra files.

        cannot do this until we are setting metadata
        """
        efp = dataset.extra_files_path
        rval = ['<html><head><title>Files for Composite Dataset %s</title></head><body><p/>Composite %s contains:<p/><ul>' % (dataset.name, dataset.name)]
        for fname in os.listdir(efp):
            sfname = os.path.split(fname)[-1]
            rval.append('<li><a href="%s">%s</a></li>' % (sfname, sfname))
        rval.append('</ul></body></html>')
        # BUG FIX: used the Python-2-only `file()` builtin and never closed
        # the handle on error; use open() with a context manager instead.
        with open(dataset.file_name, 'w') as f:
            f.write("\n".join(rval))
            f.write('\n')

    def set_meta(self, dataset, **kwd):
        Html.set_meta(self, dataset, **kwd)
        self.regenerate_primary_file(dataset)

    def get_mime(self):
        return "text/html"

class BasicHtmlComposite(Html):
    """Plain composite HTML datatype (no auto-generated primary file)."""
    composite_type = "basic"
<!-- Galaxy tool definition: runs pplacer-wrapper.sh to place aligned query
     sequences on the reference tree of a refpkg. -->
<tool id="PHYLO_pplacer" name="Place aligned sequences" version="1.0.0">
  <description>on a reference tree</description>
  <requirements>
    <requirement type="package">yapp_env</requirement>
  </requirements>
  <macros>
    <import>macros.xml</import>
  </macros>
  <version_command>echo "pplacer $(pplacer --version)"</version_command>
  <command interpreter="bash">
    pplacer-wrapper.sh ${config}
  </command>
  <stdio>
    <expand macro="basic_errors"/>
  </stdio>
  <inputs>
    <param name="refpkg" type="data" format="refpkg" label="Reference package"/>
    <param name="query_seqs" type="data" format="fasta" label="Query alignment"/>
  </inputs>
  <outputs>
    <data name="placed_seqs" format="jplace" label="Placed sequences"/>
  </outputs>
  <configfiles>
    <!-- Shell-sourceable config; the refpkg is a composite datatype, so its
         directory lives at extra_files_path. -->
    <configfile name="config">
REFPKG="${refpkg.extra_files_path}"
QUERY_SEQS="${query_seqs}"

PLACED_SEQS="${placed_seqs}"
    </configfile>
  </configfiles>
  <!-- The contents of the help tag is parsed as reStructuredText. Please see
       help-template.rst for examples of commonly-used sections in other Galaxy
       tools. -->
  <help>

.. class:: infomark

**What it does**

This tool places query sequences on a fixed reference phylogenetic tree
according to a reference alignment, producing taxonomic annotations which can
be used for classification and visualization.

-----

**Citation**

Matsen F, Kodner R, Armbrust E V: **pplacer: linear time maximum-likelihood and
Bayesian phylogenetic placement of sequences onto a fixed reference tree**. BMC
Bioinformatics 2010, **11**:1.

  </help>
</tool>
#!/bin/bash
# Galaxy wrapper: restore duplicate reads into the placements, then build the
# sqlite classification database with rppr/guppy and concatenate multiclass
# results. $1 is the tool-generated config file.

source $(dirname $0)/util.sh
source $1

# Give Galaxy datasets the extensions the guppy tools expect (util.sh helper).
PLACED_SEQS=$(extify jplace ${PLACED_SEQS})
NBC_SEQS=$(extify fasta ${NBC_SEQS})

# Re-introduce deduplicated reads into the placement file.
guppy redup \
    -m \
    -d ${DEDUP_INFO} \
    -o ${REDUPED_SEQS} \
    ${PLACED_SEQS}

REDUPED_SEQS=$(extify jplace ${REDUPED_SEQS})

# Create the database schema from the reference package...
rppr prep_db \
    -c ${REFPKG} \
    --sqlite ${CLASS_DB}

# ...then classify the reduped placements into it.
guppy classify \
    -c ${REFPKG} \
    -j ${GALAXY_SLOTS:-4} \
    --pp \
    --sqlite ${CLASS_DB} \
    --classifier hybrid2 \
    --nbc-sequences ${NBC_SEQS} \
    ${REDUPED_SEQS}

multiclass_concat.py --dedup-info ${DEDUP_INFO} ${CLASS_DB}
<!-- Galaxy tool definition: runs preclassification-wrapper.sh to build the
     sqlite placement database used by the classification tool. -->
<tool id="PHYLO_preclassification" name="Generate database" version="1.2.0">
  <description>for classification</description>
  <requirements>
    <requirement type="package">yapp_env</requirement>
  </requirements>
  <macros>
    <import>macros.xml</import>
  </macros>
  <version_command>echo "guppy $(guppy --version)"</version_command>
  <command interpreter="bash">
    preclassification-wrapper.sh ${config}
  </command>
  <stdio>
    <expand macro="basic_errors"/>
  </stdio>
  <inputs>
    <param name="dedup_info" type="data" format="csv" label="Deduplication info"/>
    <param name="refpkg" type="data" format="refpkg" label="Reference package"/>
    <param name="nbc_seqs" type="data" format="fasta" label="Query alignment" help="The query alignment specified here should be the same as the one passed to pplacer."/>
    <param name="placed_seqs" type="data" format="jplace" label="Placements"/>
  </inputs>
  <outputs>
    <data name="reduped_seqs" format="jplace" label="Reduped placements"/>
    <data name="class_db" format="sqlite3" label="Placement database"/>
  </outputs>
  <configfiles>
    <!-- Shell-sourceable config consumed by preclassification-wrapper.sh. -->
    <configfile name="config">
DEDUP_INFO="${dedup_info}"
REFPKG="${refpkg.extra_files_path}"
NBC_SEQS="${nbc_seqs}"
PLACED_SEQS="${placed_seqs}"

REDUPED_SEQS="${reduped_seqs}"
CLASS_DB="${class_db}"
    </configfile>
  </configfiles>
  <!-- The contents of the help tag is parsed as reStructuredText. Please see
       help-template.rst for examples of commonly-used sections in other Galaxy
       tools. -->
  <help>

.. class:: infomark

**What it does**

This tool outputs the classifications made by ``pplacer`` to a database for use
in taxonomic classification.

  </help>
</tool>
#!/bin/bash
# Galaxy wrapper: deduplicate query reads, align them against the refpkg's
# profile with cmalign, and merge the query alignment into the reference
# alignment. $1 is the tool-generated config file.

source $1

deduplicate_sequences.py \
    --split-map ${SPLIT_MAP} \
    --deduplicated-sequences-file ${DEDUP_INFO} \
    ${INPUT_SEQS} \
    ${DEDUP_SEQS}

# adapted from yapp/bin/refpkg_align
# Pull the reference alignment and the alignment profile out of the refpkg.
ref_sto=$(taxit rp ${REFPKG} aln_sto)
profile=$(taxit rp ${REFPKG} profile)

# NOTE(review): mktemp -u only generates a name without creating the file,
# which is race-prone; cmalign needs a nonexistent output path — confirm
# before changing.
sto=$(mktemp -u).sto

# cmalign prints progress to stdout; keep only the '#'-prefixed summary lines.
cmalign --cpu ${GALAXY_SLOTS:-4} -o "$sto" --sfile "${ALIGNED_SCORES}" --noprob --dnaout "$profile" "${DEDUP_SEQS}" | grep -E '^#'

# Merge query + reference alignments and normalize gaps to dashes.
esl-alimerge --dna --outformat afa "$ref_sto" "$sto" | \
    seqmagick convert --output-format fasta --dash-gap - "${ALIGNED_SEQS}"
<!-- Galaxy tool definition: runs preprocessing-wrapper.sh to deduplicate and
     align query sequences against the reference package's alignment. -->
<tool id="PHYLO_preprocessing" name="Preprocess sequences" version="2.0.0">
  <description>in preparation for phylogenetic placement</description>
  <requirements>
    <requirement type="package">yapp_env</requirement>
  </requirements>
  <macros>
    <import>macros.xml</import>
  </macros>
  <version_command>echo "guppy $(guppy --version)"</version_command>
  <command interpreter="bash">
    preprocessing-wrapper.sh ${config}
  </command>
  <stdio>
    <expand macro="basic_errors"/>
  </stdio>
  <inputs>
    <param name="refpkg" type="data" format="refpkg" label="Reference package"/>
    <param name="input_seqs" type="data" format="fasta" label="Input sequences"/>
    <param name="split_map" type="data" format="csv" label="Read-to-specimen map"/>
  </inputs>
  <outputs>
    <data name="dedup_seqs" format="fasta" label="Deduplicated sequences"/>
    <data name="dedup_info" format="csv" label="Deduplication info"/>
    <data name="aligned_seqs" format="fasta" label="Aligned sequences"/>
    <data name="aligned_scores" format="txt" label="Alignment scores"/>
  </outputs>
  <configfiles>
    <!-- Shell-sourceable config consumed by preprocessing-wrapper.sh. -->
    <configfile name="config">
REFPKG="${refpkg.extra_files_path}"
INPUT_SEQS="${input_seqs}"
SPLIT_MAP="${split_map}"

DEDUP_SEQS="${dedup_seqs}"
DEDUP_INFO="${dedup_info}"
ALIGNED_SEQS="${aligned_seqs}"
ALIGNED_SCORES="${aligned_scores}"
    </configfile>
  </configfiles>
  <!-- The contents of the help tag is parsed as reStructuredText. Please see
       help-template.rst for examples of commonly-used sections in other Galaxy
       tools. -->
  <help>

.. class:: infomark

**What it does**

This tool aligns query sequences with the reference sequences used to make the
reference tree contained in the reference package and then merges the query and
reference sequences.

  </help>
</tool>
<!-- Galaxy datatype converter: unpacks an uploaded refpkg.zip into the
     composite refpkg datatype's extra-files directory (-j flattens any
     leading directory inside the archive; -o overwrites without prompting). -->
<tool id="CONVERTER_refpkgzip_to_refpkg" name="Convert zipped refpkg to refpkg" version="1.0.0">
  <command>unzip -o -j $input -d $output.files_path</command>
  <inputs>
    <param name="input" type="data" format="refpkg.zip" label="Zipped refpkg"/>
  </inputs>
  <outputs>
    <data name="output" format="refpkg"/>
  </outputs>
  <help>
  </help>
</tool>
#!/bin/bash
# Galaxy wrapper: render a delimited input file as an interactive HTML
# DataTable (render_datatable.py) into the dataset's extra-files directory,
# then write a small landing page linking to it. $1 is the config file.

here=$(dirname $0)

source ${here}/util.sh
source $1

mkdir -p ${OUTPUT_DIR}

python ${here}/render_datatable.py \
    < ${INPUT} \
    > ${OUTPUT_DIR}/index.html

cat <<EOF > ${OUTPUT}
<!DOCTYPE HTML>
<html lang="en-US">
  <head/>
  <body>
    <a href="index.html">Generated table</a>
  </body>
</html>
EOF
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/render_datatable.py Thu Feb 26 18:16:36 2015 -0500 @@ -0,0 +1,412 @@ +#!/usr/bin/env python + +import csv +import itertools +import string +import sys + +input = sys.stdin +start_lines = input.readlines(10) +all_input = itertools.chain(iter(start_lines), input) + +def detect_delimiter(iterable, char_set): + matches = (c for c in char_set if c in iterable) + return next(matches, None) + +def detect_csv_dialect(sample): + try: + return csv.Sniffer().sniff(sample) + except: + return None + +delimiter = detect_delimiter(start_lines[0], list('\t, ')) +reader = None + +if delimiter in list('\t,'): + # try to detect csv dialect, which should neatly handle quoted separators and stuff + dialect = detect_csv_dialect(''.join(start_lines)) + if dialect: + reader = csv.reader(all_input, dialect) + +if not reader: + if delimiter in list(string.whitespace): + # use str.split() with no arguments to split on arbitrary whitespace strings + reader = (line.strip().split() for line in all_input) + else: + reader = all_input + +print """\ +<!DOCTYPE html> +<html lang="en"> + <head> + <meta http-equiv="content-type" content="text/html; charset=UTF-8"></meta> + <link href="http://netdna.bootstrapcdn.com/twitter-bootstrap/2.3.2/css/bootstrap-combined.no-icons.min.css" rel="stylesheet"> + <style> +div.dataTables_length label { + float: left; + text-align: left; +} + +div.dataTables_length select { + width: 75px; +} + +div.dataTables_filter label { + float: right; +} + +div.dataTables_info { + padding-top: 8px; +} + +div.dataTables_paginate { + float: right; + margin: 0; +} + +table.table { + clear: both; + margin-bottom: 6px !important; + max-width: none !important; +} + +table.table thead .sorting, +table.table thead .sorting_asc, +table.table thead .sorting_desc, +table.table thead .sorting_asc_disabled, +table.table thead .sorting_desc_disabled { + cursor: pointer; + *cursor: hand; +} + + +table.table thead .sorting { background: 
url('images/sort_both.png') no-repeat center right; } + +//table.table thead .sorting_asc { background: url('images/sort_asc.png') no-repeat center right; } +//table.table thead .sorting_desc { background: url('images/sort_desc.png') no-repeat center right; } +table.table thead .sorting_asc { background: url('http://cdn3.iconfinder.com/data/icons/fatcow/16x16_0140/bullet_arrow_up.png') no-repeat center right; } +table.table thead .sorting_desc { background: url('http://cdn3.iconfinder.com/data/icons/fatcow/16x16_0140/bullet_arrow_down.png') no-repeat center right; } + +table.table thead .sorting_asc_disabled { background: url('images/sort_asc_disabled.png') no-repeat center right; } +table.table thead .sorting_desc_disabled { background: url('images/sort_desc_disabled.png') no-repeat center right; } + +table.dataTable th:active { + outline: none; +} + +/* Scrolling */ +div.dataTables_scrollHead table { + margin-bottom: 0 !important; + border-bottom-left-radius: 0; + border-bottom-right-radius: 0; +} + +div.dataTables_scrollHead table thead tr:last-child th:first-child, +div.dataTables_scrollHead table thead tr:last-child td:first-child { + border-bottom-left-radius: 0 !important; + border-bottom-right-radius: 0 !important; +} + +div.dataTables_scrollBody table { + border-top: none; + margin-bottom: 0 !important; +} + +div.dataTables_scrollBody tbody tr:first-child th, +div.dataTables_scrollBody tbody tr:first-child td { + border-top: none; +} + +div.dataTables_scrollFoot table { + border-top: none; +} + + + + +/* + * TableTools styles + */ +.table tbody tr.active td, +.table tbody tr.active th { + background-color: #08C; + color: white; +} + +.table tbody tr.active:hover td, +.table tbody tr.active:hover th { + background-color: #0075b0 !important; +} + +.table-striped tbody tr.active:nth-child(odd) td, +.table-striped tbody tr.active:nth-child(odd) th { + background-color: #017ebc; +} + +table.DTTT_selectable tbody tr { + cursor: pointer; + *cursor: hand; +} + 
+div.DTTT .btn { + color: #333 !important; + font-size: 12px; +} + +div.DTTT .btn:hover { + text-decoration: none !important; +} + + +ul.DTTT_dropdown.dropdown-menu a { + color: #333 !important; /* needed only when demo_page.css is included */ +} + +ul.DTTT_dropdown.dropdown-menu li:hover a { + background-color: #0088cc; + color: white !important; +} + +/* TableTools information display */ +div.DTTT_print_info.modal { + height: 150px; + margin-top: -75px; + text-align: center; +} + +div.DTTT_print_info h6 { + font-weight: normal; + font-size: 28px; + line-height: 28px; + margin: 1em; +} + +div.DTTT_print_info p { + font-size: 14px; + line-height: 20px; +} + + + +/* + * FixedColumns styles + */ +div.DTFC_LeftHeadWrapper table, +div.DTFC_LeftFootWrapper table, +table.DTFC_Cloned tr.even { + background-color: white; +} + +div.DTFC_LeftHeadWrapper table { + margin-bottom: 0 !important; + border-top-right-radius: 0 !important; + border-bottom-left-radius: 0 !important; + border-bottom-right-radius: 0 !important; +} + +div.DTFC_LeftHeadWrapper table thead tr:last-child th:first-child, +div.DTFC_LeftHeadWrapper table thead tr:last-child td:first-child { + border-bottom-left-radius: 0 !important; + border-bottom-right-radius: 0 !important; +} + +div.DTFC_LeftBodyWrapper table { + border-top: none; + margin-bottom: 0 !important; +} + +div.DTFC_LeftBodyWrapper tbody tr:first-child th, +div.DTFC_LeftBodyWrapper tbody tr:first-child td { + border-top: none; +} + +div.DTFC_LeftFootWrapper table { + border-top: none; +} + </style> + <script type="text/javascript" language="javascript" src="http://ajax.aspnetcdn.com/ajax/jQuery/jquery-2.0.0.min.js"></script> + <script type="text/javascript" language="javascript" src="http://ajax.aspnetcdn.com/ajax/jquery.dataTables/1.9.4/jquery.dataTables.min.js"></script> + <script type="text/javascript" charset="utf-8"> +/* Set the defaults for DataTables initialisation */ +$.extend( true, $.fn.dataTable.defaults, { + "sDom": 
"<'row-fluid'<'span6'l><'span6'f>r>t<'row-fluid'<'span6'i><'span6'p>>", + "sPaginationType": "bootstrap", + "oLanguage": { + "sLengthMenu": "_MENU_ records per page" + } +} ); + + +/* Default class modification */ +$.extend( $.fn.dataTableExt.oStdClasses, { + "sWrapper": "dataTables_wrapper form-inline" +} ); + + +/* API method to get paging information */ +$.fn.dataTableExt.oApi.fnPagingInfo = function ( oSettings ) +{ + return { + "iStart": oSettings._iDisplayStart, + "iEnd": oSettings.fnDisplayEnd(), + "iLength": oSettings._iDisplayLength, + "iTotal": oSettings.fnRecordsTotal(), + "iFilteredTotal": oSettings.fnRecordsDisplay(), + "iPage": oSettings._iDisplayLength === -1 ? + 0 : Math.ceil( oSettings._iDisplayStart / oSettings._iDisplayLength ), + "iTotalPages": oSettings._iDisplayLength === -1 ? + 0 : Math.ceil( oSettings.fnRecordsDisplay() / oSettings._iDisplayLength ) + }; +}; + + +/* Bootstrap style pagination control */ +$.extend( $.fn.dataTableExt.oPagination, { + "bootstrap": { + "fnInit": function( oSettings, nPaging, fnDraw ) { + var oLang = oSettings.oLanguage.oPaginate; + var fnClickHandler = function ( e ) { + e.preventDefault(); + if ( oSettings.oApi._fnPageChange(oSettings, e.data.action) ) { + fnDraw( oSettings ); + } + }; + + $(nPaging).addClass('pagination').append( + '<ul>'+ + '<li class="prev disabled"><a href="#">← '+oLang.sPrevious+'</a></li>'+ + '<li class="next disabled"><a href="#">'+oLang.sNext+' → </a></li>'+ + '</ul>' + ); + var els = $('a', nPaging); + $(els[0]).bind( 'click.DT', { action: "previous" }, fnClickHandler ); + $(els[1]).bind( 'click.DT', { action: "next" }, fnClickHandler ); + }, + + "fnUpdate": function ( oSettings, fnDraw ) { + var iListLength = 5; + var oPaging = oSettings.oInstance.fnPagingInfo(); + var an = oSettings.aanFeatures.p; + var i, ien, j, sClass, iStart, iEnd, iHalf=Math.floor(iListLength/2); + + if ( oPaging.iTotalPages < iListLength) { + iStart = 1; + iEnd = oPaging.iTotalPages; + } + else if ( 
oPaging.iPage <= iHalf ) { + iStart = 1; + iEnd = iListLength; + } else if ( oPaging.iPage >= (oPaging.iTotalPages-iHalf) ) { + iStart = oPaging.iTotalPages - iListLength + 1; + iEnd = oPaging.iTotalPages; + } else { + iStart = oPaging.iPage - iHalf + 1; + iEnd = iStart + iListLength - 1; + } + + for ( i=0, ien=an.length ; i<ien ; i++ ) { + // Remove the middle elements + $('li:gt(0)', an[i]).filter(':not(:last)').remove(); + + // Add the new list items and their event handlers + for ( j=iStart ; j<=iEnd ; j++ ) { + sClass = (j==oPaging.iPage+1) ? 'class="active"' : ''; + $('<li '+sClass+'><a href="#">'+j+'</a></li>') + .insertBefore( $('li:last', an[i])[0] ) + .bind('click', function (e) { + e.preventDefault(); + oSettings._iDisplayStart = (parseInt($('a', this).text(),10)-1) * oPaging.iLength; + fnDraw( oSettings ); + } ); + } + + // Add / remove disabled classes from the static elements + if ( oPaging.iPage === 0 ) { + $('li:first', an[i]).addClass('disabled'); + } else { + $('li:first', an[i]).removeClass('disabled'); + } + + if ( oPaging.iPage === oPaging.iTotalPages-1 || oPaging.iTotalPages === 0 ) { + $('li:last', an[i]).addClass('disabled'); + } else { + $('li:last', an[i]).removeClass('disabled'); + } + } + } + } +} ); + + +/* + * TableTools Bootstrap compatibility + * Required TableTools 2.1+ + */ +if ( $.fn.DataTable.TableTools ) { + // Set the classes that TableTools uses to something suitable for Bootstrap + $.extend( true, $.fn.DataTable.TableTools.classes, { + "container": "DTTT btn-group", + "buttons": { + "normal": "btn", + "disabled": "disabled" + }, + "collection": { + "container": "DTTT_dropdown dropdown-menu", + "buttons": { + "normal": "", + "disabled": "disabled" + } + }, + "print": { + "info": "DTTT_print_info modal" + }, + "select": { + "row": "active" + } + } ); + + // Have the collection use a bootstrap compatible dropdown + $.extend( true, $.fn.DataTable.TableTools.DEFAULTS.oTags, { + "collection": { + "container": "ul", + "button": 
"li", + "liner": "a" + } + } ); +} + + +/* Table initialisation */ +$(document).ready(function() { + $('#from_csv').dataTable( { + "sDom": "<'row'<'span6'l><'span6'f>r>t<'row'<'span6'i><'span6'p>>", + "sPaginationType": "bootstrap", + "oLanguage": { + "sLengthMenu": "_MENU_ records per page" + } + } ); +} ); + </script> + </head> + <body> + <div class="container" style="margin-top: 10px"> + <table cellpadding="0" cellspacing="0" border="0" class="table table-striped table-bordered" id="from_csv"> + <thead>\ +""" + +for i, row in enumerate(reader): + if i == 0: + print "<tr><th>" + "</th><th>".join(row) + "</th></tr>" + else: + print "<tr><td>" + "</td><td>".join(row) + "</td></tr>" + + if i == 0: + print "</thead><tbody>" + +print """\ + </tbody> + </table> + </div> + </body> +</html>\ +"""
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/render_datatable.xml Thu Feb 26 18:16:36 2015 -0500 @@ -0,0 +1,63 @@ +<tool id="PHYLO_render_datatable" name="Render CSV file" version="1.1.0"> + <description>as an interactive HTML table</description> + <macros> + <import>macros.xml</import> + </macros> + <command interpreter="bash"> + render_datatable-wrapper.sh $config + </command> + <stdio> + <expand macro="basic_errors"/> + </stdio> + <inputs> + <param name="input" type="data" format="csv" label="CSV file"/> + </inputs> + <outputs> + <data format="html" name="output" label="Generated table"/> + </outputs> + <configfiles> + <configfile name="config"> +INPUT="${input}" + +OUTPUT="${output}" +OUTPUT_DIR="${output.files_path}" + </configfile> + </configfiles> + <!-- The contents of the help tag is parsed as reStructuredText. Please see + help-template.rst for examples of commonly-used sections in other Galaxy + tools. --> + <help> + +.. class:: infomark + +**What it does** + +This tool reformats a CSV file, like this:: + + "seqname","accession","tax_id","species_name","is_type" + "FM872653","FM872653","308994","Dialister propionicifaciens",0.0 + "AY331416","AY331416","239137","Candidate Division TM7 oral",0.0 + "DQ666092","DQ666092","95818_1","Candidate Division TM7 vaginal",0.0 + "S002223913","GQ900631","186802_3","bacterium BVAB3-Strain 1",0.0 + ... + +into an interactive HTML table. 
+
+The rendered page lets you sort, search, and page through the data; its
+content corresponds to a static table like this:
+
++-------------+-----------+----------+---------------------------------------+----------+
+| seqname     | accession | tax_id   | species_name                          | is_type  |
++=============+===========+==========+=======================================+==========+
+| FM872653    | FM872653  | 308994   | Dialister propionicifaciens           | 0.0      |
++-------------+-----------+----------+---------------------------------------+----------+
+| AY331416    | AY331416  | 239137   | Candidate Division TM7 oral           | 0.0      |
++-------------+-----------+----------+---------------------------------------+----------+
+| DQ666092    | DQ666092  | 95818_1  | Candidate Division TM7 vaginal        | 0.0      |
++-------------+-----------+----------+---------------------------------------+----------+
+| S002223913  | GQ900631  | 186802_3 | bacterium BVAB3-Strain 1              | 0.0      |
++-------------+-----------+----------+---------------------------------------+----------+
+
+...
+
+  </help>
+</tool>
# file: taxtastic.py -- Galaxy datatype definitions for pplacer/taxtastic data.
import os
import zipfile

from galaxy.datatypes.binary import Binary
from galaxy.datatypes.data import Text


def _zip_contains(filename, member_basename):
    """Return True if *filename* is a zip archive containing a member whose
    basename equals *member_basename* (at any depth), else False.

    Shared sniffing helper; always closes the archive, even on error.
    """
    if not zipfile.is_zipfile(filename):
        return False
    zip_file = zipfile.ZipFile(filename, "r")
    try:
        return any(os.path.basename(name) == member_basename
                   for name in zip_file.namelist())
    finally:
        zip_file.close()


class Refpkg(Text):
    """Composite datatype for an unpacked taxtastic reference package."""
    composite_type = "basic"

    def __init__(self, **kwd):
        Text.__init__(self, **kwd)
        # A refpkg is identified by its CONTENTS.json manifest.
        self.add_composite_file("CONTENTS.json")

    def get_mime(self):
        return "application/json"


class RefpkgZip(Binary):
    """A reference package distributed as a single zip archive."""
    file_ext = "refpkg.zip"

    def __init__(self, **kwd):
        Binary.__init__(self, **kwd)

    def sniff(self, filename):
        # A refpkg zip carries a CONTENTS.json manifest somewhere inside.
        return _zip_contains(filename, "CONTENTS.json")

    def get_mime(self):
        return "application/zip"


class OfficeXlsx(Binary):
    """Microsoft Excel 2007+ (.xlsx) spreadsheet (an OPC zip archive)."""
    file_ext = "xlsx"

    def __init__(self, **kwd):
        Binary.__init__(self, **kwd)

    # TODO: this should check for an xl/ directory also
    def sniff(self, filename):
        # Every OPC package contains a [Content_Types].xml part.
        return _zip_contains(filename, "[Content_Types].xml")

    def get_mime(self):
        return "application/zip"


Binary.register_sniffable_binary_format("refpkg.zip", "refpkg.zip", RefpkgZip)
Binary.register_sniffable_binary_format("xlsx", "xlsx", OfficeXlsx)
#!/bin/bash
# file: usearch-wrapper.sh -- Galaxy wrapper that clusters query sequences
# against the RDP reference set with USEARCH (via romp).
#
# $1 is a Galaxy-generated config file defining INPUT_SEQS, CLASS_DB,
# USEARCH_HITS and USEARCH_GROUPS (see usearch.xml).

source "$(dirname "$0")/util.sh"
source "$1"

# Hard-coded cluster-local copies of the filtered RDP 10_31 reference set.
RDP_SEQS="/shared/silo_researcher/Matsen_F/MatsenGrp/micro_refset/rdp/10_31/tax_filter/filtered/rdp_10_31.filter.fasta"
RDP_SEQINFO="/shared/silo_researcher/Matsen_F/MatsenGrp/micro_refset/rdp/10_31/tax_filter/filtered/rdp_10_31.filter.seq_info.csv"

# Extract per-read species-level classifications (with read masses) from the
# pplacer classification database into a CSV consumed by romp below.
sqlite3 -csv -header "${CLASS_DB}" <<EOF > usearch_meta.csv
SELECT pn.name, CAST(pn.mass AS INT) count, tax_id, tax_name, taxa.rank
  FROM multiclass_concat
  JOIN taxa USING (tax_id)
  JOIN placement_names pn USING (placement_id, name)
  WHERE want_rank = 'species';
EOF

# Cluster the query sequences against the RDP references, grouping both
# sides by tax_id and weighting queries by their duplication counts.
romp -v usearch_clusters \
    --usearch-quietly \
    --query-group tax_id \
    --query-duplication count \
    --database-name seqname \
    --database-group tax_id \
    "${INPUT_SEQS}" \
    usearch_meta.csv \
    "${RDP_SEQS}" \
    "${RDP_SEQINFO}" \
    "${USEARCH_HITS}" \
    "${USEARCH_GROUPS}"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usearch.xml Thu Feb 26 18:16:36 2015 -0500 @@ -0,0 +1,50 @@ +<tool id="PHYLO_usearch" name="Analyze sequences" version="1.1.0"> + <description>with USEARCH</description> + <macros> + <import>macros.xml</import> + </macros> + <version_command>/home/matsengrp/local/bin/usearch6_64 --version</version_command> + <command interpreter="bash"> + usearch-wrapper.sh $config + </command> + <stdio> + <expand macro="basic_errors"/> + </stdio> + <inputs> + <param name="input_seqs" type="data" format="fasta" label="Input sequences"/> + <param name="class_db" type="data" format="sqlite3" label="Placement database"/> + </inputs> + <outputs> + <data format="csv" name="usearch_hits" label="USEARCH hits"/> + <data format="csv" name="usearch_groups" label="USEARCH groups"/> + </outputs> + <configfiles> + <configfile name="config"> +INPUT_SEQS="${input_seqs}" +CLASS_DB="${class_db}" + +USEARCH_HITS="${usearch_hits}" +USEARCH_GROUPS="${usearch_groups}" + </configfile> + </configfiles> + <!-- The contents of the help tag is parsed as reStructuredText. Please see + help-template.rst for examples of commonly-used sections in other Galaxy + tools. --> + <help> + +.. class:: infomark + +**What it does** + +This tool queries large sequence databases for target sequences and assigns +those sequences to clusters. + +----- + +**Citation** + +Edgar, R C: **Search and clustering orders of magnitude faster than +BLAST**. Bioinformatics 2010, **26**:19. + + </help> +</tool>
#!/bin/bash
# file: util.sh -- helpers shared by the tool wrapper scripts.  Sourcing this
# file also enables strict mode, logs the environment, and installs an EXIT
# trap that reports elapsed wall-clock time.

# extify REQ_EXT FILE...
# Echo the given files, replacing any file that does not already end in
# .REQ_EXT with a symlink (created in the current directory) that does.
# Useful for tools that insist on a particular file extension.
extify() {
    local REQ_EXT=$1
    shift

    local OUTPUT=""
    local FILE
    for FILE in "$@"; do
        local BASENAME=$(basename "${FILE}")
        local EXT=${BASENAME##*.}           # text after the LAST dot
        if [[ ${EXT} != ${REQ_EXT} ]]; then
            # NOTE: the link name keeps only the part before the FIRST dot.
            local LINK="${BASENAME%%.*}.${REQ_EXT}"
            if [[ ! -f ${LINK} ]]; then
                ln -s "${FILE}" "${LINK}"
            fi
            FILE="${LINK}"
        fi
        OUTPUT="${OUTPUT} ${FILE}"
    done

    # Deliberately unquoted: emits a single space-separated list.
    echo ${OUTPUT}
}

# timer            -> echo the current epoch time (start a timer)
# timer START_TIME -> print elapsed time since START_TIME as H:MM:SS
# from http://www.linuxjournal.com/content/use-date-command-measure-elapsed-time
timer() {
    if [[ $# -eq 0 ]]; then
        date '+%s'
    else
        local stime=$1
        local etime=$(date '+%s')

        if [[ -z "$stime" ]]; then stime=$etime; fi

        # All locals now, so sourcing scripts' globals are not clobbered.
        local dt=$((etime - stime))
        local ds=$((dt % 60))
        local dm=$(((dt / 60) % 60))
        local dh=$((dt / 3600))
        printf '%d:%02d:%02d' $dh $dm $ds
    fi
}

# EXIT trap: report total wall-clock time for the wrapper run.
on_exit() {
    echo "Elapsed time: $(timer ${START_TIME})"
}

# Strict mode: abort on errors/undefined variables, trace commands.
set -eux

# Dump the environment (one variable per line) for debugging failed jobs.
xargs -n 1 -0 < /proc/self/environ > env.log

START_TIME=$(timer)
trap on_exit EXIT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xlsx_to_csv.xml Thu Feb 26 18:16:36 2015 -0500 @@ -0,0 +1,21 @@ +<tool id="CONVERTER_xlsx_to_csv" name="Convert xlsx to csv" version="1.0.0"> + <requirements> + <requirement type="package">yapp_env</requirement> + </requirements> + <command>in2csv -f xlsx $input > $output</command> + <inputs> + <param name="input" type="data" format="xlsx" label="Excel spreadsheet"/> + </inputs> + <outputs> + <data name="output" format="csv"/> + </outputs> + <help> + +.. class:: infomark + +**What it does** + +This tool converts a spreadsheet in Microsoft Excel 2007 (.xlsx) format to CSV. + + </help> +</tool>