changeset 0:d4690e65afcd draft

Uploaded
author bcclaywell
date Thu, 26 Feb 2015 18:16:36 -0500
parents
children c8cc6529038c
files bootstrap-wrapper.sh bootstrap.py bootstrap.xml classification-wrapper.sh classification.xml datatypes_conf.xml decorate-wrapper.sh decorate.xml filter-wrapper.sh filter.xml macros.xml pplacer-wrapper.sh pplacer.py pplacer.xml preclassification-wrapper.sh preclassification.xml preprocessing-wrapper.sh preprocessing.xml refpkgzip_to_refpkg.xml render_datatable-wrapper.sh render_datatable.py render_datatable.xml taxtastic.py usearch-wrapper.sh usearch.xml util.sh xlsx_to_csv.xml
diffstat 27 files changed, 1512 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bootstrap-wrapper.sh	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+source $(dirname $0)/util.sh
+source $1
+
+python $(dirname $0)/bootstrap.py \
+    --plate ${PLATE_ID} \
+    ${JUNIOR} \
+    --zone ${ZONE_ID} \
+    --barcodes ${BARCODES} \
+    --labels ${LABELS} \
+    --metadata ${METADATA} \
+    - < ${SAMPLE_INFO}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bootstrap.py	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+import csv
+import sys
+import os
+import argparse
+
+def warning(*objs):
+    # Emit warnings on stderr so they don't pollute CSV output written to stdout.
+    print("WARNING: ", *objs, file=sys.stderr)
+
+def main(arguments):
+
+    parser = argparse.ArgumentParser(arguments, description=__doc__,
+                                     formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument('infile', help = "CSV input",
+                        type = argparse.FileType('r'), default = sys.stdin)
+    parser.add_argument('--junior', help = "use junior run specimen naming convention", action = 'store_true')
+    parser.add_argument('--plate', help = "plate number", type = int, required = True)
+    parser.add_argument('--zone', help = "zone number", type = int, required = True)
+    parser.add_argument('--barcodes', help = "name of barcodes file",
+                        type = argparse.FileType('w'), default = 'barcodes.csv')
+    parser.add_argument('--labels', help = "name of labels file",
+                        type = argparse.FileType('w'), default = 'labels.csv')
+    parser.add_argument('--metadata', help = "name of metadata template file",
+                        type = argparse.FileType('w'), default = 'metadata.csv')
+
+    args = parser.parse_args(arguments)
+
+    label_key = 'sampleid'
+    primer_key = 'reverse'
+    barcode_key = 'barcode'
+    zone_key = 'zone'
+
+    fstr = "j{plate_id}{primer_id}" if args.junior else "p{plate_id}z{zone_id}{primer_id}"
+
+    reader = csv.DictReader(sys.stdin)
+
+    barcodes = csv.writer(args.barcodes)
+    labels = csv.writer(args.labels)
+    metadata = csv.writer(args.metadata)
+
+    barcodes.writerow(['stub', 'barcode'])
+    labels.writerow(['specimen', 'label'])
+    metadata.writerow(['specimen', 'plate', 'zone', 'label', 'primer'])
+
+    seen_labels = {}
+    seen_primers = {}
+
+    # TODO: add checks for duplicates, empty fields, etc., and bail if something goes wrong
+    for i, d in enumerate(reader):
+        if not all (k in d for k in (label_key, primer_key, barcode_key)):
+            return "Expected columns not found"
+
+        if zone_key in d and d[zone_key] != str(args.zone):
+            continue
+
+        label = d[label_key]
+        primer = d[primer_key]
+        barcode = d[barcode_key]
+        zone = args.zone
+
+        if not all((label, primer, barcode)):
+            # only print a warning if at least one of the fields is non-empty
+            if any((label, primer, barcode)):
+                warning("Missing required field on row {}, skipping".format(i+2))
+            continue
+
+        if label in seen_labels:
+            return "Duplicate label '{}' found on rows {} and {}".format(label, seen_labels[label]+2, i+2)
+
+        if primer in seen_primers:
+            return "Duplicate primer '{}' found on rows {} and {}".format(primer, seen_primers[primer]+2, i+2)
+
+        seen_labels[label] = i
+        seen_primers[primer] = i
+
+        specimen = fstr.format(plate_id=args.plate, zone_id=zone, primer_id=primer.strip().lower().replace('-',''))
+        barcodes.writerow([specimen, barcode])
+        labels.writerow([specimen, label])
+        metadata.writerow([specimen, args.plate, zone, label, primer])
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv[1:]))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bootstrap.xml	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,67 @@
+<tool id="PHYLO_bootstrap" name="Prepare data" version="1.1.0">
+  <description>for analysis</description>
+  <requirements>
+    <requirement type="package">yapp_env</requirement>
+  </requirements>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <version_command>echo "bootstrap script 1.1.0"</version_command>
+  <command interpreter="bash">
+    bootstrap-wrapper.sh $config
+  </command>
+  <stdio>
+    <expand macro="basic_errors"/>
+  </stdio>
+  <inputs>
+    <param name="plate_id" type="integer" value="1" label="Plate number"/>
+    <conditional name="run_type">
+      <param name="run_type_select" type="select" label="Run type">
+        <option value="junior">Junior</option>
+        <option value="senior">Senior</option>
+      </param>
+      <when value="junior"></when>
+      <when value="senior">
+        <param name="zone_id" type="integer" value="1" label="Zone number"/>
+      </when>
+    </conditional>
+    <param name="sample_info" type="data" format="csv" label="Sample information"/>
+  </inputs>
+  <outputs>
+    <data format="csv" name="barcodes" label="Specimen-to-barcode map"/>
+    <data format="csv" name="labels" label="Specimen-to-label map"/>
+    <data format="csv" name="metadata" label="Metadata template"/>
+  </outputs>
+  <configfiles>
+    <configfile name="config">
+PLATE_ID="${plate_id}"
+#if $run_type.run_type_select == "senior"
+JUNIOR=""
+ZONE_ID="${run_type.zone_id}"
+#else
+JUNIOR="--junior"
+ZONE_ID="1"
+#end if
+SAMPLE_INFO="${sample_info}"
+
+BARCODES="${barcodes}"
+LABELS="${labels}"
+METADATA="${metadata}"
+    </configfile>
+  </configfiles>
+  <!-- The contents of the help tag is parsed as reStructuredText. Please see
+       help-template.rst for examples of commonly-used sections in other Galaxy
+       tools. -->
+  <help>
+
+.. class:: infomark
+
+**What it does**
+
+This tool parses sample information and creates a mapping of samples to
+barcodes. The sample information file must contain the columns ``sampleid``,
+``barcode``, and ``reverse``, and may optionally also contain a ``zone``
+column.
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/classification-wrapper.sh	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+source $(dirname $0)/util.sh
+source $1
+
+classif_table.py \
+    --specimen-map ${SPLIT_MAP} \
+    --metadata-map ${LABEL_MAP} \
+    --rank ${WANT_RANK} \
+    --tallies-wide ${TALLIES_WIDE} \
+    --by-specimen ${BY_SPECIMEN} \
+    ${CLASS_DB} \
+    ${BY_TAXON}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/classification.xml	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,104 @@
+<tool id="PHYLO_classification" name="Output classifications" version="2.1.0">
+  <description>in tabular format</description>
+  <requirements>
+    <requirement type="package">yapp_env</requirement>
+  </requirements>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <version_command>echo "guppy $(guppy --version)"</version_command>
+  <command interpreter="bash">
+    classification-wrapper.sh ${config}
+  </command>
+  <stdio>
+    <expand macro="basic_errors"/>
+  </stdio>
+  <inputs>
+    <param name="split_map" type="data" format="csv" label="Read-to-specimen map"/>
+    <param name="label_map" type="data" format="csv" label="Specimen-to-label map"/>
+    <param name="class_db" type="data" format="sqlite3" label="Placement database"/>
+    <param name="want_rank" type="select" label="Desired classification rank">
+      <option value="species" selected="true">Species</option>
+      <option value="genus">Genus</option>
+      <option value="family">Family</option>
+      <option value="order">Order</option>
+      <option value="class">Class</option>
+      <option value="phylum">Phylum</option>
+    </param>
+  </inputs>
+  <outputs>
+    <data name="by_taxon" format="csv" label="By-taxon classification"/>
+    <data name="by_specimen" format="csv" label="By-specimen classification"/>
+    <data name="tallies_wide" format="csv" label="Tallies-wide classification"/>
+  </outputs>
+  <configfiles>
+    <configfile name="config">
+SPLIT_MAP="${split_map}"
+LABEL_MAP="${label_map}"
+CLASS_DB="${class_db}"
+WANT_RANK="${want_rank}"
+
+BY_TAXON="${by_taxon}"
+BY_SPECIMEN="${by_specimen}"
+TALLIES_WIDE="${tallies_wide}"
+    </configfile>
+  </configfiles>
+  <!-- The contents of the help tag is parsed as reStructuredText. Please see
+       help-template.rst for examples of commonly-used sections in other Galaxy
+       tools. -->
+  <help>
+
+.. class:: infomark
+
+**What it does**
+
+This tool outputs the classifications made by ``pplacer`` to a tabular format
+appropriate for use with R.
+
+-----
+
+**Example**
+
+Classifications are done simply by containment. Say clade A of the
+reference tree is the smallest clade that contains a given placement. The most
+specific classification for that read will be the lowest common ancestor of the
+taxonomic classifications for the leaves of A. If the desired classification is
+more specific than that, then we get a disconnect between the desired and the
+actual classification. For example, if we try to classify at the species level
+and the clade LCA is a genus, then we will get a genus name. If there is
+uncertainty in read placement, then there is uncertainty in classification.
+
+For example, here is a classification list made for one read using the tabular
+output. The columns are as follows: read name, attempted rank for
+classification, actual rank for classification, taxonomic identifier, and
+confidence. You can see that in this example, there is some uncertainty at and
+below species, but only one classification at the genus level::
+
+    GLKT0ZE01CQ2BU                      root          root       1          1
+    GLKT0ZE01CQ2BU                below_root    below_root  131567          1
+    GLKT0ZE01CQ2BU              superkingdom  superkingdom       2          1
+    GLKT0ZE01CQ2BU        below_superkingdom  superkingdom       2          1
+    GLKT0ZE01CQ2BU  below_below_superkingdom  superkingdom       2          1
+    GLKT0ZE01CQ2BU               superphylum  superkingdom       2          1
+    GLKT0ZE01CQ2BU                    phylum        phylum    1239          1
+    GLKT0ZE01CQ2BU                 subphylum        phylum    1239          1
+    GLKT0ZE01CQ2BU                     class         class  186801          1
+    GLKT0ZE01CQ2BU                  subclass         class  186801          1
+    GLKT0ZE01CQ2BU                     order         order  186802          1
+    GLKT0ZE01CQ2BU               below_order         order  186802          1
+    GLKT0ZE01CQ2BU         below_below_order         order  186802          1
+    GLKT0ZE01CQ2BU                  suborder         order  186802          1
+    GLKT0ZE01CQ2BU                    family        family  186804          1
+    GLKT0ZE01CQ2BU              below_family        family  186804          1
+    GLKT0ZE01CQ2BU                     genus         genus    1257          1
+    GLKT0ZE01CQ2BU             species_group         genus    1257          1
+    GLKT0ZE01CQ2BU          species_subgroup         genus    1257          1
+    GLKT0ZE01CQ2BU                   species         genus    1257  0.0732247
+    GLKT0ZE01CQ2BU                   species       species    1261   0.853561
+    GLKT0ZE01CQ2BU                   species       species  341694   0.073214
+    GLKT0ZE01CQ2BU             below_species         genus    1257  0.0732247
+    GLKT0ZE01CQ2BU             below_species       species    1261   0.853561
+    GLKT0ZE01CQ2BU             below_species       species  341694   0.073214
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<datatypes>
+  <datatype_files>
+    <datatype_file name="pplacer.py"/>
+    <datatype_file name="taxtastic.py"/>
+  </datatype_files>
+  <registration>
+    <datatype extension="jplace" type="galaxy.datatypes.pplacer:Jplace" mimetype="application/json" display_in_upload="True"/>
+    <datatype extension="refpkg" type="galaxy.datatypes.taxtastic:Refpkg" display_in_upload="False"/>
+    <datatype extension="refpkg.zip" type="galaxy.datatypes.taxtastic:RefpkgZip" mimetype="application/zip" display_in_upload="True">
+      <converter file="refpkgzip_to_refpkg.xml" target_datatype="refpkg" depends_on="unzip"/>
+    </datatype>
+    <datatype extension="xlsx" type="galaxy.datatypes.taxtastic:OfficeXlsx" mimetype="application/zip" display_in_upload="True">
+      <converter file="xlsx_to_csv.xml" target_datatype="csv"/>
+    </datatype>
+  </registration>
+  <sniffers>
+    <sniffer type="galaxy.datatypes.pplacer:Jplace"/>
+    <sniffer type="galaxy.datatypes.taxtastic:RefpkgZip"/>
+    <sniffer type="galaxy.datatypes.taxtastic:OfficeXlsx"/>
+  </sniffers>
+</datatypes>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/decorate-wrapper.sh	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+source $(dirname $0)/util.sh
+source $1
+
+csvcut -c "specimen,${COLUMNS}" ${METADATA} | \
+    csvjoin -c "specimen" ${GROUP_BY_SPECIMEN} - > ${DECORATED_GROUP_BY_SPECIMEN}
+
+# drop duplicate columns (thanks, Erick!)
+#csvcut -c $(head -n 1 addresses.csv | sed "s/,/\n/g" | sort |uniq | paste -s -d",")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/decorate.xml	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,46 @@
+<tool id="PHYLO_decorate" name="Decorate" version="1.0.0">
+  <description>classification results with sample metadata</description>
+  <requirements>
+    <requirement type="package">yapp_env</requirement>
+  </requirements>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <version_command>echo "decorate script 1.0.0"</version_command>
+  <command interpreter="bash">
+    decorate-wrapper.sh ${config}
+  </command>
+  <stdio>
+    <expand macro="basic_errors"/>
+  </stdio>
+  <inputs>
+    <param name="group_by_specimen" type="data" format="csv" label="Grouped-by-specimen classification"/>
+    <param name="metadata" type="data" format="csv" label="Sample metadata"/>
+    <param name="columns" type="text" label="Comma-separated metadata columns" area="True" size="5x40"/>
+  </inputs>
+  <outputs>
+    <data name="decorated_group_by_specimen" format="csv" label="Decorated grouped-by-specimen classification"/>
+  </outputs>
+  <configfiles>
+    <configfile name="config">
+GROUP_BY_SPECIMEN="${group_by_specimen}"
+METADATA="${metadata}"
+COLUMNS="${columns}"
+
+DECORATED_GROUP_BY_SPECIMEN="${decorated_group_by_specimen}"
+    </configfile>
+  </configfiles>
+  <!-- The contents of the help tag is parsed as reStructuredText. Please see
+       help-template.rst for examples of commonly-used sections in other Galaxy
+       tools. -->
+  <help>
+
+.. class:: infomark
+
+**What it does**
+
+This tool joins the classifications made by ``pplacer`` with arbitrary sample
+metadata.
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filter-wrapper.sh	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+source $(dirname $0)/util.sh
+source $1
+
+INPUT_QUAL=$(extify qual ${INPUT_QUAL})
+BARCODES=$(extify csv ${BARCODES})
+RAW_SEQS=$(extify fasta ${RAW_SEQS})
+
+seqmagick quality-filter \
+    --input-qual ${INPUT_QUAL} \
+    --barcode-file ${BARCODES} \
+    --primer "${PRIMER}" \
+    --report-out ${FILTER_REPORT} \
+    --details-out ${FILTER_DETAILS} \
+    --map-out ${SPLIT_MAP} \
+    --barcode-header \
+    --min-length ${MIN_LENGTH} \
+    --min-mean-quality ${MIN_QUALITY} \
+    --quality-window 30 \
+    --quality-window-prop 0.9 \
+    --quality-window-mean-qual 15 \
+    ${RAW_SEQS} \
+    filtered.fasta
+
+if [[ ${REVERSE_COMPLEMENT} == "TRUE" ]]; then
+    seqmagick mogrify \
+        --reverse-complement \
+        filtered.fasta
+fi
+
+mv filtered.fasta ${FILTERED_SEQS}
+
+# TODO: separate tool for concatenating seq data (and reverse complementing them?)
+#cat [12]*Reads.fasta | seqmagick convert --input-format fasta - combined.fasta --reverse-complement
+#cat [12]*.map.csv > combined.map.csv
+
+sequencing_quality_report.py ${PLATE_JSON} -t "Sequencing quality report" -o ${SQR_DIR}
+
+cat <<EOF > ${SQR}
+<!DOCTYPE HTML>
+<html lang="en-US">
+  <head/>
+  <body>
+    <a href="index.html">Sequencing quality report</a>
+  </body>
+</html>
+EOF
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filter.xml	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,103 @@
+<tool id="PHYLO_filter" name="Filter and trim" version="1.2.0">
+  <description>sequences</description>
+  <requirements>
+    <requirement type="package">yapp_env</requirement>
+  </requirements>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <version_command>seqmagick --version</version_command>
+  <command interpreter="bash">
+    filter-wrapper.sh ${config}
+  </command>
+  <stdio>
+    <expand macro="basic_errors"/>
+  </stdio>
+  <inputs>
+    <!-- TODO: can take either fasta+qual or fastq -->
+    <param name="plate_id" type="integer" value="1" label="Plate number"/>
+    <param name="zone_id" type="integer" value="1" label="Zone number"/>
+    <param name="raw_seqs" type="data" format="fasta" label="Unfiltered sequences"/>
+    <param name="input_qual" type="data" format="qual" label="Sequence quality data"/>
+    <!-- TODO: handle MID format for multi-sample sequencing; see http://qiime.org/scripts/split_libraries.html -->
+    <param name="barcodes" type="data" format="csv" label="Barcodes"/>
+    <param name="primer" type="text" label="Primer" value="GCGGACTACCVGGGTATCTAAT" area="True" size="1x40"/>
+    <param name="min_length" type="integer" min="100" max="1000" value="350" label="Minimum sequence length"/>
+    <param name="min_quality" type="integer" min="0" max="63" value="35" label="Minimum mean sequence quality"/>
+    <param name="reverse_complement" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Reads uniformly correspond to negative strands"/>
+  </inputs>
+  <outputs>
+    <data name="filtered_seqs" format="fasta" label="Filtered sequences"/>
+    <data name="filter_report" format="tabular" label="Filtering report"/>
+    <data name="filter_details" format="data" label="Filtering details"/>
+    <data name="split_map" format="csv" label="Read-to-specimen map"/>
+    <data name="seq_qual_report" format="html" label="Sequence quality report"/>
+  </outputs>
+  <configfiles>
+    <configfile name="plate_json">
+{
+  "plate": ${plate_id},
+  "name": "Plate ${plate_id}",
+  "zones": [
+    {
+      "zone": ${zone_id},
+      "cleaning_stats": "${filter_details}"
+    }
+  ]
+}
+    </configfile>
+    <configfile name="config">
+RAW_SEQS="${raw_seqs}"
+INPUT_QUAL="${input_qual}"
+BARCODES="${barcodes}"
+PRIMER="${primer}"
+MIN_LENGTH="${min_length}"
+MIN_QUALITY="${min_quality}"
+REVERSE_COMPLEMENT="${reverse_complement}"
+PLATE_JSON="${plate_json}"
+
+FILTERED_SEQS="${filtered_seqs}"
+FILTER_REPORT="${filter_report}"
+FILTER_DETAILS="${filter_details}"
+SPLIT_MAP="${split_map}"
+SQR="${seq_qual_report}"
+SQR_DIR="${seq_qual_report.files_path}"
+    </configfile>
+  </configfiles>
+  <!-- The contents of the help tag is parsed as reStructuredText. Please see
+       help-template.rst for examples of commonly-used sections in other Galaxy
+       tools. -->
+  <help>
+
+.. class:: infomark
+
+**What it does**
+
+This tool truncates and removes sequences that don’t match a set of quality
+criteria, as well as mapping sequence barcodes to specimens. It takes input
+sequences in FASTA format and a quality file, and outputs the filtered
+sequences as well as a filtering summary and a sequence quality report.
+
+The default quality filter settings are:
+
++---------------------------+------+
+|parameter                  |value |
++===========================+======+
+|--min-length               |350   |
++---------------------------+------+
+|--min-mean-quality         |35    |
++---------------------------+------+
+|--quality-window           |30    |
++---------------------------+------+
+|--quality-window-prop      |0.9   |
++---------------------------+------+
+|--quality-window-mean-qual |15    |
++---------------------------+------+
+
+See seqmagick's `quality filter documentation`_ for full explanations of these
+parameters.
+
+.. _quality filter documentation: http://fhcrc.github.io/seqmagick/quality_filter.html
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,8 @@
+<macros>
+  <macro name="basic_errors">
+    <exit_code range="1:" level="fatal"/>
+    <regex match="error" level="fatal"/>
+    <regex match="traceback" level="fatal"/>
+    <regex match="warning" level="warning"/>
+  </macro>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pplacer-wrapper.sh	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+source $(dirname $0)/util.sh
+source $1
+
+QUERY_SEQS=$(extify fasta ${QUERY_SEQS})
+PPLACER_DEFAULT_ARGS="-j ${GALAXY_SLOTS:-4} -p --inform-prior --prior-lower 0.01 --map-identity"
+
+pplacer \
+    ${PPLACER_DEFAULT_ARGS} \
+    -c ${REFPKG} \
+    -o ${PLACED_SEQS} \
+    ${QUERY_SEQS}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pplacer.py	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,54 @@
+import json
+from galaxy.datatypes.data import Text
+from galaxy.datatypes.images import Html
+
+class Jplace(Text):
+    file_ext = "jplace"
+
+    def sniff(self, filename):
+        try:
+            with open(filename, "r") as f:
+                data = json.load(f)
+                if all (k in data for k in ("version", "tree", "placements", "fields")):
+                    return True
+        except:
+            pass
+
+        return False
+
+    def get_mime(self):
+        return "application/json"
+
+class AutoPrimaryComposite(Html):
+    composite_type = "auto_primary_file"
+
+    def __init__(self, **kwd):
+        Html.__init__(self, **kwd)
+
+    def regenerate_primary_file(self,dataset):
+        """
+        cannot do this until we are setting metadata
+        """
+        bn = dataset.metadata.base_name
+        efp = dataset.extra_files_path
+        flist = os.listdir(efp)
+        rval = ['<html><head><title>Files for Composite Dataset %s</title></head><body><p/>Composite %s contains:<p/><ul>' % (dataset.name,dataset.name)]
+        for i,fname in enumerate(flist):
+            sfname = os.path.split(fname)[-1]
+            f,e = os.path.splitext(fname)
+            rval.append( '<li><a href="%s">%s</a></li>' % ( sfname, sfname) )
+        rval.append( '</ul></body></html>' )
+        f = file(dataset.file_name,'w')
+        f.write("\n".join( rval ))
+        f.write('\n')
+        f.close()
+
+    def set_meta(self, dataset, **kwd):
+        Html.set_meta(self, dataset, **kwd)
+        self.regenerate_primary_file(dataset)
+
+    def get_mime(self):
+        return "text/html"
+
+class BasicHtmlComposite(Html):
+    # Composite datatype whose primary HTML file is supplied by the tool
+    # itself rather than auto-generated (contrast AutoPrimaryComposite).
+    composite_type = "basic"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pplacer.xml	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,53 @@
+<tool id="PHYLO_pplacer" name="Place aligned sequences" version="1.0.0">
+  <description>on a reference tree</description>
+  <requirements>
+    <requirement type="package">yapp_env</requirement>
+  </requirements>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <version_command>echo "pplacer $(pplacer --version)"</version_command>
+  <command interpreter="bash">
+    pplacer-wrapper.sh ${config}
+  </command>
+  <stdio>
+    <expand macro="basic_errors"/>
+  </stdio>
+  <inputs>
+    <param name="refpkg" type="data" format="refpkg" label="Reference package"/>
+    <param name="query_seqs" type="data" format="fasta" label="Query alignment"/>
+  </inputs>
+  <outputs>
+    <data name="placed_seqs" format="jplace" label="Placed sequences"/>
+  </outputs>
+  <configfiles>
+    <configfile name="config">
+REFPKG="${refpkg.extra_files_path}"
+QUERY_SEQS="${query_seqs}"
+
+PLACED_SEQS="${placed_seqs}"
+    </configfile>
+  </configfiles>
+  <!-- The contents of the help tag is parsed as reStructuredText. Please see
+       help-template.rst for examples of commonly-used sections in other Galaxy
+       tools. -->
+  <help>
+
+.. class:: infomark
+
+**What it does**
+
+This tool places query sequences on a fixed reference phylogenetic tree
+according to a reference alignment, producing taxonomic annotations which can
+be used for classification and visualization.
+
+-----
+
+**Citation**
+
+Matsen F, Kodner R, Armbrust E V: **pplacer: linear time maximum-likelihood and
+Bayesian phylogenetic placement of sequences onto a fixed reference tree**. BMC
+Bioinformatics 2010, **11**:1.
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/preclassification-wrapper.sh	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+source $(dirname $0)/util.sh
+source $1
+
+PLACED_SEQS=$(extify jplace ${PLACED_SEQS})
+NBC_SEQS=$(extify fasta ${NBC_SEQS})
+
+guppy redup \
+    -m \
+    -d ${DEDUP_INFO} \
+    -o ${REDUPED_SEQS} \
+    ${PLACED_SEQS}
+
+REDUPED_SEQS=$(extify jplace ${REDUPED_SEQS})
+
+rppr prep_db \
+    -c ${REFPKG} \
+    --sqlite ${CLASS_DB}
+
+guppy classify \
+    -c ${REFPKG} \
+    -j ${GALAXY_SLOTS:-4} \
+    --pp \
+    --sqlite ${CLASS_DB} \
+    --classifier hybrid2 \
+    --nbc-sequences ${NBC_SEQS} \
+    ${REDUPED_SEQS}
+
+multiclass_concat.py --dedup-info ${DEDUP_INFO} ${CLASS_DB}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/preclassification.xml	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,50 @@
+<tool id="PHYLO_preclassification" name="Generate database" version="1.2.0">
+  <description>for classification</description>
+  <requirements>
+    <requirement type="package">yapp_env</requirement>
+  </requirements>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <version_command>echo "guppy $(guppy --version)"</version_command>
+  <command interpreter="bash">
+    preclassification-wrapper.sh ${config}
+  </command>
+  <stdio>
+    <expand macro="basic_errors"/>
+  </stdio>
+  <inputs>
+    <param name="dedup_info" type="data" format="csv" label="Deduplication info"/>
+    <param name="refpkg" type="data" format="refpkg" label="Reference package"/>
+    <param name="nbc_seqs" type="data" format="fasta" label="Query alignment" help="The query alignment specified here should be the same as the one passed to pplacer."/>
+    <param name="placed_seqs" type="data" format="jplace" label="Placements"/>
+  </inputs>
+  <outputs>
+    <data name="reduped_seqs" format="jplace" label="Reduped placements"/>
+    <data name="class_db" format="sqlite3" label="Placement database"/>
+  </outputs>
+  <configfiles>
+    <configfile name="config">
+DEDUP_INFO="${dedup_info}"
+REFPKG="${refpkg.extra_files_path}"
+NBC_SEQS="${nbc_seqs}"
+PLACED_SEQS="${placed_seqs}"
+
+REDUPED_SEQS="${reduped_seqs}"
+CLASS_DB="${class_db}"
+    </configfile>
+  </configfiles>
+  <!-- The contents of the help tag is parsed as reStructuredText. Please see
+       help-template.rst for examples of commonly-used sections in other Galaxy
+       tools. -->
+  <help>
+
+.. class:: infomark
+
+**What it does**
+
+This tool outputs the classifications made by ``pplacer`` to a database for use
+in taxonomic classification.
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/preprocessing-wrapper.sh	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+source $1
+
+deduplicate_sequences.py \
+    --split-map ${SPLIT_MAP} \
+    --deduplicated-sequences-file ${DEDUP_INFO} \
+    ${INPUT_SEQS} \
+    ${DEDUP_SEQS}
+
+# adapted from yapp/bin/refpkg_align
+ref_sto=$(taxit rp ${REFPKG} aln_sto)
+profile=$(taxit rp ${REFPKG} profile)
+
+sto=$(mktemp -u).sto
+
+cmalign --cpu ${GALAXY_SLOTS:-4} -o "$sto" --sfile "${ALIGNED_SCORES}" --noprob --dnaout "$profile" "${DEDUP_SEQS}" | grep -E '^#'
+
+esl-alimerge --dna --outformat afa "$ref_sto" "$sto" | \
+    seqmagick convert --output-format fasta --dash-gap - "${ALIGNED_SEQS}"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/preprocessing.xml	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,53 @@
+<tool id="PHYLO_preprocessing" name="Preprocess sequences" version="2.0.0">
+  <description>in preparation for phylogenetic placement</description>
+  <requirements>
+    <requirement type="package">yapp_env</requirement>
+  </requirements>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <version_command>echo "guppy $(guppy --version)"</version_command>
+  <command interpreter="bash">
+    preprocessing-wrapper.sh ${config}
+  </command>
+  <stdio>
+    <expand macro="basic_errors"/>
+  </stdio>
+  <inputs>
+    <param name="refpkg" type="data" format="refpkg" label="Reference package"/>
+    <param name="input_seqs" type="data" format="fasta" label="Input sequences"/>
+    <param name="split_map" type="data" format="csv" label="Read-to-specimen map"/>
+  </inputs>
+  <outputs>
+    <data name="dedup_seqs" format="fasta" label="Deduplicated sequences"/>
+    <data name="dedup_info" format="csv" label="Deduplication info"/>
+    <data name="aligned_seqs" format="fasta" label="Aligned sequences"/>
+    <data name="aligned_scores" format="txt" label="Alignment scores"/>
+  </outputs>
+  <configfiles>
+    <configfile name="config">
+REFPKG="${refpkg.extra_files_path}"
+INPUT_SEQS="${input_seqs}"
+SPLIT_MAP="${split_map}"
+
+DEDUP_SEQS="${dedup_seqs}"
+DEDUP_INFO="${dedup_info}"
+ALIGNED_SEQS="${aligned_seqs}"
+ALIGNED_SCORES="${aligned_scores}"
+    </configfile>
+  </configfiles>
+  <!-- The contents of the help tag is parsed as reStructuredText. Please see
+       help-template.rst for examples of commonly-used sections in other Galaxy
+       tools. -->
+  <help>
+
+.. class:: infomark
+
+**What it does**
+
+This tool aligns query sequences with the reference sequences used to make the
+reference tree contained in the reference package and then merges the query and
+reference sequences.
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/refpkgzip_to_refpkg.xml	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,11 @@
+<tool id="CONVERTER_refpkgzip_to_refpkg" name="Convert zipped refpkg to refpkg" version="1.0.0">
+  <command>unzip -o -j $input -d $output.files_path</command>
+  <inputs>
+    <param name="input" type="data" format="refpkg.zip" label="Zipped refpkg"/>
+  </inputs>
+  <outputs>
+    <data name="output" format="refpkg"/>
+  </outputs>
+  <help>
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/render_datatable-wrapper.sh	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+source $(dirname $0)/util.sh
+source $1
+
+mkdir -p ${OUTPUT_DIR}
+
+python $(dirname $0)/render_datatable.py \
+    < ${INPUT} \
+    > ${OUTPUT_DIR}/index.html
+
+cat <<EOF > ${OUTPUT}
+<!DOCTYPE HTML>
+<html lang="en-US">
+  <head/>
+  <body>
+    <a href="index.html">Generated table</a>
+  </body>
+</html>
+EOF
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/render_datatable.py	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,412 @@
+#!/usr/bin/env python
+
+import csv
+import itertools
+import string
+import sys
+
+input = sys.stdin
+start_lines = input.readlines(10)
+all_input = itertools.chain(iter(start_lines), input)
+
+def detect_delimiter(iterable, char_set):
+    matches = (c for c in char_set if c in iterable)
+    return next(matches, None)
+
+def detect_csv_dialect(sample):
+    try:
+        return csv.Sniffer().sniff(sample)
+    except:
+        return None
+
+delimiter = detect_delimiter(start_lines[0], list('\t, '))
+reader = None
+
+if delimiter in list('\t,'):
+    # try to detect csv dialect, which should neatly handle quoted separators and stuff
+    dialect = detect_csv_dialect(''.join(start_lines))
+    if dialect:
+        reader = csv.reader(all_input, dialect)
+
+if not reader:
+    if delimiter in list(string.whitespace):
+        # use str.split() with no arguments to split on arbitrary whitespace strings
+        reader = (line.strip().split() for line in all_input)
+    else:
+        reader = all_input
+
+print """\
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta http-equiv="content-type" content="text/html; charset=UTF-8"></meta>
+    <link href="http://netdna.bootstrapcdn.com/twitter-bootstrap/2.3.2/css/bootstrap-combined.no-icons.min.css" rel="stylesheet">
+    <style>
+div.dataTables_length label {
+	float: left;
+	text-align: left;
+}
+
+div.dataTables_length select {
+	width: 75px;
+}
+
+div.dataTables_filter label {
+	float: right;
+}
+
+div.dataTables_info {
+	padding-top: 8px;
+}
+
+div.dataTables_paginate {
+	float: right;
+	margin: 0;
+}
+
+table.table {
+	clear: both;
+	margin-bottom: 6px !important;
+	max-width: none !important;
+}
+
+table.table thead .sorting,
+table.table thead .sorting_asc,
+table.table thead .sorting_desc,
+table.table thead .sorting_asc_disabled,
+table.table thead .sorting_desc_disabled {
+	cursor: pointer;
+	*cursor: hand;
+}
+
+
+table.table thead .sorting { background: url('images/sort_both.png') no-repeat center right; }
+
+//table.table thead .sorting_asc { background: url('images/sort_asc.png') no-repeat center right; }
+//table.table thead .sorting_desc { background: url('images/sort_desc.png') no-repeat center right; }
+table.table thead .sorting_asc { background: url('http://cdn3.iconfinder.com/data/icons/fatcow/16x16_0140/bullet_arrow_up.png') no-repeat center right; }
+table.table thead .sorting_desc { background: url('http://cdn3.iconfinder.com/data/icons/fatcow/16x16_0140/bullet_arrow_down.png') no-repeat center right; }
+
+table.table thead .sorting_asc_disabled { background: url('images/sort_asc_disabled.png') no-repeat center right; }
+table.table thead .sorting_desc_disabled { background: url('images/sort_desc_disabled.png') no-repeat center right; }
+
+table.dataTable th:active {
+	outline: none;
+}
+
+/* Scrolling */
+div.dataTables_scrollHead table {
+	margin-bottom: 0 !important;
+	border-bottom-left-radius: 0;
+	border-bottom-right-radius: 0;
+}
+
+div.dataTables_scrollHead table thead tr:last-child th:first-child,
+div.dataTables_scrollHead table thead tr:last-child td:first-child {
+	border-bottom-left-radius: 0 !important;
+	border-bottom-right-radius: 0 !important;
+}
+
+div.dataTables_scrollBody table {
+	border-top: none;
+	margin-bottom: 0 !important;
+}
+
+div.dataTables_scrollBody tbody tr:first-child th,
+div.dataTables_scrollBody tbody tr:first-child td {
+	border-top: none;
+}
+
+div.dataTables_scrollFoot table {
+	border-top: none;
+}
+
+
+
+
+/*
+ * TableTools styles
+ */
+.table tbody tr.active td,
+.table tbody tr.active th {
+	background-color: #08C;
+	color: white;
+}
+
+.table tbody tr.active:hover td,
+.table tbody tr.active:hover th {
+	background-color: #0075b0 !important;
+}
+
+.table-striped tbody tr.active:nth-child(odd) td,
+.table-striped tbody tr.active:nth-child(odd) th {
+	background-color: #017ebc;
+}
+
+table.DTTT_selectable tbody tr {
+	cursor: pointer;
+	*cursor: hand;
+}
+
+div.DTTT .btn {
+	color: #333 !important;
+	font-size: 12px;
+}
+
+div.DTTT .btn:hover {
+	text-decoration: none !important;
+}
+
+
+ul.DTTT_dropdown.dropdown-menu a {
+	color: #333 !important; /* needed only when demo_page.css is included */
+}
+
+ul.DTTT_dropdown.dropdown-menu li:hover a {
+	background-color: #0088cc;
+	color: white !important;
+}
+
+/* TableTools information display */
+div.DTTT_print_info.modal {
+	height: 150px;
+	margin-top: -75px;
+	text-align: center;
+}
+
+div.DTTT_print_info h6 {
+	font-weight: normal;
+	font-size: 28px;
+	line-height: 28px;
+	margin: 1em;
+}
+
+div.DTTT_print_info p {
+	font-size: 14px;
+	line-height: 20px;
+}
+
+
+
+/*
+ * FixedColumns styles
+ */
+div.DTFC_LeftHeadWrapper table,
+div.DTFC_LeftFootWrapper table,
+table.DTFC_Cloned tr.even {
+	background-color: white;
+}
+
+div.DTFC_LeftHeadWrapper table {
+	margin-bottom: 0 !important;
+	border-top-right-radius: 0 !important;
+	border-bottom-left-radius: 0 !important;
+	border-bottom-right-radius: 0 !important;
+}
+
+div.DTFC_LeftHeadWrapper table thead tr:last-child th:first-child,
+div.DTFC_LeftHeadWrapper table thead tr:last-child td:first-child {
+	border-bottom-left-radius: 0 !important;
+	border-bottom-right-radius: 0 !important;
+}
+
+div.DTFC_LeftBodyWrapper table {
+	border-top: none;
+	margin-bottom: 0 !important;
+}
+
+div.DTFC_LeftBodyWrapper tbody tr:first-child th,
+div.DTFC_LeftBodyWrapper tbody tr:first-child td {
+	border-top: none;
+}
+
+div.DTFC_LeftFootWrapper table {
+	border-top: none;
+}
+    </style>
+    <script type="text/javascript" language="javascript" src="http://ajax.aspnetcdn.com/ajax/jQuery/jquery-2.0.0.min.js"></script>
+    <script type="text/javascript" language="javascript" src="http://ajax.aspnetcdn.com/ajax/jquery.dataTables/1.9.4/jquery.dataTables.min.js"></script>
+    <script type="text/javascript" charset="utf-8">
+/* Set the defaults for DataTables initialisation */
+$.extend( true, $.fn.dataTable.defaults, {
+	"sDom": "<'row-fluid'<'span6'l><'span6'f>r>t<'row-fluid'<'span6'i><'span6'p>>",
+	"sPaginationType": "bootstrap",
+	"oLanguage": {
+		"sLengthMenu": "_MENU_ records per page"
+	}
+} );
+
+
+/* Default class modification */
+$.extend( $.fn.dataTableExt.oStdClasses, {
+	"sWrapper": "dataTables_wrapper form-inline"
+} );
+
+
+/* API method to get paging information */
+$.fn.dataTableExt.oApi.fnPagingInfo = function ( oSettings )
+{
+	return {
+		"iStart":         oSettings._iDisplayStart,
+		"iEnd":           oSettings.fnDisplayEnd(),
+		"iLength":        oSettings._iDisplayLength,
+		"iTotal":         oSettings.fnRecordsTotal(),
+		"iFilteredTotal": oSettings.fnRecordsDisplay(),
+		"iPage":          oSettings._iDisplayLength === -1 ?
+			0 : Math.ceil( oSettings._iDisplayStart / oSettings._iDisplayLength ),
+		"iTotalPages":    oSettings._iDisplayLength === -1 ?
+			0 : Math.ceil( oSettings.fnRecordsDisplay() / oSettings._iDisplayLength )
+	};
+};
+
+
+/* Bootstrap style pagination control */
+$.extend( $.fn.dataTableExt.oPagination, {
+	"bootstrap": {
+		"fnInit": function( oSettings, nPaging, fnDraw ) {
+			var oLang = oSettings.oLanguage.oPaginate;
+			var fnClickHandler = function ( e ) {
+				e.preventDefault();
+				if ( oSettings.oApi._fnPageChange(oSettings, e.data.action) ) {
+					fnDraw( oSettings );
+				}
+			};
+
+			$(nPaging).addClass('pagination').append(
+				'<ul>'+
+					'<li class="prev disabled"><a href="#">&larr; '+oLang.sPrevious+'</a></li>'+
+					'<li class="next disabled"><a href="#">'+oLang.sNext+' &rarr; </a></li>'+
+				'</ul>'
+			);
+			var els = $('a', nPaging);
+			$(els[0]).bind( 'click.DT', { action: "previous" }, fnClickHandler );
+			$(els[1]).bind( 'click.DT', { action: "next" }, fnClickHandler );
+		},
+
+		"fnUpdate": function ( oSettings, fnDraw ) {
+			var iListLength = 5;
+			var oPaging = oSettings.oInstance.fnPagingInfo();
+			var an = oSettings.aanFeatures.p;
+			var i, ien, j, sClass, iStart, iEnd, iHalf=Math.floor(iListLength/2);
+
+			if ( oPaging.iTotalPages < iListLength) {
+				iStart = 1;
+				iEnd = oPaging.iTotalPages;
+			}
+			else if ( oPaging.iPage <= iHalf ) {
+				iStart = 1;
+				iEnd = iListLength;
+			} else if ( oPaging.iPage >= (oPaging.iTotalPages-iHalf) ) {
+				iStart = oPaging.iTotalPages - iListLength + 1;
+				iEnd = oPaging.iTotalPages;
+			} else {
+				iStart = oPaging.iPage - iHalf + 1;
+				iEnd = iStart + iListLength - 1;
+			}
+
+			for ( i=0, ien=an.length ; i<ien ; i++ ) {
+				// Remove the middle elements
+				$('li:gt(0)', an[i]).filter(':not(:last)').remove();
+
+				// Add the new list items and their event handlers
+				for ( j=iStart ; j<=iEnd ; j++ ) {
+					sClass = (j==oPaging.iPage+1) ? 'class="active"' : '';
+					$('<li '+sClass+'><a href="#">'+j+'</a></li>')
+						.insertBefore( $('li:last', an[i])[0] )
+						.bind('click', function (e) {
+							e.preventDefault();
+							oSettings._iDisplayStart = (parseInt($('a', this).text(),10)-1) * oPaging.iLength;
+							fnDraw( oSettings );
+						} );
+				}
+
+				// Add / remove disabled classes from the static elements
+				if ( oPaging.iPage === 0 ) {
+					$('li:first', an[i]).addClass('disabled');
+				} else {
+					$('li:first', an[i]).removeClass('disabled');
+				}
+
+				if ( oPaging.iPage === oPaging.iTotalPages-1 || oPaging.iTotalPages === 0 ) {
+					$('li:last', an[i]).addClass('disabled');
+				} else {
+					$('li:last', an[i]).removeClass('disabled');
+				}
+			}
+		}
+	}
+} );
+
+
+/*
+ * TableTools Bootstrap compatibility
+ * Required TableTools 2.1+
+ */
+if ( $.fn.DataTable.TableTools ) {
+	// Set the classes that TableTools uses to something suitable for Bootstrap
+	$.extend( true, $.fn.DataTable.TableTools.classes, {
+		"container": "DTTT btn-group",
+		"buttons": {
+			"normal": "btn",
+			"disabled": "disabled"
+		},
+		"collection": {
+			"container": "DTTT_dropdown dropdown-menu",
+			"buttons": {
+				"normal": "",
+				"disabled": "disabled"
+			}
+		},
+		"print": {
+			"info": "DTTT_print_info modal"
+		},
+		"select": {
+			"row": "active"
+		}
+	} );
+
+	// Have the collection use a bootstrap compatible dropdown
+	$.extend( true, $.fn.DataTable.TableTools.DEFAULTS.oTags, {
+		"collection": {
+			"container": "ul",
+			"button": "li",
+			"liner": "a"
+		}
+	} );
+}
+
+
+/* Table initialisation */
+$(document).ready(function() {
+	$('#from_csv').dataTable( {
+		"sDom": "<'row'<'span6'l><'span6'f>r>t<'row'<'span6'i><'span6'p>>",
+		"sPaginationType": "bootstrap",
+		"oLanguage": {
+			"sLengthMenu": "_MENU_ records per page"
+		}
+	} );
+} );
+    </script>
+  </head>
+  <body>
+    <div class="container" style="margin-top: 10px">
+      <table cellpadding="0" cellspacing="0" border="0" class="table table-striped table-bordered" id="from_csv">
+        <thead>\
+"""
+
+for i, row in enumerate(reader):
+    if i == 0:
+        print "<tr><th>" + "</th><th>".join(row) + "</th></tr>"
+    else:
+        print "<tr><td>" + "</td><td>".join(row) + "</td></tr>"
+
+    if i == 0:
+        print "</thead><tbody>"
+
+print """\
+        </tbody>
+      </table>
+    </div>
+  </body>
+</html>\
+"""
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/render_datatable.xml	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,63 @@
+<tool id="PHYLO_render_datatable" name="Render CSV file" version="1.1.0">
+  <description>as an interactive HTML table</description>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <command interpreter="bash">
+    render_datatable-wrapper.sh $config
+  </command>
+  <stdio>
+    <expand macro="basic_errors"/>
+  </stdio>
+  <inputs>
+    <param name="input" type="data" format="csv" label="CSV file"/>
+  </inputs>
+  <outputs>
+    <data format="html" name="output" label="Generated table"/>
+  </outputs>
+  <configfiles>
+    <configfile name="config">
+INPUT="${input}"
+
+OUTPUT="${output}"
+OUTPUT_DIR="${output.files_path}"
+    </configfile>
+  </configfiles>
+  <!-- The contents of the help tag is parsed as reStructuredText. Please see
+       help-template.rst for examples of commonly-used sections in other Galaxy
+       tools. -->
+  <help>
+
+.. class:: infomark
+
+**What it does**
+
+This tool reformats a CSV file, like this::
+
+    "seqname","accession","tax_id","species_name","is_type"
+    "FM872653","FM872653","308994","Dialister propionicifaciens",0.0
+    "AY331416","AY331416","239137","Candidate Division TM7 oral",0.0
+    "DQ666092","DQ666092","95818_1","Candidate Division TM7 vaginal",0.0
+    "S002223913","GQ900631","186802_3","bacterium BVAB3-Strain 1",0.0
+    ...
+
+into an interactive HTML table.
+
+[placeholder]
+
++-------------+-----------+----------+---------------------------------------+----------+
+|  seqname    | accession | tax_id   | species_name                          | is_type  |
++=============+===========+==========+=======================================+==========+
+|  FM872653   | FM872653  | 308994   | Dialister propionicifaciens           | 0.0      |
++-------------+-----------+----------+---------------------------------------+----------+
+|  AY331416   | AY331416  | 239137   | Candidate Division TM7 oral           | 0.0      |
++-------------+-----------+----------+---------------------------------------+----------+
+|  DQ666092   | DQ666092  | 95818_1  | Candidate Division TM7 vaginal        | 0.0      |
++-------------+-----------+----------+---------------------------------------+----------+
+|  S002223913 | GQ900631  | 186802_3 | bacterium BVAB3-Strain 1              | 0.0      |
++-------------+-----------+----------+---------------------------------------+----------+
+
+...
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/taxtastic.py	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,64 @@
+import os
+import zipfile
+from galaxy.datatypes.binary import Binary
+from galaxy.datatypes.data import Text
+
+class Refpkg(Text):
+    composite_type = "basic"
+
+    def __init__(self, **kwd):
+        Text.__init__(self, **kwd)
+        self.add_composite_file("CONTENTS.json")
+
+    def get_mime(self):
+        return "application/json"
+
+class RefpkgZip(Binary):
+    file_ext = "refpkg.zip"
+
+    def __init__(self, **kwd):
+        Binary.__init__(self, **kwd)
+
+    def sniff(self, filename):
+        if not zipfile.is_zipfile(filename):
+            return False
+        contains_contents_file = False
+        zip_file = zipfile.ZipFile(filename, "r")
+        for name in zip_file.namelist():
+            if os.path.basename(name) == "CONTENTS.json":
+                contains_contents_file = True
+                break
+        zip_file.close()
+        if not contains_contents_file:
+            return False
+        return True
+
+    def get_mime(self):
+        return "application/zip"
+
+class OfficeXlsx(Binary):
+    file_ext = "xlsx"
+
+    def __init__(self, **kwd):
+        Binary.__init__(self, **kwd)
+
+    # TODO: this should check for an xl/ directory also
+    def sniff(self, filename):
+        if not zipfile.is_zipfile(filename):
+            return False
+        contains_contents_file = False
+        zip_file = zipfile.ZipFile(filename, "r")
+        for name in zip_file.namelist():
+            if os.path.basename(name) == "[Content_Types].xml":
+                contains_contents_file = True
+                break
+        zip_file.close()
+        if not contains_contents_file:
+            return False
+        return True
+
+    def get_mime(self):
+        return "application/zip"
+
+Binary.register_sniffable_binary_format("refpkg.zip", "refpkg.zip", RefpkgZip)
+Binary.register_sniffable_binary_format("xlsx", "xlsx", OfficeXlsx)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usearch-wrapper.sh	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+source $(dirname $0)/util.sh
+source $1
+
+RDP_SEQS="/shared/silo_researcher/Matsen_F/MatsenGrp/micro_refset/rdp/10_31/tax_filter/filtered/rdp_10_31.filter.fasta"
+RDP_SEQINFO="/shared/silo_researcher/Matsen_F/MatsenGrp/micro_refset/rdp/10_31/tax_filter/filtered/rdp_10_31.filter.seq_info.csv"
+
+sqlite3 -csv -header ${CLASS_DB} <<EOF > usearch_meta.csv
+SELECT pn.name, CAST(pn.mass AS INT) count, tax_id, tax_name, taxa.rank
+  FROM multiclass_concat
+    JOIN taxa USING (tax_id)
+    JOIN placement_names pn USING (placement_id, name)
+    WHERE want_rank = 'species';
+EOF
+
+romp -v usearch_clusters \
+    --usearch-quietly \
+    --query-group tax_id \
+    --query-duplication count \
+    --database-name seqname \
+    --database-group tax_id \
+    ${INPUT_SEQS} \
+    usearch_meta.csv \
+    ${RDP_SEQS} \
+    ${RDP_SEQINFO} \
+    ${USEARCH_HITS} \
+    ${USEARCH_GROUPS}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usearch.xml	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,50 @@
+<tool id="PHYLO_usearch" name="Analyze sequences" version="1.1.0">
+  <description>with USEARCH</description>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <version_command>/home/matsengrp/local/bin/usearch6_64 --version</version_command>
+  <command interpreter="bash">
+    usearch-wrapper.sh $config
+  </command>
+  <stdio>
+    <expand macro="basic_errors"/>
+  </stdio>
+  <inputs>
+    <param name="input_seqs" type="data" format="fasta" label="Input sequences"/>
+    <param name="class_db" type="data" format="sqlite3" label="Placement database"/>
+  </inputs>
+  <outputs>
+    <data format="csv" name="usearch_hits" label="USEARCH hits"/>
+    <data format="csv" name="usearch_groups" label="USEARCH groups"/>
+  </outputs>
+  <configfiles>
+    <configfile name="config">
+INPUT_SEQS="${input_seqs}"
+CLASS_DB="${class_db}"
+
+USEARCH_HITS="${usearch_hits}"
+USEARCH_GROUPS="${usearch_groups}"
+    </configfile>
+  </configfiles>
+  <!-- The contents of the help tag is parsed as reStructuredText. Please see
+       help-template.rst for examples of commonly-used sections in other Galaxy
+       tools. -->
+  <help>
+
+.. class:: infomark
+
+**What it does**
+
+This tool queries large sequence databases for target sequences and assigns
+those sequences to clusters.
+
+-----
+
+**Citation**
+
+Edgar, R C: **Search and clustering orders of magnitude faster than
+BLAST**. Bioinformatics 2010, **26**:19.
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/util.sh	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+extify() {
+    local REQ_EXT=$1
+    shift
+
+    local OUTPUT=""
+    local FILE
+    for FILE in $*; do
+        local BASENAME=$(basename ${FILE})
+        local EXT=${BASENAME##*.}
+        if [[ ${EXT} != ${REQ_EXT} ]]; then
+            local LINK="${BASENAME%%.*}.${REQ_EXT}"
+            if [[ ! -f ${LINK} ]]; then
+                ln -s ${FILE} ${LINK}
+            fi
+            FILE="${LINK}"
+        fi
+        OUTPUT="${OUTPUT} ${FILE}"
+    done
+
+    echo ${OUTPUT}
+}
+
+# from http://www.linuxjournal.com/content/use-date-command-measure-elapsed-time
+timer() {
+    if [[ $# -eq 0 ]]; then
+        echo $(date '+%s')
+    else
+        local  stime=$1
+        etime=$(date '+%s')
+
+        if [[ -z "$stime" ]]; then stime=$etime; fi
+
+        dt=$((etime - stime))
+        ds=$((dt % 60))
+        dm=$(((dt / 60) % 60))
+        dh=$((dt / 3600))
+        printf '%d:%02d:%02d' $dh $dm $ds
+    fi
+}
+
+on_exit() {
+    echo "Elapsed time: $(timer ${START_TIME})"
+}
+
+set -eux
+
+xargs -n 1 -0 < /proc/self/environ > env.log
+
+START_TIME=$(timer)
+trap on_exit EXIT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xlsx_to_csv.xml	Thu Feb 26 18:16:36 2015 -0500
@@ -0,0 +1,21 @@
+<tool id="CONVERTER_xlsx_to_csv" name="Convert xlsx to csv" version="1.0.0">
+  <requirements>
+    <requirement type="package">yapp_env</requirement>
+  </requirements>
+  <command>in2csv -f xlsx $input > $output</command>
+  <inputs>
+    <param name="input" type="data" format="xlsx" label="Excel spreadsheet"/>
+  </inputs>
+  <outputs>
+    <data name="output" format="csv"/>
+  </outputs>
+  <help>
+
+.. class:: infomark
+
+**What it does**
+
+This tool converts a spreadsheet in Microsoft Excel 2007 (.xlsx) format to CSV.
+
+  </help>
+</tool>