Mercurial > repos > bcclaywell > microbiome_pplacer_suite
changeset 1:c8cc6529038c draft
Uploaded
author | bcclaywell |
---|---|
date | Thu, 26 Feb 2015 19:30:44 -0500 |
parents | d4690e65afcd |
children | ce6db18f5fd3 |
files | bootstrap-wrapper.sh bootstrap.py bootstrap.xml classification-wrapper.sh classification.xml datatypes_conf.xml decorate-wrapper.sh decorate.xml filter-wrapper.sh filter.xml macros.xml pplacer-wrapper.sh pplacer.py pplacer.xml preclassification-wrapper.sh preclassification.xml preprocessing-wrapper.sh preprocessing.xml refpkgzip_to_refpkg.xml render_datatable-wrapper.sh render_datatable.py render_datatable.xml taxtastic.py usearch-wrapper.sh usearch.xml util.sh xlsx_to_csv.xml |
diffstat | 27 files changed, 0 insertions(+), 1512 deletions(-) [+] |
line wrap: on
line diff
--- a/bootstrap-wrapper.sh Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ -#!/bin/bash - -source $(dirname $0)/util.sh -source $1 - -python $(dirname $0)/bootstrap.py \ - --plate ${PLATE_ID} \ - ${JUNIOR} \ - --zone ${ZONE_ID} \ - --barcodes ${BARCODES} \ - --labels ${LABELS} \ - --metadata ${METADATA} \ - - < ${SAMPLE_INFO}
--- a/bootstrap.py Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,84 +0,0 @@ -#!/usr/bin/env python - -from __future__ import print_function -import csv -import sys -import os -import argparse - -def warning(*objs): - print("WARNING: ", *objs, file=sys.stderr) - -def main(arguments): - - parser = argparse.ArgumentParser(arguments, description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('infile', help = "CSV input", - type = argparse.FileType('r'), default = sys.stdin) - parser.add_argument('--junior', help = "use junior run specimen naming convention", action = 'store_true') - parser.add_argument('--plate', help = "plate number", type = int, required = True) - parser.add_argument('--zone', help = "zone number", type = int, required = True) - parser.add_argument('--barcodes', help = "name of barcodes file", - type = argparse.FileType('w'), default = 'barcodes.csv') - parser.add_argument('--labels', help = "name of labels file", - type = argparse.FileType('w'), default = 'labels.csv') - parser.add_argument('--metadata', help = "name of metadata template file", - type = argparse.FileType('w'), default = 'metadata.csv') - - args = parser.parse_args(arguments) - - label_key = 'sampleid' - primer_key = 'reverse' - barcode_key = 'barcode' - zone_key = 'zone' - - fstr = "j{plate_id}{primer_id}" if args.junior else "p{plate_id}z{zone_id}{primer_id}" - - reader = csv.DictReader(sys.stdin) - - barcodes = csv.writer(args.barcodes) - labels = csv.writer(args.labels) - metadata = csv.writer(args.metadata) - - barcodes.writerow(['stub', 'barcode']) - labels.writerow(['specimen', 'label']) - metadata.writerow(['specimen', 'plate', 'zone', 'label', 'primer']) - - seen_labels = {} - seen_primers = {} - - # TODO: add checks for duplicates, empty fields, etc., and bail if something goes wrong - for i, d in enumerate(reader): - if not all (k in d for k in (label_key, primer_key, barcode_key)): - return 
"Expected columns not found" - - if zone_key in d and d[zone_key] != str(args.zone): - continue - - label = d[label_key] - primer = d[primer_key] - barcode = d[barcode_key] - zone = args.zone - - if not all((label, primer, barcode)): - # only print a warning if at least one of the fields is non-empty - if any((label, primer, barcode)): - warning("Missing required field on row {}, skipping".format(i+2)) - continue - - if label in seen_labels: - return "Duplicate label '{}' found on rows {} and {}".format(label, seen_labels[label]+2, i+2) - - if primer in seen_primers: - return "Duplicate primer '{}' found on rows {} and {}".format(primer, seen_primers[primer]+2, i+2) - - seen_labels[label] = i - seen_primers[primer] = i - - specimen = fstr.format(plate_id=args.plate, zone_id=zone, primer_id=primer.strip().lower().replace('-','')) - barcodes.writerow([specimen, barcode]) - labels.writerow([specimen, label]) - metadata.writerow([specimen, args.plate, zone, label, primer]) - -if __name__ == '__main__': - sys.exit(main(sys.argv[1:]))
--- a/bootstrap.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,67 +0,0 @@ -<tool id="PHYLO_bootstrap" name="Prepare data" version="1.1.0"> - <description>for analysis</description> - <requirements> - <requirement type="package">yapp_env</requirement> - </requirements> - <macros> - <import>macros.xml</import> - </macros> - <version_command>echo "bootstrap script 1.1.0"</version_command> - <command interpreter="bash"> - bootstrap-wrapper.sh $config - </command> - <stdio> - <expand macro="basic_errors"/> - </stdio> - <inputs> - <param name="plate_id" type="integer" value="1" label="Plate number"/> - <conditional name="run_type"> - <param name="run_type_select" type="select" label="Run type"> - <option value="junior">Junior</option> - <option value="senior">Senior</option> - </param> - <when value="junior"></when> - <when value="senior"> - <param name="zone_id" type="integer" value="1" label="Zone number"/> - </when> - </conditional> - <param name="sample_info" type="data" format="csv" label="Sample information"/> - </inputs> - <outputs> - <data format="csv" name="barcodes" label="Specimen-to-barcode map"/> - <data format="csv" name="labels" label="Specimen-to-label map"/> - <data format="csv" name="metadata" label="Metadata template"/> - </outputs> - <configfiles> - <configfile name="config"> -PLATE_ID="${plate_id}" -#if $run_type.run_type_select == "senior" -JUNIOR="" -ZONE_ID="${run_type.zone_id}" -#else -JUNIOR="--junior" -ZONE_ID="1" -#end if -SAMPLE_INFO="${sample_info}" - -BARCODES="${barcodes}" -LABELS="${labels}" -METADATA="${metadata}" - </configfile> - </configfiles> - <!-- The contents of the help tag is parsed as reStructuredText. Please see - help-template.rst for examples of commonly-used sections in other Galaxy - tools. --> - <help> - -.. class:: infomark - -**What it does** - -This tool parses sample information and creates a mapping of samples to -barcodes. 
The sample information file must contain the columns ``sampleid``, -``barcode``, and ``reverse``, and can optionally contain a ``zone`` column -also. - - </help> -</tool>
--- a/classification-wrapper.sh Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ -#!/bin/bash - -source $(dirname $0)/util.sh -source $1 - -classif_table.py \ - --specimen-map ${SPLIT_MAP} \ - --metadata-map ${LABEL_MAP} \ - --rank ${WANT_RANK} \ - --tallies-wide ${TALLIES_WIDE} \ - --by-specimen ${BY_SPECIMEN} \ - ${CLASS_DB} \ - ${BY_TAXON}
--- a/classification.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,104 +0,0 @@ -<tool id="PHYLO_classification" name="Output classifications" version="2.1.0"> - <description>in tabular format</description> - <requirements> - <requirement type="package">yapp_env</requirement> - </requirements> - <macros> - <import>macros.xml</import> - </macros> - <version_command>echo "guppy $(guppy --version)"</version_command> - <command interpreter="bash"> - classification-wrapper.sh ${config} - </command> - <stdio> - <expand macro="basic_errors"/> - </stdio> - <inputs> - <param name="split_map" type="data" format="csv" label="Read-to-specimen map"/> - <param name="label_map" type="data" format="csv" label="Specimen-to-label map"/> - <param name="class_db" type="data" format="sqlite3" label="Placement database"/> - <param name="want_rank" type="select" label="Desired classification rank"> - <option value="species" selected="true">Species</option> - <option value="genus">Genus</option> - <option value="family">Family</option> - <option value="order">Order</option> - <option value="class">Class</option> - <option value="phylum">Phylum</option> - </param> - </inputs> - <outputs> - <data name="by_taxon" format="csv" label="By-taxon classification"/> - <data name="by_specimen" format="csv" label="By-specimen classification"/> - <data name="tallies_wide" format="csv" label="Tallies-wide classification"/> - </outputs> - <configfiles> - <configfile name="config"> -SPLIT_MAP="${split_map}" -LABEL_MAP="${label_map}" -CLASS_DB="${class_db}" -WANT_RANK="${want_rank}" - -BY_TAXON="${by_taxon}" -BY_SPECIMEN="${by_specimen}" -TALLIES_WIDE="${tallies_wide}" - </configfile> - </configfiles> - <!-- The contents of the help tag is parsed as reStructuredText. Please see - help-template.rst for examples of commonly-used sections in other Galaxy - tools. --> - <help> - -.. 
class:: infomark - -**What it does** - -This tool outputs the classifications made by ``pplacer`` to a tabular format -appropriate for use with R. - ------ - -**Example** - -The classifications are simply done by containment. Say clade A of the -reference tree is the smallest such that contains a given placement. The most -specific classification for that read will be the lowest common ancestor of the -taxonomic classifications for the leaves of A. If the desired classification is -more specific than that, then we get a disconnect between the desired and the -actual classification. For example, if we try to classify at the species level -and the clade LCA is a genus, then we will get a genus name. If there is -uncertainty in read placement, then there is uncertainty in classification. - -For example, here is a classification list made for one read using the tabular -output. The columns are as follows: read name, attempted rank for -classification, actual rank for classification, taxonomic identifier, and -confidence. 
You can see that in this example, there is some uncertainty at and -below species, but only one classification at the genus level:: - - GLKT0ZE01CQ2BU root root 1 1 - GLKT0ZE01CQ2BU below_root below_root 131567 1 - GLKT0ZE01CQ2BU superkingdom superkingdom 2 1 - GLKT0ZE01CQ2BU below_superkingdom superkingdom 2 1 - GLKT0ZE01CQ2BU below_below_superkingdom superkingdom 2 1 - GLKT0ZE01CQ2BU superphylum superkingdom 2 1 - GLKT0ZE01CQ2BU phylum phylum 1239 1 - GLKT0ZE01CQ2BU subphylum phylum 1239 1 - GLKT0ZE01CQ2BU class class 186801 1 - GLKT0ZE01CQ2BU subclass class 186801 1 - GLKT0ZE01CQ2BU order order 186802 1 - GLKT0ZE01CQ2BU below_order order 186802 1 - GLKT0ZE01CQ2BU below_below_order order 186802 1 - GLKT0ZE01CQ2BU suborder order 186802 1 - GLKT0ZE01CQ2BU family family 186804 1 - GLKT0ZE01CQ2BU below_family family 186804 1 - GLKT0ZE01CQ2BU genus genus 1257 1 - GLKT0ZE01CQ2BU species_group genus 1257 1 - GLKT0ZE01CQ2BU species_subgroup genus 1257 1 - GLKT0ZE01CQ2BU species genus 1257 0.0732247 - GLKT0ZE01CQ2BU species species 1261 0.853561 - GLKT0ZE01CQ2BU species species 341694 0.073214 - GLKT0ZE01CQ2BU below_species genus 1257 0.0732247 - GLKT0ZE01CQ2BU below_species species 1261 0.853561 - GLKT0ZE01CQ2BU below_species species 341694 0.073214 - - </help> -</tool>
--- a/datatypes_conf.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,22 +0,0 @@ -<?xml version="1.0"?> -<datatypes> - <datatype_files> - <datatype_file name="pplacer.py"/> - <datatype_file name="taxtastic.py"/> - </datatype_files> - <registration> - <datatype extension="jplace" type="galaxy.datatypes.pplacer:Jplace" mimetype="application/json" display_in_upload="True"/> - <datatype extension="refpkg" type="galaxy.datatypes.taxtastic:Refpkg" display_in_upload="False"/> - <datatype extension="refpkg.zip" type="galaxy.datatypes.taxtastic:RefpkgZip" mimetype="application/zip" display_in_upload="True"> - <converter file="refpkgzip_to_refpkg.xml" target_datatype="refpkg" depends_on="unzip"/> - </datatype> - <datatype extension="xlsx" type="galaxy.datatypes.taxtastic:OfficeXlsx" mimetype="application/zip" display_in_upload="True"> - <converter file="xlsx_to_csv.xml" target_datatype="csv"/> - </datatype> - </registration> - <sniffers> - <sniffer type="galaxy.datatypes.pplacer:Jplace"/> - <sniffer type="galaxy.datatypes.taxtastic:RefpkgZip"/> - <sniffer type="galaxy.datatypes.taxtastic:OfficeXlsx"/> - </sniffers> -</datatypes>
--- a/decorate-wrapper.sh Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -#!/bin/bash - -source $(dirname $0)/util.sh -source $1 - -csvcut -c "specimen,${COLUMNS}" ${METADATA} | \ - csvjoin -c "specimen" ${GROUP_BY_SPECIMEN} - > ${DECORATED_GROUP_BY_SPECIMEN} - -# drop duplicate columns (thanks, Erick!) -#csvcut -c $(head -n 1 addresses.csv | sed "s/,/\n/g" | sort |uniq | paste -s -d",")
--- a/decorate.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,46 +0,0 @@ -<tool id="PHYLO_decorate" name="Decorate" version="1.0.0"> - <description>classification results with sample metadata</description> - <requirements> - <requirement type="package">yapp_env</requirement> - </requirements> - <macros> - <import>macros.xml</import> - </macros> - <version_command>echo "decorate script 1.0.0"</version_command> - <command interpreter="bash"> - decorate-wrapper.sh ${config} - </command> - <stdio> - <expand macro="basic_errors"/> - </stdio> - <inputs> - <param name="group_by_specimen" type="data" format="csv" label="Grouped-by-specimen classification"/> - <param name="metadata" type="data" format="csv" label="Sample metadata"/> - <param name="columns" type="text" label="Comma-separated metadata columns" area="True" size="5x40"/> - </inputs> - <outputs> - <data name="decorated_group_by_specimen" format="csv" label="Decorated grouped-by-specimen classification"/> - </outputs> - <configfiles> - <configfile name="config"> -GROUP_BY_SPECIMEN="${group_by_specimen}" -METADATA="${metadata}" -COLUMNS="${columns}" - -DECORATED_GROUP_BY_SPECIMEN="${decorated_group_by_specimen}" - </configfile> - </configfiles> - <!-- The contents of the help tag is parsed as reStructuredText. Please see - help-template.rst for examples of commonly-used sections in other Galaxy - tools. --> - <help> - -.. class:: infomark - -**What it does** - -This tool joins the classifications made by ``pplacer`` with arbitrary sample -metadata. - - </help> -</tool>
--- a/filter-wrapper.sh Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,48 +0,0 @@ -#!/bin/bash - -source $(dirname $0)/util.sh -source $1 - -INPUT_QUAL=$(extify qual ${INPUT_QUAL}) -BARCODES=$(extify csv ${BARCODES}) -RAW_SEQS=$(extify fasta ${RAW_SEQS}) - -seqmagick quality-filter \ - --input-qual ${INPUT_QUAL} \ - --barcode-file ${BARCODES} \ - --primer "${PRIMER}" \ - --report-out ${FILTER_REPORT} \ - --details-out ${FILTER_DETAILS} \ - --map-out ${SPLIT_MAP} \ - --barcode-header \ - --min-length ${MIN_LENGTH} \ - --min-mean-quality ${MIN_QUALITY} \ - --quality-window 30 \ - --quality-window-prop 0.9 \ - --quality-window-mean-qual 15 \ - ${RAW_SEQS} \ - filtered.fasta - -if [[ ${REVERSE_COMPLEMENT} == "TRUE" ]]; then - seqmagick mogrify \ - --reverse-complement \ - filtered.fasta -fi - -mv filtered.fasta ${FILTERED_SEQS} - -# TODO: separate tool for concatenating seq data (and reverse complementing them?) -#cat [12]*Reads.fasta | seqmagick convert --input-format fasta - combined.fasta --reverse-complement -#cat [12]*.map.csv > combined.map.csv - -sequencing_quality_report.py ${PLATE_JSON} -t "Sequencing quality report" -o ${SQR_DIR} - -cat <<EOF > ${SQR} -<!DOCTYPE HTML> -<html lang="en-US"> - <head/> - <body> - <a href="index.html">Sequencing quality report</a> - </body> -</html> -EOF
--- a/filter.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,103 +0,0 @@ -<tool id="PHYLO_filter" name="Filter and trim" version="1.2.0"> - <description>sequences</description> - <requirements> - <requirement type="package">yapp_env</requirement> - </requirements> - <macros> - <import>macros.xml</import> - </macros> - <version_command>seqmagick --version</version_command> - <command interpreter="bash"> - filter-wrapper.sh ${config} - </command> - <stdio> - <expand macro="basic_errors"/> - </stdio> - <inputs> - <!-- TODO: can take either fasta+qual or fastq --> - <param name="plate_id" type="integer" value="1" label="Plate number"/> - <param name="zone_id" type="integer" value="1" label="Zone number"/> - <param name="raw_seqs" type="data" format="fasta" label="Unfiltered sequences"/> - <param name="input_qual" type="data" format="qual" label="Sequence quality data"/> - <!-- TODO: handle MID format for multi-sample sequencing; see http://qiime.org/scripts/split_libraries.html --> - <param name="barcodes" type="data" format="csv" label="Barcodes"/> - <param name="primer" type="text" label="Primer" value="GCGGACTACCVGGGTATCTAAT" area="True" size="1x40"/> - <param name="min_length" type="integer" min="100" max="1000" value="350" label="Minimum sequence length"/> - <param name="min_quality" type="integer" min="0" max="63" value="35" label="Minimum mean sequence quality"/> - <param name="reverse_complement" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Reads uniformly correspond to negative strands"/> - </inputs> - <outputs> - <data name="filtered_seqs" format="fasta" label="Filtered sequences"/> - <data name="filter_report" format="tabular" label="Filtering report"/> - <data name="filter_details" format="data" label="Filtering details"/> - <data name="split_map" format="csv" label="Read-to-specimen map"/> - <data name="seq_qual_report" format="html" label="Sequence quality report"/> - </outputs> - <configfiles> - 
<configfile name="plate_json"> -{ - "plate": ${plate_id}, - "name": "Plate ${plate_id}", - "zones": [ - { - "zone": ${zone_id}, - "cleaning_stats": "${filter_details}" - } - ] -} - </configfile> - <configfile name="config"> -RAW_SEQS="${raw_seqs}" -INPUT_QUAL="${input_qual}" -BARCODES="${barcodes}" -PRIMER="${primer}" -MIN_LENGTH="${min_length}" -MIN_QUALITY="${min_quality}" -REVERSE_COMPLEMENT="${reverse_complement}" -PLATE_JSON="${plate_json}" - -FILTERED_SEQS="${filtered_seqs}" -FILTER_REPORT="${filter_report}" -FILTER_DETAILS="${filter_details}" -SPLIT_MAP="${split_map}" -SQR="${seq_qual_report}" -SQR_DIR="${seq_qual_report.files_path}" - </configfile> - </configfiles> - <!-- The contents of the help tag is parsed as reStructuredText. Please see - help-template.rst for examples of commonly-used sections in other Galaxy - tools. --> - <help> - -.. class:: infomark - -**What it does** - -This tool truncates and removes sequences that don’t match a set of quality -criteria, as well as mapping sequence barcodes to specimens. It takes input -sequences in FASTA format and a quality file, and outputs the filtered -sequences as well as a filtering summary and a sequence quality report. - -The default quality filter settings are: - -+---------------------------+------+ -|parameter |value | -+===========================+======+ -|--min-length |350 | -+---------------------------+------+ -|--min-mean-quality |35 | -+---------------------------+------+ -|--quality-window |30 | -+---------------------------+------+ -|--quality-window-prop |0.9 | -+---------------------------+------+ -|--quality-window-mean-qual |15 | -+---------------------------+------+ - -See seqmagick's `quality filter documentation`_ for full explanations of these -parameters. - -.. _quality filter documentation: http://fhcrc.github.io/seqmagick/quality_filter.html - - </help> -</tool>
--- a/macros.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -<macros> - <macro name="basic_errors"> - <exit_code range="1:" level="fatal"/> - <regex match="error" level="fatal"/> - <regex match="traceback" level="fatal"/> - <regex match="warning" level="warning"/> - </macro> -</macros>
--- a/pplacer-wrapper.sh Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ -#!/bin/bash - -source $(dirname $0)/util.sh -source $1 - -QUERY_SEQS=$(extify fasta ${QUERY_SEQS}) -PPLACER_DEFAULT_ARGS="-j ${GALAXY_SLOTS:-4} -p --inform-prior --prior-lower 0.01 --map-identity" - -pplacer \ - ${PPLACER_DEFAULT_ARGS} \ - -c ${REFPKG} \ - -o ${PLACED_SEQS} \ - ${QUERY_SEQS}
--- a/pplacer.py Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,54 +0,0 @@ -import json -from galaxy.datatypes.data import Text -from galaxy.datatypes.images import Html - -class Jplace(Text): - file_ext = "jplace" - - def sniff(self, filename): - try: - with open(filename, "r") as f: - data = json.load(f) - if all (k in data for k in ("version", "tree", "placements", "fields")): - return True - except: - pass - - return False - - def get_mime(self): - return "application/json" - -class AutoPrimaryComposite(Html): - composite_type = "auto_primary_file" - - def __init__(self, **kwd): - Html.__init__(self, **kwd) - - def regenerate_primary_file(self,dataset): - """ - cannot do this until we are setting metadata - """ - bn = dataset.metadata.base_name - efp = dataset.extra_files_path - flist = os.listdir(efp) - rval = ['<html><head><title>Files for Composite Dataset %s</title></head><body><p/>Composite %s contains:<p/><ul>' % (dataset.name,dataset.name)] - for i,fname in enumerate(flist): - sfname = os.path.split(fname)[-1] - f,e = os.path.splitext(fname) - rval.append( '<li><a href="%s">%s</a></li>' % ( sfname, sfname) ) - rval.append( '</ul></body></html>' ) - f = file(dataset.file_name,'w') - f.write("\n".join( rval )) - f.write('\n') - f.close() - - def set_meta(self, dataset, **kwd): - Html.set_meta(self, dataset, **kwd) - self.regenerate_primary_file(dataset) - - def get_mime(self): - return "text/html" - -class BasicHtmlComposite(Html): - composite_type = "basic"
--- a/pplacer.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,53 +0,0 @@ -<tool id="PHYLO_pplacer" name="Place aligned sequences" version="1.0.0"> - <description>on a reference tree</description> - <requirements> - <requirement type="package">yapp_env</requirement> - </requirements> - <macros> - <import>macros.xml</import> - </macros> - <version_command>echo "pplacer $(pplacer --version)"</version_command> - <command interpreter="bash"> - pplacer-wrapper.sh ${config} - </command> - <stdio> - <expand macro="basic_errors"/> - </stdio> - <inputs> - <param name="refpkg" type="data" format="refpkg" label="Reference package"/> - <param name="query_seqs" type="data" format="fasta" label="Query alignment"/> - </inputs> - <outputs> - <data name="placed_seqs" format="jplace" label="Placed sequences"/> - </outputs> - <configfiles> - <configfile name="config"> -REFPKG="${refpkg.extra_files_path}" -QUERY_SEQS="${query_seqs}" - -PLACED_SEQS="${placed_seqs}" - </configfile> - </configfiles> - <!-- The contents of the help tag is parsed as reStructuredText. Please see - help-template.rst for examples of commonly-used sections in other Galaxy - tools. --> - <help> - -.. class:: infomark - -**What it does** - -This tool places query sequences on a fixed reference phylogenetic tree -according to a reference alignment, producing taxonomic annotations which can -be used for classification and visualization. - ------ - -**Citation** - -Matsen F, Kodner R, Armbrust E V: **pplacer: linear time maximum-likelihood and -Bayesian phylogenetic placement of sequences onto a fixed reference tree**. BMC -Bioinformatics 2010, **11**:1. - - </help> -</tool>
--- a/preclassification-wrapper.sh Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,30 +0,0 @@ -#!/bin/bash - -source $(dirname $0)/util.sh -source $1 - -PLACED_SEQS=$(extify jplace ${PLACED_SEQS}) -NBC_SEQS=$(extify fasta ${NBC_SEQS}) - -guppy redup \ - -m \ - -d ${DEDUP_INFO} \ - -o ${REDUPED_SEQS} \ - ${PLACED_SEQS} - -REDUPED_SEQS=$(extify jplace ${REDUPED_SEQS}) - -rppr prep_db \ - -c ${REFPKG} \ - --sqlite ${CLASS_DB} - -guppy classify \ - -c ${REFPKG} \ - -j ${GALAXY_SLOTS:-4} \ - --pp \ - --sqlite ${CLASS_DB} \ - --classifier hybrid2 \ - --nbc-sequences ${NBC_SEQS} \ - ${REDUPED_SEQS} - -multiclass_concat.py --dedup-info ${DEDUP_INFO} ${CLASS_DB}
--- a/preclassification.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,50 +0,0 @@ -<tool id="PHYLO_preclassification" name="Generate database" version="1.2.0"> - <description>for classification</description> - <requirements> - <requirement type="package">yapp_env</requirement> - </requirements> - <macros> - <import>macros.xml</import> - </macros> - <version_command>echo "guppy $(guppy --version)"</version_command> - <command interpreter="bash"> - preclassification-wrapper.sh ${config} - </command> - <stdio> - <expand macro="basic_errors"/> - </stdio> - <inputs> - <param name="dedup_info" type="data" format="csv" label="Deduplication info"/> - <param name="refpkg" type="data" format="refpkg" label="Reference package"/> - <param name="nbc_seqs" type="data" format="fasta" label="Query alignment" help="The query alignment specified here should be the same as the one passed to pplacer."/> - <param name="placed_seqs" type="data" format="jplace" label="Placements"/> - </inputs> - <outputs> - <data name="reduped_seqs" format="jplace" label="Reduped placements"/> - <data name="class_db" format="sqlite3" label="Placement database"/> - </outputs> - <configfiles> - <configfile name="config"> -DEDUP_INFO="${dedup_info}" -REFPKG="${refpkg.extra_files_path}" -NBC_SEQS="${nbc_seqs}" -PLACED_SEQS="${placed_seqs}" - -REDUPED_SEQS="${reduped_seqs}" -CLASS_DB="${class_db}" - </configfile> - </configfiles> - <!-- The contents of the help tag is parsed as reStructuredText. Please see - help-template.rst for examples of commonly-used sections in other Galaxy - tools. --> - <help> - -.. class:: infomark - -**What it does** - -This tool outputs the classifications made by ``pplacer`` to a database for use -in taxonomic classification. - - </help> -</tool>
--- a/preprocessing-wrapper.sh Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,20 +0,0 @@ -#!/bin/bash - -source $1 - -deduplicate_sequences.py \ - --split-map ${SPLIT_MAP} \ - --deduplicated-sequences-file ${DEDUP_INFO} \ - ${INPUT_SEQS} \ - ${DEDUP_SEQS} - -# adapted from yapp/bin/refpkg_align -ref_sto=$(taxit rp ${REFPKG} aln_sto) -profile=$(taxit rp ${REFPKG} profile) - -sto=$(mktemp -u).sto - -cmalign --cpu ${GALAXY_SLOTS:-4} -o "$sto" --sfile "${ALIGNED_SCORES}" --noprob --dnaout "$profile" "${DEDUP_SEQS}" | grep -E '^#' - -esl-alimerge --dna --outformat afa "$ref_sto" "$sto" | \ - seqmagick convert --output-format fasta --dash-gap - "${ALIGNED_SEQS}"
--- a/preprocessing.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,53 +0,0 @@ -<tool id="PHYLO_preprocessing" name="Preprocess sequences" version="2.0.0"> - <description>in preparation for phylogenetic placement</description> - <requirements> - <requirement type="package">yapp_env</requirement> - </requirements> - <macros> - <import>macros.xml</import> - </macros> - <version_command>echo "guppy $(guppy --version)"</version_command> - <command interpreter="bash"> - preprocessing-wrapper.sh ${config} - </command> - <stdio> - <expand macro="basic_errors"/> - </stdio> - <inputs> - <param name="refpkg" type="data" format="refpkg" label="Reference package"/> - <param name="input_seqs" type="data" format="fasta" label="Input sequences"/> - <param name="split_map" type="data" format="csv" label="Read-to-specimen map"/> - </inputs> - <outputs> - <data name="dedup_seqs" format="fasta" label="Deduplicated sequences"/> - <data name="dedup_info" format="csv" label="Deduplication info"/> - <data name="aligned_seqs" format="fasta" label="Aligned sequences"/> - <data name="aligned_scores" format="txt" label="Alignment scores"/> - </outputs> - <configfiles> - <configfile name="config"> -REFPKG="${refpkg.extra_files_path}" -INPUT_SEQS="${input_seqs}" -SPLIT_MAP="${split_map}" - -DEDUP_SEQS="${dedup_seqs}" -DEDUP_INFO="${dedup_info}" -ALIGNED_SEQS="${aligned_seqs}" -ALIGNED_SCORES="${aligned_scores}" - </configfile> - </configfiles> - <!-- The contents of the help tag is parsed as reStructuredText. Please see - help-template.rst for examples of commonly-used sections in other Galaxy - tools. --> - <help> - -.. class:: infomark - -**What it does** - -This tool aligns query sequences with the reference sequences used to make the -reference tree contained in the reference package and then merges the query and -reference sequences. - - </help> -</tool>
--- a/refpkgzip_to_refpkg.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,11 +0,0 @@ -<tool id="CONVERTER_refpkgzip_to_refpkg" name="Convert zipped refpkg to refpkg" version="1.0.0"> - <command>unzip -o -j $input -d $output.files_path</command> - <inputs> - <param name="input" type="data" format="refpkg.zip" label="Zipped refpkg"/> - </inputs> - <outputs> - <data name="output" format="refpkg"/> - </outputs> - <help> - </help> -</tool>
--- a/render_datatable-wrapper.sh Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,20 +0,0 @@ -#!/bin/bash - -source $(dirname $0)/util.sh -source $1 - -mkdir -p ${OUTPUT_DIR} - -python $(dirname $0)/render_datatable.py \ - < ${INPUT} \ - > ${OUTPUT_DIR}/index.html - -cat <<EOF > ${OUTPUT} -<!DOCTYPE HTML> -<html lang="en-US"> - <head/> - <body> - <a href="index.html">Generated table</a> - </body> -</html> -EOF
#!/usr/bin/env python
"""render_datatable.py: render delimited text from stdin as an HTML table.

The script reads rows from standard input, guesses how the columns are
separated, and writes a self-contained HTML page to standard output.  The
first input row becomes the table header; the page loads jQuery and
DataTables from public CDNs to make the table sortable and pageable.

Every cell value is HTML-escaped before it is written, so markup inside the
input cannot break (or inject script into) the generated page.
"""

import csv
import itertools
import string
import sys

# Candidate column separators, in the order in which they are tried.
DELIMITER_CANDIDATES = ['\t', ',', ' ']

# Everything up to and including the opening <thead> tag.  The arrows in the
# pagination control are written as the entities &larr; / &rarr; so that this
# source file stays pure ASCII (a non-ASCII literal without a coding
# declaration is a syntax error under Python 2).
HTML_HEAD = """\
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta http-equiv="content-type" content="text/html; charset=UTF-8"></meta>
    <link href="http://netdna.bootstrapcdn.com/twitter-bootstrap/2.3.2/css/bootstrap-combined.no-icons.min.css" rel="stylesheet">
    <style>
div.dataTables_length label {
    float: left;
    text-align: left;
}

div.dataTables_length select {
    width: 75px;
}

div.dataTables_filter label {
    float: right;
}

div.dataTables_info {
    padding-top: 8px;
}

div.dataTables_paginate {
    float: right;
    margin: 0;
}

table.table {
    clear: both;
    margin-bottom: 6px !important;
    max-width: none !important;
}

table.table thead .sorting,
table.table thead .sorting_asc,
table.table thead .sorting_desc,
table.table thead .sorting_asc_disabled,
table.table thead .sorting_desc_disabled {
    cursor: pointer;
    *cursor: hand;
}


table.table thead .sorting { background: url('images/sort_both.png') no-repeat center right; }

//table.table thead .sorting_asc { background: url('images/sort_asc.png') no-repeat center right; }
//table.table thead .sorting_desc { background: url('images/sort_desc.png') no-repeat center right; }
table.table thead .sorting_asc { background: url('http://cdn3.iconfinder.com/data/icons/fatcow/16x16_0140/bullet_arrow_up.png') no-repeat center right; }
table.table thead .sorting_desc { background: url('http://cdn3.iconfinder.com/data/icons/fatcow/16x16_0140/bullet_arrow_down.png') no-repeat center right; }

table.table thead .sorting_asc_disabled { background: url('images/sort_asc_disabled.png') no-repeat center right; }
table.table thead .sorting_desc_disabled { background: url('images/sort_desc_disabled.png') no-repeat center right; }

table.dataTable th:active {
    outline: none;
}

/* Scrolling */
div.dataTables_scrollHead table {
    margin-bottom: 0 !important;
    border-bottom-left-radius: 0;
    border-bottom-right-radius: 0;
}

div.dataTables_scrollHead table thead tr:last-child th:first-child,
div.dataTables_scrollHead table thead tr:last-child td:first-child {
    border-bottom-left-radius: 0 !important;
    border-bottom-right-radius: 0 !important;
}

div.dataTables_scrollBody table {
    border-top: none;
    margin-bottom: 0 !important;
}

div.dataTables_scrollBody tbody tr:first-child th,
div.dataTables_scrollBody tbody tr:first-child td {
    border-top: none;
}

div.dataTables_scrollFoot table {
    border-top: none;
}




/*
 * TableTools styles
 */
.table tbody tr.active td,
.table tbody tr.active th {
    background-color: #08C;
    color: white;
}

.table tbody tr.active:hover td,
.table tbody tr.active:hover th {
    background-color: #0075b0 !important;
}

.table-striped tbody tr.active:nth-child(odd) td,
.table-striped tbody tr.active:nth-child(odd) th {
    background-color: #017ebc;
}

table.DTTT_selectable tbody tr {
    cursor: pointer;
    *cursor: hand;
}

div.DTTT .btn {
    color: #333 !important;
    font-size: 12px;
}

div.DTTT .btn:hover {
    text-decoration: none !important;
}


ul.DTTT_dropdown.dropdown-menu a {
    color: #333 !important; /* needed only when demo_page.css is included */
}

ul.DTTT_dropdown.dropdown-menu li:hover a {
    background-color: #0088cc;
    color: white !important;
}

/* TableTools information display */
div.DTTT_print_info.modal {
    height: 150px;
    margin-top: -75px;
    text-align: center;
}

div.DTTT_print_info h6 {
    font-weight: normal;
    font-size: 28px;
    line-height: 28px;
    margin: 1em;
}

div.DTTT_print_info p {
    font-size: 14px;
    line-height: 20px;
}



/*
 * FixedColumns styles
 */
div.DTFC_LeftHeadWrapper table,
div.DTFC_LeftFootWrapper table,
table.DTFC_Cloned tr.even {
    background-color: white;
}

div.DTFC_LeftHeadWrapper table {
    margin-bottom: 0 !important;
    border-top-right-radius: 0 !important;
    border-bottom-left-radius: 0 !important;
    border-bottom-right-radius: 0 !important;
}

div.DTFC_LeftHeadWrapper table thead tr:last-child th:first-child,
div.DTFC_LeftHeadWrapper table thead tr:last-child td:first-child {
    border-bottom-left-radius: 0 !important;
    border-bottom-right-radius: 0 !important;
}

div.DTFC_LeftBodyWrapper table {
    border-top: none;
    margin-bottom: 0 !important;
}

div.DTFC_LeftBodyWrapper tbody tr:first-child th,
div.DTFC_LeftBodyWrapper tbody tr:first-child td {
    border-top: none;
}

div.DTFC_LeftFootWrapper table {
    border-top: none;
}
    </style>
    <script type="text/javascript" language="javascript" src="http://ajax.aspnetcdn.com/ajax/jQuery/jquery-2.0.0.min.js"></script>
    <script type="text/javascript" language="javascript" src="http://ajax.aspnetcdn.com/ajax/jquery.dataTables/1.9.4/jquery.dataTables.min.js"></script>
    <script type="text/javascript" charset="utf-8">
/* Set the defaults for DataTables initialisation */
$.extend( true, $.fn.dataTable.defaults, {
    "sDom": "<'row-fluid'<'span6'l><'span6'f>r>t<'row-fluid'<'span6'i><'span6'p>>",
    "sPaginationType": "bootstrap",
    "oLanguage": {
        "sLengthMenu": "_MENU_ records per page"
    }
} );


/* Default class modification */
$.extend( $.fn.dataTableExt.oStdClasses, {
    "sWrapper": "dataTables_wrapper form-inline"
} );


/* API method to get paging information */
$.fn.dataTableExt.oApi.fnPagingInfo = function ( oSettings )
{
    return {
        "iStart": oSettings._iDisplayStart,
        "iEnd": oSettings.fnDisplayEnd(),
        "iLength": oSettings._iDisplayLength,
        "iTotal": oSettings.fnRecordsTotal(),
        "iFilteredTotal": oSettings.fnRecordsDisplay(),
        "iPage": oSettings._iDisplayLength === -1 ?
            0 : Math.ceil( oSettings._iDisplayStart / oSettings._iDisplayLength ),
        "iTotalPages": oSettings._iDisplayLength === -1 ?
            0 : Math.ceil( oSettings.fnRecordsDisplay() / oSettings._iDisplayLength )
    };
};


/* Bootstrap style pagination control */
$.extend( $.fn.dataTableExt.oPagination, {
    "bootstrap": {
        "fnInit": function( oSettings, nPaging, fnDraw ) {
            var oLang = oSettings.oLanguage.oPaginate;
            var fnClickHandler = function ( e ) {
                e.preventDefault();
                if ( oSettings.oApi._fnPageChange(oSettings, e.data.action) ) {
                    fnDraw( oSettings );
                }
            };

            $(nPaging).addClass('pagination').append(
                '<ul>'+
                    '<li class="prev disabled"><a href="#">&larr; '+oLang.sPrevious+'</a></li>'+
                    '<li class="next disabled"><a href="#">'+oLang.sNext+' &rarr; </a></li>'+
                '</ul>'
            );
            var els = $('a', nPaging);
            $(els[0]).bind( 'click.DT', { action: "previous" }, fnClickHandler );
            $(els[1]).bind( 'click.DT', { action: "next" }, fnClickHandler );
        },

        "fnUpdate": function ( oSettings, fnDraw ) {
            var iListLength = 5;
            var oPaging = oSettings.oInstance.fnPagingInfo();
            var an = oSettings.aanFeatures.p;
            var i, ien, j, sClass, iStart, iEnd, iHalf=Math.floor(iListLength/2);

            if ( oPaging.iTotalPages < iListLength) {
                iStart = 1;
                iEnd = oPaging.iTotalPages;
            }
            else if ( oPaging.iPage <= iHalf ) {
                iStart = 1;
                iEnd = iListLength;
            } else if ( oPaging.iPage >= (oPaging.iTotalPages-iHalf) ) {
                iStart = oPaging.iTotalPages - iListLength + 1;
                iEnd = oPaging.iTotalPages;
            } else {
                iStart = oPaging.iPage - iHalf + 1;
                iEnd = iStart + iListLength - 1;
            }

            for ( i=0, ien=an.length ; i<ien ; i++ ) {
                // Remove the middle elements
                $('li:gt(0)', an[i]).filter(':not(:last)').remove();

                // Add the new list items and their event handlers
                for ( j=iStart ; j<=iEnd ; j++ ) {
                    sClass = (j==oPaging.iPage+1) ? 'class="active"' : '';
                    $('<li '+sClass+'><a href="#">'+j+'</a></li>')
                        .insertBefore( $('li:last', an[i])[0] )
                        .bind('click', function (e) {
                            e.preventDefault();
                            oSettings._iDisplayStart = (parseInt($('a', this).text(),10)-1) * oPaging.iLength;
                            fnDraw( oSettings );
                        } );
                }

                // Add / remove disabled classes from the static elements
                if ( oPaging.iPage === 0 ) {
                    $('li:first', an[i]).addClass('disabled');
                } else {
                    $('li:first', an[i]).removeClass('disabled');
                }

                if ( oPaging.iPage === oPaging.iTotalPages-1 || oPaging.iTotalPages === 0 ) {
                    $('li:last', an[i]).addClass('disabled');
                } else {
                    $('li:last', an[i]).removeClass('disabled');
                }
            }
        }
    }
} );


/*
 * TableTools Bootstrap compatibility
 * Required TableTools 2.1+
 */
if ( $.fn.DataTable.TableTools ) {
    // Set the classes that TableTools uses to something suitable for Bootstrap
    $.extend( true, $.fn.DataTable.TableTools.classes, {
        "container": "DTTT btn-group",
        "buttons": {
            "normal": "btn",
            "disabled": "disabled"
        },
        "collection": {
            "container": "DTTT_dropdown dropdown-menu",
            "buttons": {
                "normal": "",
                "disabled": "disabled"
            }
        },
        "print": {
            "info": "DTTT_print_info modal"
        },
        "select": {
            "row": "active"
        }
    } );

    // Have the collection use a bootstrap compatible dropdown
    $.extend( true, $.fn.DataTable.TableTools.DEFAULTS.oTags, {
        "collection": {
            "container": "ul",
            "button": "li",
            "liner": "a"
        }
    } );
}


/* Table initialisation */
$(document).ready(function() {
    $('#from_csv').dataTable( {
        "sDom": "<'row'<'span6'l><'span6'f>r>t<'row'<'span6'i><'span6'p>>",
        "sPaginationType": "bootstrap",
        "oLanguage": {
            "sLengthMenu": "_MENU_ records per page"
        }
    } );
} );
    </script>
  </head>
  <body>
    <div class="container" style="margin-top: 10px">
      <table cellpadding="0" cellspacing="0" border="0" class="table table-striped table-bordered" id="from_csv">
        <thead>"""

# Everything after the last table row.
HTML_TAIL = """\
        </tbody>
      </table>
    </div>
  </body>
</html>"""


def detect_delimiter(iterable, char_set):
    """Return the first item of *char_set* that occurs in *iterable*.

    Returns None when none of the candidates is present.
    """
    matches = (c for c in char_set if c in iterable)
    return next(matches, None)


def detect_csv_dialect(sample):
    """Return the csv dialect sniffed from *sample*, or None if sniffing fails."""
    try:
        return csv.Sniffer().sniff(sample)
    except csv.Error:
        # csv.Sniffer signals "could not determine the dialect" with csv.Error.
        return None


def escape_html(text):
    """Return *text* with the characters &, <, > and " replaced by entities."""
    # '&' must be handled first so the entities produced below are not
    # themselves escaped again.
    return (text.replace("&", "&amp;")
                .replace("<", "&lt;")
                .replace(">", "&gt;")
                .replace('"', "&quot;"))


def build_reader(start_lines, all_input):
    """Return an iterable of rows (lists of cell strings) for the input.

    *start_lines* is the list of leading lines used to guess the format;
    *all_input* iterates over every input line, including those leading ones.

    The separator is guessed from the first line:

    * tab or comma: the csv dialect is sniffed from *start_lines*, which
      handles quoted separators.  If sniffing fails, comma-separated input is
      parsed with the default csv dialect and tab-separated input is split on
      whitespace.
    * space: each line is split on runs of whitespace.
    * none found: each line becomes a single-cell row.

    An empty *start_lines* yields no rows at all.
    """
    if not start_lines:
        return iter(())

    delimiter = detect_delimiter(start_lines[0], DELIMITER_CANDIDATES)

    if delimiter in ('\t', ','):
        dialect = detect_csv_dialect(''.join(start_lines))
        if dialect:
            return csv.reader(all_input, dialect)
        if delimiter == ',':
            return csv.reader(all_input)

    if delimiter is not None and delimiter in string.whitespace:
        # str.split() with no arguments splits on arbitrary whitespace runs.
        return (line.strip().split() for line in all_input)

    # No usable separator: keep every line as one cell.  Wrapping the line in
    # a list matters, because joining a bare string would emit one table cell
    # per character.
    return ([line.rstrip('\r\n')] for line in all_input)


def render_rows(rows):
    """Yield the HTML lines for the table header and body.

    The first row is rendered with <th> cells and followed by the line that
    closes <thead> and opens <tbody>; every later row is rendered with <td>
    cells.  That closing/opening line is yielded even when *rows* is empty so
    the surrounding page markup stays balanced.
    """
    header_written = False
    for row in rows:
        cells = [escape_html(cell) for cell in row]
        if header_written:
            yield "<tr><td>" + "</td><td>".join(cells) + "</td></tr>"
        else:
            yield "<tr><th>" + "</th><th>".join(cells) + "</th></tr>"
            yield "</thead><tbody>"
            header_written = True
    if not header_written:
        yield "</thead><tbody>"


def main():
    """Read delimited text from stdin and write the HTML page to stdout."""
    stream = sys.stdin
    # The argument to readlines() is a size hint, not a line count: it reads
    # just enough leading lines to guess the format from.
    start_lines = stream.readlines(10)
    all_input = itertools.chain(iter(start_lines), stream)
    rows = build_reader(start_lines, all_input)

    out = sys.stdout
    out.write(HTML_HEAD + "\n")
    for html_line in render_rows(rows):
        out.write(html_line + "\n")
    out.write(HTML_TAIL + "\n")


if __name__ == "__main__":
    main()
--- a/render_datatable.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,63 +0,0 @@ -<tool id="PHYLO_render_datatable" name="Render CSV file" version="1.1.0"> - <description>as an interactive HTML table</description> - <macros> - <import>macros.xml</import> - </macros> - <command interpreter="bash"> - render_datatable-wrapper.sh $config - </command> - <stdio> - <expand macro="basic_errors"/> - </stdio> - <inputs> - <param name="input" type="data" format="csv" label="CSV file"/> - </inputs> - <outputs> - <data format="html" name="output" label="Generated table"/> - </outputs> - <configfiles> - <configfile name="config"> -INPUT="${input}" - -OUTPUT="${output}" -OUTPUT_DIR="${output.files_path}" - </configfile> - </configfiles> - <!-- The contents of the help tag is parsed as reStructuredText. Please see - help-template.rst for examples of commonly-used sections in other Galaxy - tools. --> - <help> - -.. class:: infomark - -**What it does** - -This tool reformats a CSV file, like this:: - - "seqname","accession","tax_id","species_name","is_type" - "FM872653","FM872653","308994","Dialister propionicifaciens",0.0 - "AY331416","AY331416","239137","Candidate Division TM7 oral",0.0 - "DQ666092","DQ666092","95818_1","Candidate Division TM7 vaginal",0.0 - "S002223913","GQ900631","186802_3","bacterium BVAB3-Strain 1",0.0 - ... - -into an interactive HTML table. 
- -[placeholder] - -+-------------+-----------+----------+---------------------------------------+----------+ -| seqname | accession | tax_id | species_name | is_type | -+=============+===========+==========+=======================================+==========+ -| FM872653 | FM872653 | 308994 | Dialister propionicifaciens | 0.0 | -+-------------+-----------+----------+---------------------------------------+----------+ -| AY331416 | AY331416 | 239137 | Candidate Division TM7 oral | 0.0 | -+-------------+-----------+----------+---------------------------------------+----------+ -| DQ666092 | DQ666092 | 95818_1 | Candidate Division TM7 vaginal | 0.0 | -+-------------+-----------+----------+---------------------------------------+----------+ -| S002223913 | GQ900631 | 186802_3 | bacterium BVAB3-Strain 1 | 0.0 | -+-------------+-----------+----------+---------------------------------------+----------+ - -... - - </help> -</tool>
"""taxtastic.py: Galaxy datatypes for reference packages and xlsx workbooks.

Defines a composite ``Refpkg`` datatype plus two zip-based binary datatypes
(``RefpkgZip`` and ``OfficeXlsx``) and registers the latter two as sniffable
binary formats.
"""

import os
import zipfile
from galaxy.datatypes.binary import Binary
from galaxy.datatypes.data import Text


def _zip_member_names(filename):
    """Return the member names of the zip archive at *filename*.

    Returns None when *filename* is not a zip archive or cannot be opened as
    one, so sniffers can answer "not this type" instead of raising.  The
    archive handle is always closed, even if listing its members fails.
    """
    if not zipfile.is_zipfile(filename):
        return None
    try:
        zip_file = zipfile.ZipFile(filename, "r")
    except zipfile.BadZipfile:
        return None
    try:
        return zip_file.namelist()
    finally:
        zip_file.close()


def _has_member_named(names, basename):
    """Return True if any archive member in *names* has the given basename."""
    return any(os.path.basename(name) == basename for name in names)


class Refpkg(Text):
    """Composite datatype whose composite file is ``CONTENTS.json``."""

    composite_type = "basic"

    def __init__(self, **kwd):
        Text.__init__(self, **kwd)
        self.add_composite_file("CONTENTS.json")

    def get_mime(self):
        """Return the MIME type reported for this datatype."""
        return "application/json"


class RefpkgZip(Binary):
    """Zipped reference package, file extension ``refpkg.zip``."""

    file_ext = "refpkg.zip"

    def __init__(self, **kwd):
        Binary.__init__(self, **kwd)

    def sniff(self, filename):
        """Return True if *filename* is a zip archive holding a CONTENTS.json.

        The file may sit at any depth inside the archive; only its basename
        is compared.
        """
        names = _zip_member_names(filename)
        if names is None:
            return False
        return _has_member_named(names, "CONTENTS.json")

    def get_mime(self):
        """Return the MIME type reported for this datatype."""
        return "application/zip"


class OfficeXlsx(Binary):
    """Excel workbook, file extension ``xlsx``."""

    file_ext = "xlsx"

    def __init__(self, **kwd):
        Binary.__init__(self, **kwd)

    def sniff(self, filename):
        """Return True if *filename* looks like an xlsx workbook.

        The archive must contain a ``[Content_Types].xml`` member and at
        least one member under ``xl/``.  The second check is what separates a
        workbook from other zip-based Office documents, which also carry
        ``[Content_Types].xml``.
        """
        names = _zip_member_names(filename)
        if names is None:
            return False
        has_content_types = _has_member_named(names, "[Content_Types].xml")
        has_xl_directory = any(name.startswith("xl/") for name in names)
        return has_content_types and has_xl_directory

    def get_mime(self):
        """Return the MIME type reported for this datatype."""
        return "application/zip"


Binary.register_sniffable_binary_format("refpkg.zip", "refpkg.zip", RefpkgZip)
Binary.register_sniffable_binary_format("xlsx", "xlsx", OfficeXlsx)
#!/bin/bash
#
# usearch-wrapper.sh CONFIG
#
# Sources util.sh from this script's own directory and then the CONFIG file
# given as the first argument.  CONFIG is expected to define CLASS_DB,
# INPUT_SEQS, USEARCH_HITS and USEARCH_GROUPS, which are used below.
#
# All expansions are quoted so that paths containing whitespace or glob
# characters are passed through as single arguments.

source "$(dirname "$0")/util.sh"
source "$1"

# Reference sequences and their sequence-info table (fixed locations).
RDP_SEQS="/shared/silo_researcher/Matsen_F/MatsenGrp/micro_refset/rdp/10_31/tax_filter/filtered/rdp_10_31.filter.fasta"
RDP_SEQINFO="/shared/silo_researcher/Matsen_F/MatsenGrp/micro_refset/rdp/10_31/tax_filter/filtered/rdp_10_31.filter.seq_info.csv"

# Export the species-rank rows of the classification database, with a header
# row, to usearch_meta.csv in the current directory.
sqlite3 -csv -header "${CLASS_DB}" <<EOF > usearch_meta.csv
SELECT pn.name, CAST(pn.mass AS INT) count, tax_id, tax_name, taxa.rank
  FROM multiclass_concat
  JOIN taxa USING (tax_id)
  JOIN placement_names pn USING (placement_id, name)
 WHERE want_rank = 'species';
EOF

# Run romp's usearch_clusters subcommand on the input sequences and the
# metadata exported above, against the reference sequences.
romp -v usearch_clusters \
    --usearch-quietly \
    --query-group tax_id \
    --query-duplication count \
    --database-name seqname \
    --database-group tax_id \
    "${INPUT_SEQS}" \
    usearch_meta.csv \
    "${RDP_SEQS}" \
    "${RDP_SEQINFO}" \
    "${USEARCH_HITS}" \
    "${USEARCH_GROUPS}"
--- a/usearch.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,50 +0,0 @@ -<tool id="PHYLO_usearch" name="Analyze sequences" version="1.1.0"> - <description>with USEARCH</description> - <macros> - <import>macros.xml</import> - </macros> - <version_command>/home/matsengrp/local/bin/usearch6_64 --version</version_command> - <command interpreter="bash"> - usearch-wrapper.sh $config - </command> - <stdio> - <expand macro="basic_errors"/> - </stdio> - <inputs> - <param name="input_seqs" type="data" format="fasta" label="Input sequences"/> - <param name="class_db" type="data" format="sqlite3" label="Placement database"/> - </inputs> - <outputs> - <data format="csv" name="usearch_hits" label="USEARCH hits"/> - <data format="csv" name="usearch_groups" label="USEARCH groups"/> - </outputs> - <configfiles> - <configfile name="config"> -INPUT_SEQS="${input_seqs}" -CLASS_DB="${class_db}" - -USEARCH_HITS="${usearch_hits}" -USEARCH_GROUPS="${usearch_groups}" - </configfile> - </configfiles> - <!-- The contents of the help tag is parsed as reStructuredText. Please see - help-template.rst for examples of commonly-used sections in other Galaxy - tools. --> - <help> - -.. class:: infomark - -**What it does** - -This tool queries large sequence databases for target sequences and assigns -those sequences to clusters. - ------ - -**Citation** - -Edgar, R C: **Search and clustering orders of magnitude faster than -BLAST**. Bioinformatics 2010, **26**:19. - - </help> -</tool>
#!/bin/bash
#
# util.sh: helpers meant to be sourced by other scripts.
#
# Besides defining the functions below, sourcing this file has side effects:
# it enables `set -eux`, writes the current environment to env.log in the
# working directory, records the start time in START_TIME, and installs an
# EXIT trap that prints the elapsed time.

# extify REQ_EXT FILE...
#
# Print the given file names, space-separated, such that each one ends in
# ".REQ_EXT".  A file whose extension differs is replaced by a symlink in the
# current directory named after the part of its basename before the first
# dot, plus ".REQ_EXT"; the symlink is created unless a regular file of that
# name already exists.
extify() {
    local REQ_EXT=$1
    shift

    local OUTPUT=""
    local FILE
    # $* is deliberately left unquoted: the result is returned as a
    # space-separated string anyway, and this keeps callers that pass several
    # names inside one argument working.
    for FILE in $*; do
        local BASENAME=$(basename "${FILE}")
        local EXT=${BASENAME##*.}
        # The right-hand side is quoted so it is compared literally rather
        # than being treated as a glob pattern.
        if [[ ${EXT} != "${REQ_EXT}" ]]; then
            local LINK="${BASENAME%%.*}.${REQ_EXT}"
            if [[ ! -f ${LINK} ]]; then
                ln -s "${FILE}" "${LINK}"
            fi
            FILE="${LINK}"
        fi
        OUTPUT="${OUTPUT} ${FILE}"
    done

    # Unquoted on purpose: echo collapses the leading separator space.
    echo ${OUTPUT}
}

# timer [START]
#
# With no argument, print the current time in seconds since the epoch.
# With START (seconds since the epoch), print the time elapsed since then as
# H:MM:SS; an empty START is treated as "now".
#
# from http://www.linuxjournal.com/content/use-date-command-measure-elapsed-time
timer() {
    if [[ $# -eq 0 ]]; then
        date '+%s'
    else
        # All working variables are local so that calling timer does not
        # leak names into the script that sourced this file.
        local stime=$1
        local etime dt ds dm dh
        etime=$(date '+%s')

        if [[ -z "$stime" ]]; then stime=$etime; fi

        dt=$((etime - stime))
        ds=$((dt % 60))
        dm=$(((dt / 60) % 60))
        dh=$((dt / 3600))
        printf '%d:%02d:%02d' "$dh" "$dm" "$ds"
    fi
}

# EXIT trap handler: report the time elapsed since START_TIME.
on_exit() {
    echo "Elapsed time: $(timer "${START_TIME}")"
}

# Abort on errors and on unset variables, and trace every command.
set -eux

# Record the environment, one variable per line, for later inspection.
xargs -n 1 -0 < /proc/self/environ > env.log

START_TIME=$(timer)
trap on_exit EXIT
--- a/xlsx_to_csv.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -<tool id="CONVERTER_xlsx_to_csv" name="Convert xlsx to csv" version="1.0.0"> - <requirements> - <requirement type="package">yapp_env</requirement> - </requirements> - <command>in2csv -f xlsx $input > $output</command> - <inputs> - <param name="input" type="data" format="xlsx" label="Excel spreadsheet"/> - </inputs> - <outputs> - <data name="output" format="csv"/> - </outputs> - <help> - -.. class:: infomark - -**What it does** - -This tool converts a spreadsheet in Microsoft Excel 2007 (.xlsx) format to CSV. - - </help> -</tool>