# HG changeset patch # User bcclaywell # Date 1424997044 18000 # Node ID c8cc6529038c72119f8d36e62ab1d9edec6a1bd0 # Parent d4690e65afcd5848371e073ce388feb54652ba1f Uploaded diff -r d4690e65afcd -r c8cc6529038c bootstrap-wrapper.sh --- a/bootstrap-wrapper.sh Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ -#!/bin/bash - -source $(dirname $0)/util.sh -source $1 - -python $(dirname $0)/bootstrap.py \ - --plate ${PLATE_ID} \ - ${JUNIOR} \ - --zone ${ZONE_ID} \ - --barcodes ${BARCODES} \ - --labels ${LABELS} \ - --metadata ${METADATA} \ - - < ${SAMPLE_INFO} diff -r d4690e65afcd -r c8cc6529038c bootstrap.py --- a/bootstrap.py Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,84 +0,0 @@ -#!/usr/bin/env python - -from __future__ import print_function -import csv -import sys -import os -import argparse - -def warning(*objs): - print("WARNING: ", *objs, file=sys.stderr) - -def main(arguments): - - parser = argparse.ArgumentParser(arguments, description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('infile', help = "CSV input", - type = argparse.FileType('r'), default = sys.stdin) - parser.add_argument('--junior', help = "use junior run specimen naming convention", action = 'store_true') - parser.add_argument('--plate', help = "plate number", type = int, required = True) - parser.add_argument('--zone', help = "zone number", type = int, required = True) - parser.add_argument('--barcodes', help = "name of barcodes file", - type = argparse.FileType('w'), default = 'barcodes.csv') - parser.add_argument('--labels', help = "name of labels file", - type = argparse.FileType('w'), default = 'labels.csv') - parser.add_argument('--metadata', help = "name of metadata template file", - type = argparse.FileType('w'), default = 'metadata.csv') - - args = parser.parse_args(arguments) - - label_key = 'sampleid' - primer_key = 'reverse' - barcode_key = 'barcode' - zone_key = 'zone' - - fstr = "j{plate_id}{primer_id}" if args.junior else "p{plate_id}z{zone_id}{primer_id}" - - reader = csv.DictReader(sys.stdin) - - barcodes = csv.writer(args.barcodes) - labels = csv.writer(args.labels) - metadata = csv.writer(args.metadata) - - barcodes.writerow(['stub', 'barcode']) - labels.writerow(['specimen', 'label']) - metadata.writerow(['specimen', 'plate', 'zone', 'label', 'primer']) - - seen_labels = {} - seen_primers = {} - - # TODO: add checks for duplicates, empty fields, etc., and bail if something goes wrong - for i, d in enumerate(reader): - if not all (k in d for k in (label_key, primer_key, barcode_key)): - return "Expected columns not found" - - if zone_key in d and d[zone_key] != str(args.zone): - continue - - label = d[label_key] - primer = d[primer_key] - barcode = d[barcode_key] - zone = args.zone - - if not all((label, primer, barcode)): - # only print a warning if at least one of the fields is non-empty - if any((label, primer, barcode)): - warning("Missing required field on row {}, skipping".format(i+2)) - continue - - if label in seen_labels: - return "Duplicate label '{}' found on rows {} and {}".format(label, seen_labels[label]+2, i+2) - - if primer in seen_primers: - return "Duplicate primer '{}' found on rows {} and {}".format(primer, seen_primers[primer]+2, i+2) - - seen_labels[label] = i - seen_primers[primer] = i - - specimen = fstr.format(plate_id=args.plate, zone_id=zone, primer_id=primer.strip().lower().replace('-','')) - barcodes.writerow([specimen, barcode]) - labels.writerow([specimen, label]) - metadata.writerow([specimen, args.plate, zone, label, primer]) - -if __name__ == '__main__': - sys.exit(main(sys.argv[1:])) diff -r d4690e65afcd -r c8cc6529038c bootstrap.xml --- a/bootstrap.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,67 +0,0 @@ - - for analysis - - yapp_env - - - macros.xml - - echo "bootstrap script 1.1.0" - - bootstrap-wrapper.sh $config - - - - - - - - - - - - - - - - - - - - - - - - - -PLATE_ID="${plate_id}" -#if $run_type.run_type_select == "senior" -JUNIOR="" -ZONE_ID="${run_type.zone_id}" -#else -JUNIOR="--junior" -ZONE_ID="1" -#end if -SAMPLE_INFO="${sample_info}" - -BARCODES="${barcodes}" -LABELS="${labels}" -METADATA="${metadata}" - - - - - -.. class:: infomark - -**What it does** - -This tool parses sample information and creates a mapping of samples to -barcodes. The sample information file must contain the columns ``sampleid``, -``barcode``, and ``reverse``, and can optionally contain a ``zone`` column -also. - - - diff -r d4690e65afcd -r c8cc6529038c classification-wrapper.sh --- a/classification-wrapper.sh Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ -#!/bin/bash - -source $(dirname $0)/util.sh -source $1 - -classif_table.py \ - --specimen-map ${SPLIT_MAP} \ - --metadata-map ${LABEL_MAP} \ - --rank ${WANT_RANK} \ - --tallies-wide ${TALLIES_WIDE} \ - --by-specimen ${BY_SPECIMEN} \ - ${CLASS_DB} \ - ${BY_TAXON} diff -r d4690e65afcd -r c8cc6529038c classification.xml --- a/classification.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,104 +0,0 @@ - - in tabular format - - yapp_env - - - macros.xml - - echo "guppy $(guppy --version)" - - classification-wrapper.sh ${config} - - - - - - - - - - - - - - - - - - - - - - - - -SPLIT_MAP="${split_map}" -LABEL_MAP="${label_map}" -CLASS_DB="${class_db}" -WANT_RANK="${want_rank}" - -BY_TAXON="${by_taxon}" -BY_SPECIMEN="${by_specimen}" -TALLIES_WIDE="${tallies_wide}" - - - - - -.. class:: infomark - -**What it does** - -This tool outputs the classifications made by ``pplacer`` to a tabular format -appropriate for use with R. - ------ - -**Example** - -The classifications are simply done by containment. Say clade A of the -reference tree is the smallest such that contains a given placement. The most -specific classification for that read will be the lowest common ancestor of the -taxonomic classifications for the leaves of A. If the desired classification is -more specific than that, then we get a disconnect between the desired and the -actual classification. For example, if we try to classify at the species level -and the clade LCA is a genus, then we will get a genus name. If there is -uncertainty in read placement, then there is uncertainty in classification. - -For example, here is a classification list made for one read using the tabular -output. The columns are as follows: read name, attempted rank for -classification, actual rank for classification, taxonomic identifier, and -confidence. You can see that in this example, there is some uncertainty at and -below species, but only one classification at the genus level:: - - GLKT0ZE01CQ2BU root root 1 1 - GLKT0ZE01CQ2BU below_root below_root 131567 1 - GLKT0ZE01CQ2BU superkingdom superkingdom 2 1 - GLKT0ZE01CQ2BU below_superkingdom superkingdom 2 1 - GLKT0ZE01CQ2BU below_below_superkingdom superkingdom 2 1 - GLKT0ZE01CQ2BU superphylum superkingdom 2 1 - GLKT0ZE01CQ2BU phylum phylum 1239 1 - GLKT0ZE01CQ2BU subphylum phylum 1239 1 - GLKT0ZE01CQ2BU class class 186801 1 - GLKT0ZE01CQ2BU subclass class 186801 1 - GLKT0ZE01CQ2BU order order 186802 1 - GLKT0ZE01CQ2BU below_order order 186802 1 - GLKT0ZE01CQ2BU below_below_order order 186802 1 - GLKT0ZE01CQ2BU suborder order 186802 1 - GLKT0ZE01CQ2BU family family 186804 1 - GLKT0ZE01CQ2BU below_family family 186804 1 - GLKT0ZE01CQ2BU genus genus 1257 1 - GLKT0ZE01CQ2BU species_group genus 1257 1 - GLKT0ZE01CQ2BU species_subgroup genus 1257 1 - GLKT0ZE01CQ2BU species genus 1257 0.0732247 - GLKT0ZE01CQ2BU species species 1261 0.853561 - GLKT0ZE01CQ2BU species species 341694 0.073214 - GLKT0ZE01CQ2BU below_species genus 1257 0.0732247 - GLKT0ZE01CQ2BU below_species species 1261 0.853561 - GLKT0ZE01CQ2BU below_species species 341694 0.073214 - - - diff -r d4690e65afcd -r c8cc6529038c datatypes_conf.xml --- a/datatypes_conf.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,22 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - diff -r d4690e65afcd -r c8cc6529038c decorate-wrapper.sh --- a/decorate-wrapper.sh Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -#!/bin/bash - -source $(dirname $0)/util.sh -source $1 - -csvcut -c "specimen,${COLUMNS}" ${METADATA} | \ - csvjoin -c "specimen" ${GROUP_BY_SPECIMEN} - > ${DECORATED_GROUP_BY_SPECIMEN} - -# drop duplicate columns (thanks, Erick!) -#csvcut -c $(head -n 1 addresses.csv | sed "s/,/\n/g" | sort |uniq | paste -s -d",") diff -r d4690e65afcd -r c8cc6529038c decorate.xml --- a/decorate.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,46 +0,0 @@ - - classification results with sample metadata - - yapp_env - - - macros.xml - - echo "decorate script 1.0.0" - - decorate-wrapper.sh ${config} - - - - - - - - - - - - - - -GROUP_BY_SPECIMEN="${group_by_specimen}" -METADATA="${metadata}" -COLUMNS="${columns}" - -DECORATED_GROUP_BY_SPECIMEN="${decorated_group_by_specimen}" - - - - - -.. class:: infomark - -**What it does** - -This tool joins the classifications made by ``pplacer`` with arbitrary sample -metadata. - - - diff -r d4690e65afcd -r c8cc6529038c filter-wrapper.sh --- a/filter-wrapper.sh Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,48 +0,0 @@ -#!/bin/bash - -source $(dirname $0)/util.sh -source $1 - -INPUT_QUAL=$(extify qual ${INPUT_QUAL}) -BARCODES=$(extify csv ${BARCODES}) -RAW_SEQS=$(extify fasta ${RAW_SEQS}) - -seqmagick quality-filter \ - --input-qual ${INPUT_QUAL} \ - --barcode-file ${BARCODES} \ - --primer "${PRIMER}" \ - --report-out ${FILTER_REPORT} \ - --details-out ${FILTER_DETAILS} \ - --map-out ${SPLIT_MAP} \ - --barcode-header \ - --min-length ${MIN_LENGTH} \ - --min-mean-quality ${MIN_QUALITY} \ - --quality-window 30 \ - --quality-window-prop 0.9 \ - --quality-window-mean-qual 15 \ - ${RAW_SEQS} \ - filtered.fasta - -if [[ ${REVERSE_COMPLEMENT} == "TRUE" ]]; then - seqmagick mogrify \ - --reverse-complement \ - filtered.fasta -fi - -mv filtered.fasta ${FILTERED_SEQS} - -# TODO: separate tool for concatenating seq data (and reverse complementing them?) -#cat [12]*Reads.fasta | seqmagick convert --input-format fasta - combined.fasta --reverse-complement -#cat [12]*.map.csv > combined.map.csv - -sequencing_quality_report.py ${PLATE_JSON} -t "Sequencing quality report" -o ${SQR_DIR} - -cat < ${SQR} - - - - - Sequencing quality report - - -EOF diff -r d4690e65afcd -r c8cc6529038c filter.xml --- a/filter.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,103 +0,0 @@ - - sequences - - yapp_env - - - macros.xml - - seqmagick --version - - filter-wrapper.sh ${config} - - - - - - - - - - - - - - - - - - - - - - - - - - -{ - "plate": ${plate_id}, - "name": "Plate ${plate_id}", - "zones": [ - { - "zone": ${zone_id}, - "cleaning_stats": "${filter_details}" - } - ] -} - - -RAW_SEQS="${raw_seqs}" -INPUT_QUAL="${input_qual}" -BARCODES="${barcodes}" -PRIMER="${primer}" -MIN_LENGTH="${min_length}" -MIN_QUALITY="${min_quality}" -REVERSE_COMPLEMENT="${reverse_complement}" -PLATE_JSON="${plate_json}" - -FILTERED_SEQS="${filtered_seqs}" -FILTER_REPORT="${filter_report}" -FILTER_DETAILS="${filter_details}" -SPLIT_MAP="${split_map}" -SQR="${seq_qual_report}" -SQR_DIR="${seq_qual_report.files_path}" - - - - - -.. class:: infomark - -**What it does** - -This tool truncates and removes sequences that don’t match a set of quality -criteria, as well as mapping sequence barcodes to specimens. It takes input -sequences in FASTA format and a quality file, and outputs the filtered -sequences as well as a filtering summary and a sequence quality report. - -The default quality filter settings are: - -+---------------------------+------+ -|parameter |value | -+===========================+======+ -|--min-length |350 | -+---------------------------+------+ -|--min-mean-quality |35 | -+---------------------------+------+ -|--quality-window |30 | -+---------------------------+------+ -|--quality-window-prop |0.9 | -+---------------------------+------+ -|--quality-window-mean-qual |15 | -+---------------------------+------+ - -See seqmagick's `quality filter documentation`_ for full explanations of these -parameters. - -.. _quality filter documentation: http://fhcrc.github.io/seqmagick/quality_filter.html - - - diff -r d4690e65afcd -r c8cc6529038c macros.xml --- a/macros.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ - - - - - - - - diff -r d4690e65afcd -r c8cc6529038c pplacer-wrapper.sh --- a/pplacer-wrapper.sh Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ -#!/bin/bash - -source $(dirname $0)/util.sh -source $1 - -QUERY_SEQS=$(extify fasta ${QUERY_SEQS}) -PPLACER_DEFAULT_ARGS="-j ${GALAXY_SLOTS:-4} -p --inform-prior --prior-lower 0.01 --map-identity" - -pplacer \ - ${PPLACER_DEFAULT_ARGS} \ - -c ${REFPKG} \ - -o ${PLACED_SEQS} \ - ${QUERY_SEQS} diff -r d4690e65afcd -r c8cc6529038c pplacer.py --- a/pplacer.py Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,54 +0,0 @@ -import json -from galaxy.datatypes.data import Text -from galaxy.datatypes.images import Html - -class Jplace(Text): - file_ext = "jplace" - - def sniff(self, filename): - try: - with open(filename, "r") as f: - data = json.load(f) - if all (k in data for k in ("version", "tree", "placements", "fields")): - return True - except: - pass - - return False - - def get_mime(self): - return "application/json" - -class AutoPrimaryComposite(Html): - composite_type = "auto_primary_file" - - def __init__(self, **kwd): - Html.__init__(self, **kwd) - - def regenerate_primary_file(self,dataset): - """ - cannot do this until we are setting metadata - """ - bn = dataset.metadata.base_name - efp = dataset.extra_files_path - flist = os.listdir(efp) - rval = ['Files for Composite Dataset %s

Composite %s contains:

' ) - f = file(dataset.file_name,'w') - f.write("\n".join( rval )) - f.write('\n') - f.close() - - def set_meta(self, dataset, **kwd): - Html.set_meta(self, dataset, **kwd) - self.regenerate_primary_file(dataset) - - def get_mime(self): - return "text/html" - -class BasicHtmlComposite(Html): - composite_type = "basic" diff -r d4690e65afcd -r c8cc6529038c pplacer.xml --- a/pplacer.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,53 +0,0 @@ - - on a reference tree - - yapp_env - - - macros.xml - - echo "pplacer $(pplacer --version)" - - pplacer-wrapper.sh ${config} - - - - - - - - - - - - - -REFPKG="${refpkg.extra_files_path}" -QUERY_SEQS="${query_seqs}" - -PLACED_SEQS="${placed_seqs}" - - - - - -.. class:: infomark - -**What it does** - -This tool places query sequences on a fixed reference phylogenetic tree -according to a reference alignment, producing taxonomic annotations which can -be used for classification and visualization. - ------ - -**Citation** - -Matsen F, Kodner R, Armbrust E V: **pplacer: linear time maximum-likelihood and -Bayesian phylogenetic placement of sequences onto a fixed reference tree**. BMC -Bioinformatics 2010, **11**:1. - - - diff -r d4690e65afcd -r c8cc6529038c preclassification-wrapper.sh --- a/preclassification-wrapper.sh Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,30 +0,0 @@ -#!/bin/bash - -source $(dirname $0)/util.sh -source $1 - -PLACED_SEQS=$(extify jplace ${PLACED_SEQS}) -NBC_SEQS=$(extify fasta ${NBC_SEQS}) - -guppy redup \ - -m \ - -d ${DEDUP_INFO} \ - -o ${REDUPED_SEQS} \ - ${PLACED_SEQS} - -REDUPED_SEQS=$(extify jplace ${REDUPED_SEQS}) - -rppr prep_db \ - -c ${REFPKG} \ - --sqlite ${CLASS_DB} - -guppy classify \ - -c ${REFPKG} \ - -j ${GALAXY_SLOTS:-4} \ - --pp \ - --sqlite ${CLASS_DB} \ - --classifier hybrid2 \ - --nbc-sequences ${NBC_SEQS} \ - ${REDUPED_SEQS} - -multiclass_concat.py --dedup-info ${DEDUP_INFO} ${CLASS_DB} diff -r d4690e65afcd -r c8cc6529038c preclassification.xml --- a/preclassification.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,50 +0,0 @@ - - for classification - - yapp_env - - - macros.xml - - echo "guppy $(guppy --version)" - - preclassification-wrapper.sh ${config} - - - - - - - - - - - - - - - - -DEDUP_INFO="${dedup_info}" -REFPKG="${refpkg.extra_files_path}" -NBC_SEQS="${nbc_seqs}" -PLACED_SEQS="${placed_seqs}" - -REDUPED_SEQS="${reduped_seqs}" -CLASS_DB="${class_db}" - - - - - -.. class:: infomark - -**What it does** - -This tool outputs the classifications made by ``pplacer`` to a database for use -in taxonomic classification. - - - diff -r d4690e65afcd -r c8cc6529038c preprocessing-wrapper.sh --- a/preprocessing-wrapper.sh Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,20 +0,0 @@ -#!/bin/bash - -source $1 - -deduplicate_sequences.py \ - --split-map ${SPLIT_MAP} \ - --deduplicated-sequences-file ${DEDUP_INFO} \ - ${INPUT_SEQS} \ - ${DEDUP_SEQS} - -# adapted from yapp/bin/refpkg_align -ref_sto=$(taxit rp ${REFPKG} aln_sto) -profile=$(taxit rp ${REFPKG} profile) - -sto=$(mktemp -u).sto - -cmalign --cpu ${GALAXY_SLOTS:-4} -o "$sto" --sfile "${ALIGNED_SCORES}" --noprob --dnaout "$profile" "${DEDUP_SEQS}" | grep -E '^#' - -esl-alimerge --dna --outformat afa "$ref_sto" "$sto" | \ - seqmagick convert --output-format fasta --dash-gap - "${ALIGNED_SEQS}" diff -r d4690e65afcd -r c8cc6529038c preprocessing.xml --- a/preprocessing.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,53 +0,0 @@ - - in preparation for phylogenetic placement - - yapp_env - - - macros.xml - - echo "guppy $(guppy --version)" - - preprocessing-wrapper.sh ${config} - - - - - - - - - - - - - - - - - -REFPKG="${refpkg.extra_files_path}" -INPUT_SEQS="${input_seqs}" -SPLIT_MAP="${split_map}" - -DEDUP_SEQS="${dedup_seqs}" -DEDUP_INFO="${dedup_info}" -ALIGNED_SEQS="${aligned_seqs}" -ALIGNED_SCORES="${aligned_scores}" - - - - - -.. class:: infomark - -**What it does** - -This tool aligns query sequences with the reference sequences used to make the -reference tree contained in the reference package and then merges the query and -reference sequences. - - - diff -r d4690e65afcd -r c8cc6529038c refpkgzip_to_refpkg.xml --- a/refpkgzip_to_refpkg.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,11 +0,0 @@ - - unzip -o -j $input -d $output.files_path - - - - - - - - - diff -r d4690e65afcd -r c8cc6529038c render_datatable-wrapper.sh --- a/render_datatable-wrapper.sh Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,20 +0,0 @@ -#!/bin/bash - -source $(dirname $0)/util.sh -source $1 - -mkdir -p ${OUTPUT_DIR} - -python $(dirname $0)/render_datatable.py \ - < ${INPUT} \ - > ${OUTPUT_DIR}/index.html - -cat < ${OUTPUT} - - - - - Generated table - - -EOF diff -r d4690e65afcd -r c8cc6529038c render_datatable.py --- a/render_datatable.py Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,412 +0,0 @@ -#!/usr/bin/env python - -import csv -import itertools -import string -import sys - -input = sys.stdin -start_lines = input.readlines(10) -all_input = itertools.chain(iter(start_lines), input) - -def detect_delimiter(iterable, char_set): - matches = (c for c in char_set if c in iterable) - return next(matches, None) - -def detect_csv_dialect(sample): - try: - return csv.Sniffer().sniff(sample) - except: - return None - -delimiter = detect_delimiter(start_lines[0], list('\t, ')) -reader = None - -if delimiter in list('\t,'): - # try to detect csv dialect, which should neatly handle quoted separators and stuff - dialect = detect_csv_dialect(''.join(start_lines)) - if dialect: - reader = csv.reader(all_input, dialect) - -if not reader: - if delimiter in list(string.whitespace): - # use str.split() with no arguments to split on arbitrary whitespace strings - reader = (line.strip().split() for line in all_input) - else: - reader = all_input - -print """\ - - - - - - - - - - - -
- - \ -""" - -for i, row in enumerate(reader): - if i == 0: - print "" - else: - print "" - - if i == 0: - print "" - -print """\ - -
" + "".join(row) + "
" + "".join(row) + "
-
- -\ -""" diff -r d4690e65afcd -r c8cc6529038c render_datatable.xml --- a/render_datatable.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,63 +0,0 @@ - - as an interactive HTML table - - macros.xml - - - render_datatable-wrapper.sh $config - - - - - - - - - - - - -INPUT="${input}" - -OUTPUT="${output}" -OUTPUT_DIR="${output.files_path}" - - - - - -.. class:: infomark - -**What it does** - -This tool reformats a CSV file, like this:: - - "seqname","accession","tax_id","species_name","is_type" - "FM872653","FM872653","308994","Dialister propionicifaciens",0.0 - "AY331416","AY331416","239137","Candidate Division TM7 oral",0.0 - "DQ666092","DQ666092","95818_1","Candidate Division TM7 vaginal",0.0 - "S002223913","GQ900631","186802_3","bacterium BVAB3-Strain 1",0.0 - ... - -into an interactive HTML table. - -[placeholder] - -+-------------+-----------+----------+---------------------------------------+----------+ -| seqname | accession | tax_id | species_name | is_type | -+=============+===========+==========+=======================================+==========+ -| FM872653 | FM872653 | 308994 | Dialister propionicifaciens | 0.0 | -+-------------+-----------+----------+---------------------------------------+----------+ -| AY331416 | AY331416 | 239137 | Candidate Division TM7 oral | 0.0 | -+-------------+-----------+----------+---------------------------------------+----------+ -| DQ666092 | DQ666092 | 95818_1 | Candidate Division TM7 vaginal | 0.0 | -+-------------+-----------+----------+---------------------------------------+----------+ -| S002223913 | GQ900631 | 186802_3 | bacterium BVAB3-Strain 1 | 0.0 | -+-------------+-----------+----------+---------------------------------------+----------+ - -... - - - diff -r d4690e65afcd -r c8cc6529038c taxtastic.py --- a/taxtastic.py Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,64 +0,0 @@ -import os -import zipfile -from galaxy.datatypes.binary import Binary -from galaxy.datatypes.data import Text - -class Refpkg(Text): - composite_type = "basic" - - def __init__(self, **kwd): - Text.__init__(self, **kwd) - self.add_composite_file("CONTENTS.json") - - def get_mime(self): - return "application/json" - -class RefpkgZip(Binary): - file_ext = "refpkg.zip" - - def __init__(self, **kwd): - Binary.__init__(self, **kwd) - - def sniff(self, filename): - if not zipfile.is_zipfile(filename): - return False - contains_contents_file = False - zip_file = zipfile.ZipFile(filename, "r") - for name in zip_file.namelist(): - if os.path.basename(name) == "CONTENTS.json": - contains_contents_file = True - break - zip_file.close() - if not contains_contents_file: - return False - return True - - def get_mime(self): - return "application/zip" - -class OfficeXlsx(Binary): - file_ext = "xlsx" - - def __init__(self, **kwd): - Binary.__init__(self, **kwd) - - # TODO: this should check for an xl/ directory also - def sniff(self, filename): - if not zipfile.is_zipfile(filename): - return False - contains_contents_file = False - zip_file = zipfile.ZipFile(filename, "r") - for name in zip_file.namelist(): - if os.path.basename(name) == "[Content_Types].xml": - contains_contents_file = True - break - zip_file.close() - if not contains_contents_file: - return False - return True - - def get_mime(self): - return "application/zip" - -Binary.register_sniffable_binary_format("refpkg.zip", "refpkg.zip", RefpkgZip) -Binary.register_sniffable_binary_format("xlsx", "xlsx", OfficeXlsx) diff -r d4690e65afcd -r c8cc6529038c usearch-wrapper.sh --- a/usearch-wrapper.sh Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -#!/bin/bash - -source $(dirname $0)/util.sh -source $1 - -RDP_SEQS="/shared/silo_researcher/Matsen_F/MatsenGrp/micro_refset/rdp/10_31/tax_filter/filtered/rdp_10_31.filter.fasta" -RDP_SEQINFO="/shared/silo_researcher/Matsen_F/MatsenGrp/micro_refset/rdp/10_31/tax_filter/filtered/rdp_10_31.filter.seq_info.csv" - -sqlite3 -csv -header ${CLASS_DB} < usearch_meta.csv -SELECT pn.name, CAST(pn.mass AS INT) count, tax_id, tax_name, taxa.rank - FROM multiclass_concat - JOIN taxa USING (tax_id) - JOIN placement_names pn USING (placement_id, name) - WHERE want_rank = 'species'; -EOF - -romp -v usearch_clusters \ - --usearch-quietly \ - --query-group tax_id \ - --query-duplication count \ - --database-name seqname \ - --database-group tax_id \ - ${INPUT_SEQS} \ - usearch_meta.csv \ - ${RDP_SEQS} \ - ${RDP_SEQINFO} \ - ${USEARCH_HITS} \ - ${USEARCH_GROUPS} diff -r d4690e65afcd -r c8cc6529038c usearch.xml --- a/usearch.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,50 +0,0 @@ - - with USEARCH - - macros.xml - - /home/matsengrp/local/bin/usearch6_64 --version - - usearch-wrapper.sh $config - - - - - - - - - - - - - - -INPUT_SEQS="${input_seqs}" -CLASS_DB="${class_db}" - -USEARCH_HITS="${usearch_hits}" -USEARCH_GROUPS="${usearch_groups}" - - - - - -.. class:: infomark - -**What it does** - -This tool queries large sequence databases for target sequences and assigns -those sequences to clusters. - ------ - -**Citation** - -Edgar, R C: **Search and clustering orders of magnitude faster than -BLAST**. Bioinformatics 2010, **26**:19. - - - diff -r d4690e65afcd -r c8cc6529038c util.sh --- a/util.sh Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,52 +0,0 @@ -#!/bin/bash - -extify() { - local REQ_EXT=$1 - shift - - local OUTPUT="" - local FILE - for FILE in $*; do - local BASENAME=$(basename ${FILE}) - local EXT=${BASENAME##*.} - if [[ ${EXT} != ${REQ_EXT} ]]; then - local LINK="${BASENAME%%.*}.${REQ_EXT}" - if [[ ! -f ${LINK} ]]; then - ln -s ${FILE} ${LINK} - fi - FILE="${LINK}" - fi - OUTPUT="${OUTPUT} ${FILE}" - done - - echo ${OUTPUT} -} - -# from http://www.linuxjournal.com/content/use-date-command-measure-elapsed-time -timer() { - if [[ $# -eq 0 ]]; then - echo $(date '+%s') - else - local stime=$1 - etime=$(date '+%s') - - if [[ -z "$stime" ]]; then stime=$etime; fi - - dt=$((etime - stime)) - ds=$((dt % 60)) - dm=$(((dt / 60) % 60)) - dh=$((dt / 3600)) - printf '%d:%02d:%02d' $dh $dm $ds - fi -} - -on_exit() { - echo "Elapsed time: $(timer ${START_TIME})" -} - -set -eux - -xargs -n 1 -0 < /proc/self/environ > env.log - -START_TIME=$(timer) -trap on_exit EXIT diff -r d4690e65afcd -r c8cc6529038c xlsx_to_csv.xml --- a/xlsx_to_csv.xml Thu Feb 26 18:16:36 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ - - - yapp_env - - in2csv -f xlsx $input > $output - - - - - - - - -.. class:: infomark - -**What it does** - -This tool converts a spreadsheet in Microsoft Excel 2007 (.xlsx) format to CSV. - - -