# HG changeset patch
# User bcclaywell
# Date 1424997044 18000
# Node ID c8cc6529038c72119f8d36e62ab1d9edec6a1bd0
# Parent d4690e65afcd5848371e073ce388feb54652ba1f
Uploaded
diff -r d4690e65afcd -r c8cc6529038c bootstrap-wrapper.sh
--- a/bootstrap-wrapper.sh Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,13 +0,0 @@
-#!/bin/bash
-
-source $(dirname $0)/util.sh
-source $1
-
-python $(dirname $0)/bootstrap.py \
-    --plate ${PLATE_ID} \
-    ${JUNIOR} \
-    --zone ${ZONE_ID} \
-    --barcodes ${BARCODES} \
-    --labels ${LABELS} \
-    --metadata ${METADATA} \
-    - < ${SAMPLE_INFO}
diff -r d4690e65afcd -r c8cc6529038c bootstrap.py
--- a/bootstrap.py Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,84 +0,0 @@
-#!/usr/bin/env python
-"""Parse sample information and map specimens to barcodes."""
-
-from __future__ import print_function
-import csv
-import sys
-import os
-import argparse
-
-def warning(*objs):
-    print("WARNING: ", *objs, file=sys.stderr)
-
-def main(arguments):
-
-    parser = argparse.ArgumentParser(description=__doc__,
-                                     formatter_class=argparse.RawDescriptionHelpFormatter)
-    parser.add_argument('infile', help="CSV input", nargs='?',
-                        type=argparse.FileType('r'), default=sys.stdin)
-    parser.add_argument('--junior', help="use junior run specimen naming convention", action='store_true')
-    parser.add_argument('--plate', help="plate number", type=int, required=True)
-    parser.add_argument('--zone', help="zone number", type=int, required=True)
-    parser.add_argument('--barcodes', help="name of barcodes file",
-                        type=argparse.FileType('w'), default='barcodes.csv')
-    parser.add_argument('--labels', help="name of labels file",
-                        type=argparse.FileType('w'), default='labels.csv')
-    parser.add_argument('--metadata', help="name of metadata template file",
-                        type=argparse.FileType('w'), default='metadata.csv')
-
-    args = parser.parse_args(arguments)
-
-    label_key = 'sampleid'
-    primer_key = 'reverse'
-    barcode_key = 'barcode'
-    zone_key = 'zone'
-
-    fstr = "j{plate_id}{primer_id}" if args.junior else "p{plate_id}z{zone_id}{primer_id}"
-
-    # read from the infile argument (default: stdin)
-    reader = csv.DictReader(args.infile)
-
-    barcodes = csv.writer(args.barcodes)
-    labels = csv.writer(args.labels)
-    metadata = csv.writer(args.metadata)
-
-    barcodes.writerow(['stub', 'barcode'])
-    labels.writerow(['specimen', 'label'])
-    metadata.writerow(['specimen', 'plate', 'zone', 'label', 'primer'])
-
-    seen_labels = {}
-    seen_primers = {}
-
-    # TODO: add checks for duplicates, empty fields, etc., and bail if something goes wrong
-    for i, d in enumerate(reader):
-        if not all(k in d for k in (label_key, primer_key, barcode_key)):
-            return "Expected columns not found"
-
-        if zone_key in d and d[zone_key] != str(args.zone):
-            continue
-
-        label = d[label_key]
-        primer = d[primer_key]
-        barcode = d[barcode_key]
-        zone = args.zone
-
-        if not all((label, primer, barcode)):
-            # only print a warning if at least one of the fields is non-empty
-            if any((label, primer, barcode)):
-                warning("Missing required field on row {}, skipping".format(i+2))
-            continue
-
-        if label in seen_labels:
-            return "Duplicate label '{}' found on rows {} and {}".format(label, seen_labels[label]+2, i+2)
-
-        if primer in seen_primers:
-            return "Duplicate primer '{}' found on rows {} and {}".format(primer, seen_primers[primer]+2, i+2)
-
-        seen_labels[label] = i
-        seen_primers[primer] = i
-
-        specimen = fstr.format(plate_id=args.plate, zone_id=zone, primer_id=primer.strip().lower().replace('-', ''))
-        barcodes.writerow([specimen, barcode])
-        labels.writerow([specimen, label])
-        metadata.writerow([specimen, args.plate, zone, label, primer])
-
-if __name__ == '__main__':
-    sys.exit(main(sys.argv[1:]))
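The ``fstr`` naming convention in bootstrap.py is easiest to see by example. A
standalone sketch (values are hypothetical, following the code above)::

    # Specimen naming as in bootstrap.py: senior runs encode plate and zone,
    # junior runs encode plate only. Values below are hypothetical.
    junior = False
    fstr = "j{plate_id}{primer_id}" if junior else "p{plate_id}z{zone_id}{primer_id}"

    primer = " R-27 "  # as read from the 'reverse' column
    primer_id = primer.strip().lower().replace('-', '')

    print(fstr.format(plate_id=12, zone_id=2, primer_id=primer_id))
    # -> p12z2r27   (or j12r27 with --junior)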
diff -r d4690e65afcd -r c8cc6529038c bootstrap.xml
--- a/bootstrap.xml Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,67 +0,0 @@
-<!-- tool/description/requirements/command markup lost in extraction; surviving text: -->
- for analysis
- yapp_env
- macros.xml
- echo "bootstrap script 1.1.0"
- bootstrap-wrapper.sh $config
-<!-- inputs/outputs markup lost in extraction -->
-PLATE_ID="${plate_id}"
-#if $run_type.run_type_select == "senior"
-JUNIOR=""
-ZONE_ID="${run_type.zone_id}"
-#else
-JUNIOR="--junior"
-ZONE_ID="1"
-#end if
-SAMPLE_INFO="${sample_info}"
-
-BARCODES="${barcodes}"
-LABELS="${labels}"
-METADATA="${metadata}"
-<!-- configfile/help markup lost in extraction -->
-
-.. class:: infomark
-
-**What it does**
-
-This tool parses sample information and creates a mapping of samples to
-barcodes. The sample information file must contain the columns ``sampleid``,
-``barcode``, and ``reverse``, and may optionally contain a ``zone`` column.
-
-
-
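For orientation, a sample-information file with the columns named in the help
above might look like this (rows are hypothetical; ``reverse`` holds the
reverse-primer name that bootstrap.py normalizes into the specimen ID)::

    sampleid,barcode,reverse,zone
    subj001,ACGAGTGCGT,R-27,1
    subj002,ACGCTCGACA,R-28,1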
diff -r d4690e65afcd -r c8cc6529038c classification-wrapper.sh
--- a/classification-wrapper.sh Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,13 +0,0 @@
-#!/bin/bash
-
-source $(dirname $0)/util.sh
-source $1
-
-classif_table.py \
-    --specimen-map ${SPLIT_MAP} \
-    --metadata-map ${LABEL_MAP} \
-    --rank ${WANT_RANK} \
-    --tallies-wide ${TALLIES_WIDE} \
-    --by-specimen ${BY_SPECIMEN} \
-    ${CLASS_DB} \
-    ${BY_TAXON}
diff -r d4690e65afcd -r c8cc6529038c classification.xml
--- a/classification.xml Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,104 +0,0 @@
-<!-- tool/description/requirements/command markup lost in extraction; surviving text: -->
- in tabular format
- yapp_env
- macros.xml
- echo "guppy $(guppy --version)"
- classification-wrapper.sh ${config}
-<!-- inputs/outputs markup lost in extraction -->
-SPLIT_MAP="${split_map}"
-LABEL_MAP="${label_map}"
-CLASS_DB="${class_db}"
-WANT_RANK="${want_rank}"
-
-BY_TAXON="${by_taxon}"
-BY_SPECIMEN="${by_specimen}"
-TALLIES_WIDE="${tallies_wide}"
-
-<!-- configfile/help markup lost in extraction -->
-
-.. class:: infomark
-
-**What it does**
-
-This tool outputs the classifications made by ``pplacer`` in a tabular format
-suitable for use with R.
-
------
-
-**Example**
-
-The classifications are simply done by containment. Say clade A is the
-smallest clade of the reference tree that contains a given placement. The most
-specific classification for that read will be the lowest common ancestor (LCA)
-of the taxonomic classifications for the leaves of A. If the desired
-classification is more specific than that, there is a disconnect between the
-desired and the actual classification. For example, if we try to classify at
-the species level and the clade LCA is a genus, then we will get a genus name.
-If there is uncertainty in read placement, then there is uncertainty in
-classification.
-
-For example, here is a classification list made for one read using the tabular
-output. The columns are as follows: read name, attempted rank for
-classification, actual rank for classification, taxonomic identifier, and
-confidence. You can see that in this example, there is some uncertainty at and
-below species, but only one classification at the genus level::
-
- GLKT0ZE01CQ2BU root root 1 1
- GLKT0ZE01CQ2BU below_root below_root 131567 1
- GLKT0ZE01CQ2BU superkingdom superkingdom 2 1
- GLKT0ZE01CQ2BU below_superkingdom superkingdom 2 1
- GLKT0ZE01CQ2BU below_below_superkingdom superkingdom 2 1
- GLKT0ZE01CQ2BU superphylum superkingdom 2 1
- GLKT0ZE01CQ2BU phylum phylum 1239 1
- GLKT0ZE01CQ2BU subphylum phylum 1239 1
- GLKT0ZE01CQ2BU class class 186801 1
- GLKT0ZE01CQ2BU subclass class 186801 1
- GLKT0ZE01CQ2BU order order 186802 1
- GLKT0ZE01CQ2BU below_order order 186802 1
- GLKT0ZE01CQ2BU below_below_order order 186802 1
- GLKT0ZE01CQ2BU suborder order 186802 1
- GLKT0ZE01CQ2BU family family 186804 1
- GLKT0ZE01CQ2BU below_family family 186804 1
- GLKT0ZE01CQ2BU genus genus 1257 1
- GLKT0ZE01CQ2BU species_group genus 1257 1
- GLKT0ZE01CQ2BU species_subgroup genus 1257 1
- GLKT0ZE01CQ2BU species genus 1257 0.0732247
- GLKT0ZE01CQ2BU species species 1261 0.853561
- GLKT0ZE01CQ2BU species species 341694 0.073214
- GLKT0ZE01CQ2BU below_species genus 1257 0.0732247
- GLKT0ZE01CQ2BU below_species species 1261 0.853561
- GLKT0ZE01CQ2BU below_species species 341694 0.073214
-
-
-
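The containment/LCA rule described in the help above can be made concrete with
a small standalone sketch (not part of the deleted code; the lineages are
hypothetical)::

    def lca(lineages):
        """Longest shared prefix of several root-to-tip lineages."""
        common = []
        for ranks in zip(*lineages):
            if len(set(ranks)) != 1:
                break
            common.append(ranks[0])
        return common

    # Leaves of the smallest clade containing the placement (hypothetical):
    leaves = [
        ["Bacteria", "Firmicutes", "Bacilli", "Lactobacillus", "L. iners"],
        ["Bacteria", "Firmicutes", "Bacilli", "Lactobacillus", "L. crispatus"],
    ]

    print(lca(leaves)[-1])
    # -> 'Lactobacillus': a species-level query can only be answered at genus

This is exactly the pattern in the example table: every rank at or above genus
gets a single confident answer, while species-level rows split their
confidence across the candidate leaves.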
diff -r d4690e65afcd -r c8cc6529038c datatypes_conf.xml
--- a/datatypes_conf.xml Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,22 +0,0 @@
-<!-- datatype and sniffer registration markup lost in extraction -->
diff -r d4690e65afcd -r c8cc6529038c decorate-wrapper.sh
--- a/decorate-wrapper.sh Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-#!/bin/bash
-
-source $(dirname $0)/util.sh
-source $1
-
-csvcut -c "specimen,${COLUMNS}" ${METADATA} | \
- csvjoin -c "specimen" ${GROUP_BY_SPECIMEN} - > ${DECORATED_GROUP_BY_SPECIMEN}
-
-# drop duplicate columns (thanks, Erick!)
-#csvcut -c $(head -n 1 addresses.csv | sed "s/,/\n/g" | sort |uniq | paste -s -d",")
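csvkit does the heavy lifting here. Roughly the same decoration, sketched in
Python for clarity (join semantics simplified relative to csvjoin; see the
csvkit docs; file paths and the "treatment" column are hypothetical)::

    import csv

    # select "specimen" plus the requested metadata columns (csvcut), then
    # attach the metadata to each classification row on "specimen" (csvjoin)
    columns = ["specimen", "treatment"]  # hypothetical ${COLUMNS} selection

    with open("metadata.csv") as f:
        meta = {row["specimen"]: {c: row[c] for c in columns}
                for row in csv.DictReader(f)}

    with open("group_by_specimen.csv") as f, open("decorated.csv", "w") as out:
        reader = csv.DictReader(f)
        fields = reader.fieldnames + [c for c in columns if c != "specimen"]
        writer = csv.DictWriter(out, fieldnames=fields)
        writer.writeheader()
        for row in reader:
            row.update(meta.get(row["specimen"], {}))
            writer.writerow(row)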
diff -r d4690e65afcd -r c8cc6529038c decorate.xml
--- a/decorate.xml Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,46 +0,0 @@
-<!-- tool/description/requirements/command markup lost in extraction; surviving text: -->
- classification results with sample metadata
- yapp_env
- macros.xml
- echo "decorate script 1.0.0"
- decorate-wrapper.sh ${config}
-<!-- inputs/outputs markup lost in extraction -->
-GROUP_BY_SPECIMEN="${group_by_specimen}"
-METADATA="${metadata}"
-COLUMNS="${columns}"
-
-DECORATED_GROUP_BY_SPECIMEN="${decorated_group_by_specimen}"
-
-<!-- configfile/help markup lost in extraction -->
-
-.. class:: infomark
-
-**What it does**
-
-This tool joins the classifications made by ``pplacer`` with arbitrary sample
-metadata.
-
-
-
diff -r d4690e65afcd -r c8cc6529038c filter-wrapper.sh
--- a/filter-wrapper.sh Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-source $(dirname $0)/util.sh
-source $1
-
-INPUT_QUAL=$(extify qual ${INPUT_QUAL})
-BARCODES=$(extify csv ${BARCODES})
-RAW_SEQS=$(extify fasta ${RAW_SEQS})
-
-seqmagick quality-filter \
-    --input-qual ${INPUT_QUAL} \
-    --barcode-file ${BARCODES} \
-    --primer "${PRIMER}" \
-    --report-out ${FILTER_REPORT} \
-    --details-out ${FILTER_DETAILS} \
-    --map-out ${SPLIT_MAP} \
-    --barcode-header \
-    --min-length ${MIN_LENGTH} \
-    --min-mean-quality ${MIN_QUALITY} \
-    --quality-window 30 \
-    --quality-window-prop 0.9 \
-    --quality-window-mean-qual 15 \
-    ${RAW_SEQS} \
-    filtered.fasta
-
-if [[ ${REVERSE_COMPLEMENT} == "TRUE" ]]; then
-    seqmagick mogrify \
-        --reverse-complement \
-        filtered.fasta
-fi
-
-mv filtered.fasta ${FILTERED_SEQS}
-
-# TODO: separate tool for concatenating seq data (and reverse complementing them?)
-#cat [12]*Reads.fasta | seqmagick convert --input-format fasta - combined.fasta --reverse-complement
-#cat [12]*.map.csv > combined.map.csv
-
-sequencing_quality_report.py ${PLATE_JSON} -t "Sequencing quality report" -o ${SQR_DIR}
-
-cat <<EOF > ${SQR}
-<!-- HTML stub lost in extraction; surviving link text: "Sequencing quality report" -->
-EOF
diff -r d4690e65afcd -r c8cc6529038c filter.xml
--- a/filter.xml Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,103 +0,0 @@
-<!-- tool/description/requirements/command markup lost in extraction; surviving text: -->
- sequences
- yapp_env
- macros.xml
- seqmagick --version
- filter-wrapper.sh ${config}
-<!-- inputs/outputs/configfile markup lost in extraction -->
-{
- "plate": ${plate_id},
- "name": "Plate ${plate_id}",
- "zones": [
- {
- "zone": ${zone_id},
- "cleaning_stats": "${filter_details}"
- }
- ]
-}
-
-
-RAW_SEQS="${raw_seqs}"
-INPUT_QUAL="${input_qual}"
-BARCODES="${barcodes}"
-PRIMER="${primer}"
-MIN_LENGTH="${min_length}"
-MIN_QUALITY="${min_quality}"
-REVERSE_COMPLEMENT="${reverse_complement}"
-PLATE_JSON="${plate_json}"
-
-FILTERED_SEQS="${filtered_seqs}"
-FILTER_REPORT="${filter_report}"
-FILTER_DETAILS="${filter_details}"
-SPLIT_MAP="${split_map}"
-SQR="${seq_qual_report}"
-SQR_DIR="${seq_qual_report.files_path}"
-
-<!-- configfile/help markup lost in extraction -->
-
-.. class:: infomark
-
-**What it does**
-
-This tool truncates and removes sequences that don’t match a set of quality
-criteria, and maps sequence barcodes to specimens. It takes input sequences in
-FASTA format and a quality file, and outputs the filtered sequences along with
-a filtering summary and a sequence quality report.
-
-The default quality filter settings are:
-
-+---------------------------+------+
-|parameter |value |
-+===========================+======+
-|--min-length |350 |
-+---------------------------+------+
-|--min-mean-quality |35 |
-+---------------------------+------+
-|--quality-window |30 |
-+---------------------------+------+
-|--quality-window-prop |0.9 |
-+---------------------------+------+
-|--quality-window-mean-qual |15 |
-+---------------------------+------+
-
-See seqmagick's `quality filter documentation`_ for full explanations of these
-parameters.
-
-.. _quality filter documentation: http://fhcrc.github.io/seqmagick/quality_filter.html
-
-
-
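For intuition about the windowed settings in the table above, here is a rough
standalone sketch of sliding-window truncation. The authoritative semantics
belong to seqmagick (see the linked documentation); this sketch only assumes
"truncate where a 30-base window's mean quality drops below 15"::

    # Rough illustration of sliding-window quality truncation (assumed
    # semantics; defer to the seqmagick docs for the real behavior).

    def truncate_at_low_quality_window(quals, window=30, window_mean_qual=15):
        """Return the length to keep: cut before the first window whose
        mean quality falls below window_mean_qual."""
        for start in range(0, len(quals) - window + 1):
            w = quals[start:start + window]
            if sum(w) / float(window) < window_mean_qual:
                return start
        return len(quals)

    quals = [40] * 200 + [10] * 100  # hypothetical per-base Phred scores
    print(truncate_at_low_quality_window(quals))  # truncates near position 200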
diff -r d4690e65afcd -r c8cc6529038c macros.xml
--- a/macros.xml Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-<!-- shared macro definitions lost in extraction -->
diff -r d4690e65afcd -r c8cc6529038c pplacer-wrapper.sh
--- a/pplacer-wrapper.sh Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,13 +0,0 @@
-#!/bin/bash
-
-source $(dirname $0)/util.sh
-source $1
-
-QUERY_SEQS=$(extify fasta ${QUERY_SEQS})
-PPLACER_DEFAULT_ARGS="-j ${GALAXY_SLOTS:-4} -p --inform-prior --prior-lower 0.01 --map-identity"
-
-pplacer \
-    ${PPLACER_DEFAULT_ARGS} \
-    -c ${REFPKG} \
-    -o ${PLACED_SEQS} \
-    ${QUERY_SEQS}
diff -r d4690e65afcd -r c8cc6529038c pplacer.py
--- a/pplacer.py Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,54 +0,0 @@
-import json
-import os
-from galaxy.datatypes.data import Text
-from galaxy.datatypes.images import Html
-
-class Jplace(Text):
-    file_ext = "jplace"
-
-    def sniff(self, filename):
-        try:
-            with open(filename, "r") as f:
-                data = json.load(f)
-            # a jplace document must carry these four top-level keys
-            if all(k in data for k in ("version", "tree", "placements", "fields")):
-                return True
-        except ValueError:
-            pass
-
-        return False
-
-    def get_mime(self):
-        return "application/json"
-
-class AutoPrimaryComposite(Html):
-    composite_type = "auto_primary_file"
-
-    def __init__(self, **kwd):
-        Html.__init__(self, **kwd)
-
-    def regenerate_primary_file(self, dataset):
-        """
-        cannot do this until we are setting metadata
-        """
-        bn = dataset.metadata.base_name
-        efp = dataset.extra_files_path
-        flist = os.listdir(efp)
-        rval = ['<html><head><title>Files for Composite Dataset %s</title></head><p/>Composite %s contains:<p/><ul>' % (dataset.name, dataset.name)]
-        for i, fname in enumerate(flist):
-            sfname = os.path.split(fname)[-1]
-            f, e = os.path.splitext(fname)
-            rval.append('<li><a href="%s">%s</a>' % (sfname, sfname))
-        rval.append('</ul></html>')
-        f = file(dataset.file_name, 'w')
-        f.write("\n".join(rval))
-        f.write('\n')
-        f.close()
-
-    def set_meta(self, dataset, **kwd):
-        Html.set_meta(self, dataset, **kwd)
-        self.regenerate_primary_file(dataset)
-
-    def get_mime(self):
-        return "text/html"
-
-class BasicHtmlComposite(Html):
-    composite_type = "basic"
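As a quick check of the ``Jplace`` sniffer above, a minimal ``.jplace``
document only needs the four top-level keys it looks for (content below is a
hypothetical placeholder; instantiating Galaxy datatypes outside Galaxy may
require more of the framework, so the sniff call is left as a comment)::

    import json
    import tempfile

    doc = {
        "version": 3,
        "tree": "((A:0.1,B:0.2):0.05)root;",  # hypothetical placeholder tree
        "placements": [],
        "fields": ["edge_num", "likelihood", "like_weight_ratio"],
    }

    with tempfile.NamedTemporaryFile(mode="w", suffix=".jplace",
                                     delete=False) as f:
        json.dump(doc, f)

    # Jplace().sniff(f.name) would return True: all four keys are present.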
diff -r d4690e65afcd -r c8cc6529038c pplacer.xml
--- a/pplacer.xml Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,53 +0,0 @@
-<!-- tool/description/requirements/command markup lost in extraction; surviving text: -->
- on a reference tree
- yapp_env
- macros.xml
- echo "pplacer $(pplacer --version)"
- pplacer-wrapper.sh ${config}
-<!-- inputs/outputs markup lost in extraction -->
-REFPKG="${refpkg.extra_files_path}"
-QUERY_SEQS="${query_seqs}"
-
-PLACED_SEQS="${placed_seqs}"
-
-<!-- configfile/help markup lost in extraction -->
-
-.. class:: infomark
-
-**What it does**
-
-This tool places query sequences on a fixed reference phylogenetic tree
-according to a reference alignment, producing taxonomic annotations which can
-be used for classification and visualization.
-
------
-
-**Citation**
-
-Matsen F, Kodner R, Armbrust E V: **pplacer: linear time maximum-likelihood and
-Bayesian phylogenetic placement of sequences onto a fixed reference tree**. BMC
-Bioinformatics 2010, **11**:538.
-
-
-
diff -r d4690e65afcd -r c8cc6529038c preclassification-wrapper.sh
--- a/preclassification-wrapper.sh Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-#!/bin/bash
-
-source $(dirname $0)/util.sh
-source $1
-
-PLACED_SEQS=$(extify jplace ${PLACED_SEQS})
-NBC_SEQS=$(extify fasta ${NBC_SEQS})
-
-guppy redup \
-    -m \
-    -d ${DEDUP_INFO} \
-    -o ${REDUPED_SEQS} \
-    ${PLACED_SEQS}
-
-REDUPED_SEQS=$(extify jplace ${REDUPED_SEQS})
-
-rppr prep_db \
-    -c ${REFPKG} \
-    --sqlite ${CLASS_DB}
-
-guppy classify \
-    -c ${REFPKG} \
-    -j ${GALAXY_SLOTS:-4} \
-    --pp \
-    --sqlite ${CLASS_DB} \
-    --classifier hybrid2 \
-    --nbc-sequences ${NBC_SEQS} \
-    ${REDUPED_SEQS}
-
-multiclass_concat.py --dedup-info ${DEDUP_INFO} ${CLASS_DB}
diff -r d4690e65afcd -r c8cc6529038c preclassification.xml
--- a/preclassification.xml Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,50 +0,0 @@
-<!-- tool/description/requirements/command markup lost in extraction; surviving text: -->
- for classification
- yapp_env
- macros.xml
- echo "guppy $(guppy --version)"
- preclassification-wrapper.sh ${config}
-<!-- inputs/outputs markup lost in extraction -->
-DEDUP_INFO="${dedup_info}"
-REFPKG="${refpkg.extra_files_path}"
-NBC_SEQS="${nbc_seqs}"
-PLACED_SEQS="${placed_seqs}"
-
-REDUPED_SEQS="${reduped_seqs}"
-CLASS_DB="${class_db}"
-
-<!-- configfile/help markup lost in extraction -->
-
-.. class:: infomark
-
-**What it does**
-
-This tool records the classifications made by ``pplacer``'s ``guppy classify``
-in a SQLite database for use in downstream taxonomic classification.
-
-
-
diff -r d4690e65afcd -r c8cc6529038c preprocessing-wrapper.sh
--- a/preprocessing-wrapper.sh Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,20 +0,0 @@
-#!/bin/bash
-
-source $1
-
-deduplicate_sequences.py \
-    --split-map ${SPLIT_MAP} \
-    --deduplicated-sequences-file ${DEDUP_INFO} \
-    ${INPUT_SEQS} \
-    ${DEDUP_SEQS}
-
-# adapted from yapp/bin/refpkg_align
-ref_sto=$(taxit rp ${REFPKG} aln_sto)
-profile=$(taxit rp ${REFPKG} profile)
-
-sto=$(mktemp -u).sto
-
-cmalign --cpu ${GALAXY_SLOTS:-4} -o "$sto" --sfile "${ALIGNED_SCORES}" --noprob --dnaout "$profile" "${DEDUP_SEQS}" | grep -E '^#'
-
-esl-alimerge --dna --outformat afa "$ref_sto" "$sto" | \
-    seqmagick convert --output-format fasta --dash-gap - "${ALIGNED_SEQS}"
diff -r d4690e65afcd -r c8cc6529038c preprocessing.xml
--- a/preprocessing.xml Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,53 +0,0 @@
-<!-- tool/description/requirements/command markup lost in extraction; surviving text: -->
- in preparation for phylogenetic placement
- yapp_env
- macros.xml
- echo "guppy $(guppy --version)"
- preprocessing-wrapper.sh ${config}
-<!-- inputs/outputs markup lost in extraction -->
-REFPKG="${refpkg.extra_files_path}"
-INPUT_SEQS="${input_seqs}"
-SPLIT_MAP="${split_map}"
-
-DEDUP_SEQS="${dedup_seqs}"
-DEDUP_INFO="${dedup_info}"
-ALIGNED_SEQS="${aligned_seqs}"
-ALIGNED_SCORES="${aligned_scores}"
-
-<!-- configfile/help markup lost in extraction -->
-
-.. class:: infomark
-
-**What it does**
-
-This tool aligns query sequences against the profile used to build the
-reference tree contained in the reference package, then merges the query
-alignment with the reference alignment.
-
-
-
diff -r d4690e65afcd -r c8cc6529038c refpkgzip_to_refpkg.xml
--- a/refpkgzip_to_refpkg.xml Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-<!-- tool markup lost in extraction; surviving command: -->
- unzip -o -j $input -d $output.files_path
-<!-- inputs/outputs markup lost in extraction -->
diff -r d4690e65afcd -r c8cc6529038c render_datatable-wrapper.sh
--- a/render_datatable-wrapper.sh Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,20 +0,0 @@
-#!/bin/bash
-
-source $(dirname $0)/util.sh
-source $1
-
-mkdir -p ${OUTPUT_DIR}
-
-python $(dirname $0)/render_datatable.py \
-    < ${INPUT} \
-    > ${OUTPUT_DIR}/index.html
-
-cat <<EOF > ${OUTPUT}
-<!-- HTML stub lost in extraction; surviving link text: "Generated table" -->
-EOF
diff -r d4690e65afcd -r c8cc6529038c render_datatable.py
--- a/render_datatable.py Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,412 +0,0 @@
-#!/usr/bin/env python
-
-import csv
-import itertools
-import string
-import sys
-
-input = sys.stdin
-# grab roughly the first few lines for sniffing; the rest are consumed lazily
-start_lines = input.readlines(10)
-all_input = itertools.chain(iter(start_lines), input)
-
-def detect_delimiter(iterable, char_set):
-    matches = (c for c in char_set if c in iterable)
-    return next(matches, None)
-
-def detect_csv_dialect(sample):
-    try:
-        return csv.Sniffer().sniff(sample)
-    except csv.Error:
-        return None
-
-delimiter = detect_delimiter(start_lines[0], list('\t, '))
-reader = None
-
-if delimiter in list('\t,'):
-    # try to detect csv dialect, which should neatly handle quoted separators and stuff
-    dialect = detect_csv_dialect(''.join(start_lines))
-    if dialect:
-        reader = csv.reader(all_input, dialect)
-
-if not reader:
-    if delimiter in list(string.whitespace):
-        # use str.split() with no arguments to split on arbitrary whitespace strings
-        reader = (line.strip().split() for line in all_input)
-    else:
-        reader = all_input
-
-print """\
-<!-- HTML page head and DataTables setup lost in extraction -->
-<table>
-<thead>\
-"""
-
-for i, row in enumerate(reader):
-    if i == 0:
-        print "<tr><th>" + "</th><th>".join(row) + "</th></tr>"
-    else:
-        print "<tr><td>" + "</td><td>".join(row) + "</td></tr>"
-
-    if i == 0:
-        print "</thead>\n<tbody>"
-
-print """\
-</tbody>
-</table>
-<!-- closing page markup lost in extraction -->\
-"""
diff -r d4690e65afcd -r c8cc6529038c render_datatable.xml
--- a/render_datatable.xml Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-<!-- tool/description/command markup lost in extraction; surviving text: -->
- as an interactive HTML table
- macros.xml
- render_datatable-wrapper.sh $config
-<!-- inputs/outputs markup lost in extraction -->
-INPUT="${input}"
-
-OUTPUT="${output}"
-OUTPUT_DIR="${output.files_path}"
-
-<!-- configfile/help markup lost in extraction -->
-
-.. class:: infomark
-
-**What it does**
-
-This tool reformats a CSV file, like this::
-
- "seqname","accession","tax_id","species_name","is_type"
- "FM872653","FM872653","308994","Dialister propionicifaciens",0.0
- "AY331416","AY331416","239137","Candidate Division TM7 oral",0.0
- "DQ666092","DQ666092","95818_1","Candidate Division TM7 vaginal",0.0
- "S002223913","GQ900631","186802_3","bacterium BVAB3-Strain 1",0.0
- ...
-
-into an interactive HTML table.
-
-[placeholder]
-
-+-------------+-----------+----------+---------------------------------------+----------+
-| seqname | accession | tax_id | species_name | is_type |
-+=============+===========+==========+=======================================+==========+
-| FM872653 | FM872653 | 308994 | Dialister propionicifaciens | 0.0 |
-+-------------+-----------+----------+---------------------------------------+----------+
-| AY331416 | AY331416 | 239137 | Candidate Division TM7 oral | 0.0 |
-+-------------+-----------+----------+---------------------------------------+----------+
-| DQ666092 | DQ666092 | 95818_1 | Candidate Division TM7 vaginal | 0.0 |
-+-------------+-----------+----------+---------------------------------------+----------+
-| S002223913 | GQ900631 | 186802_3 | bacterium BVAB3-Strain 1 | 0.0 |
-+-------------+-----------+----------+---------------------------------------+----------+
-
-...
-
-
-
diff -r d4690e65afcd -r c8cc6529038c taxtastic.py
--- a/taxtastic.py Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,64 +0,0 @@
-import os
-import zipfile
-from galaxy.datatypes.binary import Binary
-from galaxy.datatypes.data import Text
-
-class Refpkg(Text):
-    composite_type = "basic"
-
-    def __init__(self, **kwd):
-        Text.__init__(self, **kwd)
-        self.add_composite_file("CONTENTS.json")
-
-    def get_mime(self):
-        return "application/json"
-
-class RefpkgZip(Binary):
-    file_ext = "refpkg.zip"
-
-    def __init__(self, **kwd):
-        Binary.__init__(self, **kwd)
-
-    def sniff(self, filename):
-        # a refpkg zip is any zip archive containing a CONTENTS.json manifest
-        if not zipfile.is_zipfile(filename):
-            return False
-        contains_contents_file = False
-        zip_file = zipfile.ZipFile(filename, "r")
-        for name in zip_file.namelist():
-            if os.path.basename(name) == "CONTENTS.json":
-                contains_contents_file = True
-                break
-        zip_file.close()
-        return contains_contents_file
-
-    def get_mime(self):
-        return "application/zip"
-
-class OfficeXlsx(Binary):
-    file_ext = "xlsx"
-
-    def __init__(self, **kwd):
-        Binary.__init__(self, **kwd)
-
-    # TODO: this should check for an xl/ directory also
-    def sniff(self, filename):
-        # xlsx files are zip archives containing a [Content_Types].xml entry
-        if not zipfile.is_zipfile(filename):
-            return False
-        contains_contents_file = False
-        zip_file = zipfile.ZipFile(filename, "r")
-        for name in zip_file.namelist():
-            if os.path.basename(name) == "[Content_Types].xml":
-                contains_contents_file = True
-                break
-        zip_file.close()
-        return contains_contents_file
-
-    def get_mime(self):
-        return "application/zip"
-
-Binary.register_sniffable_binary_format("refpkg.zip", "refpkg.zip", RefpkgZip)
-Binary.register_sniffable_binary_format("xlsx", "xlsx", OfficeXlsx)
diff -r d4690e65afcd -r c8cc6529038c usearch-wrapper.sh
--- a/usearch-wrapper.sh Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,28 +0,0 @@
-#!/bin/bash
-
-source $(dirname $0)/util.sh
-source $1
-
-RDP_SEQS="/shared/silo_researcher/Matsen_F/MatsenGrp/micro_refset/rdp/10_31/tax_filter/filtered/rdp_10_31.filter.fasta"
-RDP_SEQINFO="/shared/silo_researcher/Matsen_F/MatsenGrp/micro_refset/rdp/10_31/tax_filter/filtered/rdp_10_31.filter.seq_info.csv"
-
-sqlite3 -csv -header ${CLASS_DB} <<EOF > usearch_meta.csv
-SELECT pn.name, CAST(pn.mass AS INT) count, tax_id, tax_name, taxa.rank
-  FROM multiclass_concat
-  JOIN taxa USING (tax_id)
-  JOIN placement_names pn USING (placement_id, name)
-  WHERE want_rank = 'species';
-EOF
-
-romp -v usearch_clusters \
-    --usearch-quietly \
-    --query-group tax_id \
-    --query-duplication count \
-    --database-name seqname \
-    --database-group tax_id \
-    ${INPUT_SEQS} \
-    usearch_meta.csv \
-    ${RDP_SEQS} \
-    ${RDP_SEQINFO} \
-    ${USEARCH_HITS} \
-    ${USEARCH_GROUPS}
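The heredoc above just exports one query to CSV. The same extraction in
Python, as a sketch (the database path stands in for ``${CLASS_DB}``; the
schema comes from ``rppr prep_db`` and ``multiclass_concat.py``)::

    import csv
    import sqlite3

    conn = sqlite3.connect("classifications.db")  # stand-in for ${CLASS_DB}
    rows = conn.execute("""
        SELECT pn.name, CAST(pn.mass AS INT) count, tax_id, tax_name, taxa.rank
          FROM multiclass_concat
          JOIN taxa USING (tax_id)
          JOIN placement_names pn USING (placement_id, name)
         WHERE want_rank = 'species'
    """)

    with open("usearch_meta.csv", "w") as out:
        writer = csv.writer(out)
        # header row, mirroring sqlite3's -header flag
        writer.writerow([d[0] for d in rows.description])
        writer.writerows(rows)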
diff -r d4690e65afcd -r c8cc6529038c usearch.xml
--- a/usearch.xml Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,50 +0,0 @@
-<!-- tool/description/command markup lost in extraction; surviving text: -->
- with USEARCH
- macros.xml
- /home/matsengrp/local/bin/usearch6_64 --version
- usearch-wrapper.sh $config
-<!-- inputs/outputs markup lost in extraction -->
-INPUT_SEQS="${input_seqs}"
-CLASS_DB="${class_db}"
-
-USEARCH_HITS="${usearch_hits}"
-USEARCH_GROUPS="${usearch_groups}"
-
-<!-- configfile/help markup lost in extraction -->
-
-.. class:: infomark
-
-**What it does**
-
-This tool queries large sequence databases for target sequences and assigns
-those sequences to clusters.
-
------
-
-**Citation**
-
-Edgar, R C: **Search and clustering orders of magnitude faster than
-BLAST**. Bioinformatics 2010, **26**:2460-2461.
-
-
-
diff -r d4690e65afcd -r c8cc6529038c util.sh
--- a/util.sh Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,52 +0,0 @@
-#!/bin/bash
-
-extify() {
-    # echo a space-separated list of the given files, symlinking any file
-    # that lacks the required extension so downstream tools will accept it
-    local REQ_EXT=$1
-    shift
-
-    local OUTPUT=""
-    local FILE
-    for FILE in $*; do
-        local BASENAME=$(basename ${FILE})
-        local EXT=${BASENAME##*.}
-        if [[ ${EXT} != ${REQ_EXT} ]]; then
-            local LINK="${BASENAME%%.*}.${REQ_EXT}"
-            if [[ ! -f ${LINK} ]]; then
-                ln -s ${FILE} ${LINK}
-            fi
-            FILE="${LINK}"
-        fi
-        OUTPUT="${OUTPUT} ${FILE}"
-    done
-
-    echo ${OUTPUT}
-}
-
-# from http://www.linuxjournal.com/content/use-date-command-measure-elapsed-time
-timer() {
-    if [[ $# -eq 0 ]]; then
-        echo $(date '+%s')
-    else
-        local stime=$1
-        etime=$(date '+%s')
-
-        if [[ -z "$stime" ]]; then stime=$etime; fi
-
-        dt=$((etime - stime))
-        ds=$((dt % 60))
-        dm=$(((dt / 60) % 60))
-        dh=$((dt / 3600))
-        printf '%d:%02d:%02d' $dh $dm $ds
-    fi
-}
-
-on_exit() {
-    echo "Elapsed time: $(timer ${START_TIME})"
-}
-
-set -eux
-
-# log the full environment for debugging
-xargs -n 1 -0 < /proc/self/environ > env.log
-
-START_TIME=$(timer)
-trap on_exit EXIT
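Every wrapper leans on ``extify``, so its renaming rule is worth restating.
The same logic as a tiny standalone Python sketch (file names hypothetical)::

    import os

    def extify_name(path, req_ext):
        # mirror of util.sh's extify naming: keep a name whose extension
        # already matches, otherwise link it as "<name up to first dot>.<ext>"
        basename = os.path.basename(path)
        ext = basename.rsplit(".", 1)[-1]
        if ext == req_ext:
            return basename
        return basename.split(".", 1)[0] + "." + req_ext

    print(extify_name("dataset_42.dat", "fasta"))  # -> dataset_42.fasta
    print(extify_name("reads.fasta", "fasta"))     # -> reads.fasta

This matters because Galaxy hands tools datasets with opaque names like
``dataset_42.dat``, while tools such as seqmagick and pplacer infer formats
from file extensions.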
diff -r d4690e65afcd -r c8cc6529038c xlsx_to_csv.xml
--- a/xlsx_to_csv.xml Thu Feb 26 18:16:36 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-<!-- tool/requirements/command markup lost in extraction; surviving text: -->
- yapp_env
- in2csv -f xlsx $input > $output
-<!-- inputs/outputs/help markup lost in extraction -->
-
-.. class:: infomark
-
-**What it does**
-
-This tool converts a spreadsheet in Microsoft Excel 2007 (.xlsx) format to CSV.
-
-
-