Mercurial > repos > peterjc > mira4_assembler
changeset 29:3bdcee1798d0 draft
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit c8f27e9d77b9b0827bc7660e599cd19a546d9c9f
author | peterjc |
---|---|
date | Fri, 02 Oct 2015 06:08:30 -0400 |
parents | 37152a480948 |
children | b506e3b779fa |
files | tools/mira4_0/README.rst tools/mira4_0/mira4_bait.xml tools/mira4_0/mira4_mapping.xml |
diffstat | 3 files changed, 129 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/tools/mira4_0/README.rst Tue Sep 15 09:55:02 2015 -0400 +++ b/tools/mira4_0/README.rst Fri Oct 02 06:08:30 2015 -0400 @@ -21,7 +21,7 @@ ====================== This should be straightforward. Via the Tool Shed, Galaxy should automatically -install the ``mira`` datatype, samtools, and download and install the precompiled +install the 'mira' datatype, samtools, and download and install the precompiled binary for MIRA v4.0.2 for the Galaxy wrapper, and run any tests. For MIRA 4, the Galaxy wrapper has been split in two, allowing separate @@ -106,6 +106,7 @@ - MIRA 4.0.2 dependency now declared via dedicated Tool Shed package. v0.0.8 - Renamed folder now have a MIRA 4.9.x wrapper (internal change only). v0.0.9 - Additional unit tests now covering ``miraconvert``. + - Was missing ``mirabait`` wrapper since move to using Planemo. ======= ======================================================================
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/mira4_0/mira4_bait.xml Fri Oct 02 06:08:30 2015 -0400 @@ -0,0 +1,126 @@ +<tool id="mira_4_0_bait" name="MIRA v4.0 mirabait" version="0.0.9"> + <description>Filter reads using kmer matches</description> + <requirements> + <requirement type="binary">mirabait</requirement> + <requirement type="package" version="4.0.2">MIRA</requirement> + </requirements> + <stdio> + <!-- Assume anything other than zero is an error --> + <exit_code range="1:" /> + <exit_code range=":-1" /> + </stdio> + <version_command interpreter="python">mira4_bait.py --version</version_command> + <command interpreter="python"> +mira4_bait.py $input_reads.ext $output_choice $strand_choice $kmer_length $min_occurence "$bait_file" "$input_reads" "$output_reads" + </command> + <inputs> + <param name="bait_file" type="data" format="fasta,fastq,mira" required="true" label="Bait file (what to look for)" /> + <param name="input_reads" type="data" format="fasta,fastq,mira" required="true" label="Reads to search" /> + <param name="output_choice" type="select" label="Output positive matches, or negative matches?"> + <option value="pos">Just positive matches</option> + <option value="neg">Just negative matches</option> + </param> + <param name="strand_choice" type="select" label="Check for matches on both strands?"> + <option value="both">Check both strands</option> + <option value="fwd">Just forward strand</option> + </param> + <param name="kmer_length" type="integer" value="31" min="1" max="32" + label="k-mer length" help="Maximum 32" /> + <param name="min_occurence" type="integer" value="1" min="1" + label="Minimum k-mer occurence" + help="How many k-mer matches do you want per read? Minimum one" /> + </inputs> + <outputs> + <data name="output_reads" format_source="input_reads" metadata_source="input_reads" + label="$input_reads.name #if str($output_choice)=='pos' then 'matching' else 'excluding matches to' # $bait_file.name"/> + </outputs> + <tests> + <test> + <param name="bait_file" value="tvc_bait.fasta" ftype="fasta" /> + <param name="input_reads" value="tvc_mini.fastq" ftype="fastqsanger" /> + <output name="output_reads" file="tvc_mini_bait_pos.fastq" ftype="fastqsanger" /> + </test> + <test> + <param name="bait_file" value="tvc_bait.fasta" ftype="fasta" /> + <param name="input_reads" value="tvc_mini.fastq" ftype="fastqsanger" /> + <param name="kmer_length" value="32" /> + <param name="min_occurence" value="50" /> + <output name="output_reads" file="tvc_mini_bait_strict.fastq" ftype="fastqsanger" /> + </test> + <test> + <param name="bait_file" value="tvc_bait.fasta" ftype="fasta" /> + <param name="input_reads" value="tvc_mini.fastq" ftype="fastqsanger" /> + <param name="output_choice" value="neg" /> + <output name="output_reads" file="tvc_mini_bait_neg.fastq" ftype="fastqsanger" /> + </test> + </tests> + <help> +**What it does** + +Runs the ``mirabait`` utility from MIRA v4.0 to filter your input reads +according to whether or not they contain perfect kmer matches to your +bait file. By default this looks for 31-mers (kmers or *k*-mers where +the fragment length *k* is 31), and only requires a single matching kmer. + +The ``mirabait`` utility is useful in many applications and pipelines +outside of using the main MIRA tool for assembly or mapping. + +.. class:: warningmark + +Note ``mirabait`` cannot be used on protein (amino acid) sequences. + +**Example Usage** + +To remove over abundant entries like rRNA sequences, run ``mirabait`` with +known rRNA sequences as the bait and select the *negative* matches. + +To do targeted assembly by fishing out reads belonging to a gene and just +assemble these, run ``mirabait`` with the gene of interest as the bait and +select the *positive* matches. + +To iteratively reconstruct mitochondria you could start by fishing out reads +matching any known mitochondrial sequence, assembly those, and repeat. + + +**Notes on paired read** + +.. class:: warningmark + +While MIRA 4.0 is aware of many read naming conventions to identify paired read +partners, this version of the ``mirabait`` tool considers each read in isolation. +Applying it to paired read files may leave you with orphaned reads. + +The version of ``mirabait`` included in MIRA 4.9.5 onwards is pair-aware. + + +**Citation** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers: + +Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). +Galaxy tools and workflows for sequence analysis with applications +in molecular plant pathology. PeerJ 1:e167 +http://dx.doi.org/10.7717/peerj.167 + +Bastien Chevreux, Thomas Wetter and Sándor Suhai (1999). +Genome Sequence Assembly Using Trace Signals and Additional Sequence Information. +Computer Science and Biology: Proceedings of the German Conference on Bioinformatics (GCB) 99, pp. 45-56. +http://www.bioinfo.de/isb/gcb99/talks/chevreux/main.html + +This wrapper is available to install into other Galaxy Instances via the Galaxy +Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/mira4_assembler + </help> + <citations> + <citation type="doi">10.7717/peerj.167</citation> + <citation type="bibtex">@ARTICLE{Chevreux1999-mira3, + author = {B. Chevreux and T. Wetter and S. Suhai}, + year = {1999}, + title = {Genome Sequence Assembly Using Trace Signals and Additional Sequence Information}, + journal = {Computer Science and Biology: Proceedings of the German Conference on Bioinformatics (GCB)} + volume = {99}, + pages = {45-56}, + url = {http://www.bioinfo.de/isb/gcb99/talks/chevreux/main.html} + }</citation> + </citations> +</tool>
--- a/tools/mira4_0/mira4_mapping.xml Tue Sep 15 09:55:02 2015 -0400 +++ b/tools/mira4_0/mira4_mapping.xml Fri Oct 02 06:08:30 2015 -0400 @@ -1,4 +1,4 @@ -<tool id="mira_4_0_mapping" name="MIRA v4.0 mapping" version="0.0.8"> +<tool id="mira_4_0_mapping" name="MIRA v4.0 mapping" version="0.0.9"> <description>Maps Sanger, Roche 454, Solexa/Illumina, Ion Torrent and PacBio reads</description> <requirements> <requirement type="binary">mira</requirement>