Mercurial > repos > rnateam > mirdeep2

--- a/mirdeep2.xml	Mon Jan 26 09:56:49 2015 -0500
+++ b/mirdeep2.xml	Thu Apr 05 08:38:04 2018 -0400
@@ -1,91 +1,67 @@
-<tool id="rbc_mirdeep2" name="MiRDeep2" version="2.0.0">
-    <description>
-<![CDATA[
-identification of novel and known miRNAs
-]]>
-    </description>
+<tool id="rbc_mirdeep2" name="MiRDeep2" version="2.0.0.8.1">
+    <description>identification of novel and known miRNAs</description>
     <requirements>
-        <requirement type="package" version="2.0">mirdeep2</requirement>
-        <requirement type="package" version="2.0">mirdeep2_quantifier</requirement>
-        <requirement type="package" version="0.12.7">bowtie</requirement>
-        <requirement type="package" version="5.18.1">perl</requirement>
-        <requirement type="package" version="1.8.5">vienna_rna</requirement>
-        <requirement type="package" version="2.023">pdf_api2</requirement>
-        <requirement type="package" version="2.0">randfold</requirement>
+        <requirement type="package" version="2.0.0.8">mirdeep2</requirement>
     </requirements>
-
-    <command>
+    <command detect_errors="aggressive">
 <![CDATA[
     miRDeep2.pl
-
+
     $reads
     $genome
     $mappings
-
+
     #if $mature_this
         $mature_this
     #else
         none
     #end if
-
+
     #if $mature_other
         $mature_other
     #else
         none
     #end if
-
+
     #if $precursors
         $precursors
     #else
         none
     #end if
-
+
     #if $species.value != 'all'
         -t $species
     #end if
-
+
     #if $star_sequences
         -s $star_sequences
     #end if
-
+
     #if $min_read_stack
         -a $min_read_stack
     #end if
-
+
     #if $min_read_stack
         -a $min_read_stack
     #end if
-
+
     -g $max_precursors_analyze
     -b $min_score_cutoff
     $disable_randfold
-
-    ; cp result*.bed result.bed 2> /dev/null
-    ; cp result*.csv result.csv 2> /dev/null
-    ; cp mirdeep_runs/run*/output.mrd . 2> /dev/null
-    ; cp mirdeep_runs/run*/survey.csv . 2> /dev/null
-
+
+    &&
+
     ## html output
-    ;
-    cp result*.html $html 2> /dev/null
-
+    mv result*.html $html 2> /dev/null
+
     ## move pdf directory to be accessible from the new index.html
-    ;
+    &&
     mkdir -p $html.files_path 2> /dev/null
-    ;
-    cp -R pdfs* $html.files_path 2> /dev/null
-
+    &&
+    mv pdfs* $html.files_path 2> /dev/null
 ]]>
     </command>
-    <stdio>
-        <!-- Anything other than zero is an error -->
-        <exit_code range="1:" />
-        <exit_code range=":-1" />
-        <!-- In case the return code has not been set propery check stderr too -->
-        <regex match="Error:" />
-        <regex match="Exception:" />
-    </stdio>
-    <inputs>
+    <inputs>
         <param name="reads" format="fasta" type="data" label="Collapsed deep sequencing reads">
             <help>
 <![CDATA[
@@ -97,7 +73,7 @@
         </param>
         <param name="genome" format="fasta" type="data" label="Genome" help="Genome contigs in fasta format. The identifiers should be unique."/>
         <param name="mappings" format="tabular" type="data" label="Mappings" help="Reads mapped against genome. Mappings should be in ARF format."/>
-        <param name="mature_this" optional="true" format="fasta" type="data" label="Mature miRNA sequences for this species"
+        <param name="mature_this" optional="true" format="fasta" type="data" label="Mature miRNA sequences for this species"
             help="miRBase miRNA sequences in fasta format. These should be the known mature sequences for the species being analyzed."/>
         <param name="mature_other" optional="true" format="fasta" type="data" label="Mature miRNA sequences for related species">
             <help>
@@ -107,9 +83,9 @@
 ]]>
             </help>
         </param>
-        <param name="precursors" optional="true" format="fasta" type="data" label="Precursor sequences"
+        <param name="precursors" optional="true" format="fasta" type="data" label="Precursor sequences"
             help="miRBase miRNA precursor sequences in fasta format. These should be the known precursor sequences for the species being analyzed."/>
-
+
         <param name="species" type="select" label="Search in species" help="If not searching in a specific species all species in your files will be analyzed. (-t)">
             <option value="all">All species</option>
             <option value="tni">tetraodon</option>
@@ -145,8 +121,8 @@
             <option value="dsi">d.simulans</option>
         </param>
         <param name="star_sequences" format="fasta" type="data" optional="true" label="Star sequences" help="From miRBase in fasta format (optional) (-s)"/>
-
-        <param name="min_read_stack" optional="true" type="integer" minvalue="0" label="Minimum read stack height">
+
+        <param name="min_read_stack" optional="true" type="integer" min="0" label="Minimum read stack height">
             <help>
 <![CDATA[
 minimum read stack height that triggers analysis. Using this option disables
@@ -154,18 +130,47 @@
 ]]>
             </help>
         </param>
-        <param name="max_precursors_analyze" type="integer" value="50000" label="Maximum precursors"
+        <param name="max_precursors_analyze" type="integer" value="50000" label="Maximum precursors"
             help="Maximum number of precursors to analyze when automatic excision gearing is used. If set to -1 all precursors will be analyzed. (-g)."/>
-        <param name="min_score_cutoff" type="integer" value="0" label="Minimum miRNA score"
+        <param name="min_score_cutoff" type="integer" value="0" label="Minimum miRNA score"
             help="Minimum score cut-off for predicted novel miRNAs to be displayed in the overview table. (-b)"/>
         <param name="disable_randfold" type="boolean" truevalue="-c" falsevalue="" label="Disable randfold analysis" help="(-c)"/>
+        <param name="detailed_fasta" type="boolean" checked="false" label="Detailed fasta output" help="Output fasta files of precursors, mature and star strand for both novel and known miRNAs"/>
     </inputs>
     <outputs>
-        <data name="tab_results" format="tabular" from_work_dir="result.csv" label="Tabular output of ${tool.name} on ${on_string}"/>
+        <data name="tab_results" format="tabular" from_work_dir="result*.csv" label="Tabular output of ${tool.name} on ${on_string}"/>
         <data format="html" name="html" label="${tool.name} on ${on_string} (html report)"/>
-        <data name="pred_acc" format="tabular" from_work_dir="survey.csv" label="Prediction accuracy output of ${tool.name} on ${on_string}"/>
-        <data name="bed_out" format="bed" from_work_dir="result.bed" label="Bed output of ${tool.name} on ${on_string}"/>
-        <data name="mrd_out" format="txt" from_work_dir="output.mrd" label="Text output of ${tool.name} on ${on_string}"/>
+        <data name="pred_acc" format="tabular" from_work_dir="mirdeep_runs/run*/survey.csv" label="Prediction accuracy output of ${tool.name} on ${on_string}"/>
+        <data name="bed_out" format="bed" from_work_dir="result*.bed" label="Bed output of ${tool.name} on ${on_string}"/>
+        <data name="mrd_out" format="txt" from_work_dir="mirdeep_runs/run*/output.mrd" label="Text output of ${tool.name} on ${on_string}"/>
+
+        <data name="known_mature" format="fasta" from_work_dir="mirna_results_*/known_mature_*.fa" label="${tool.name} on ${on_string}: known mature">
+            <filter>detailed_fasta</filter>
+        </data>
+        <data name="known_pres" format="fasta" from_work_dir="mirna_results_*/known_pres_*.fa" label="${tool.name} on ${on_string}: known precursors">
+            <filter>detailed_fasta</filter>
+        </data>
+        <data name="known_star" format="fasta" from_work_dir="mirna_results_*/known_star_*.fa" label="${tool.name} on ${on_string}: known star">
+            <filter>detailed_fasta</filter>
+        </data>
+        <data name="not_mature" format="fasta" from_work_dir="mirna_results_*/not_mature_*.fa" label="${tool.name} on ${on_string}: not detected mature">
+            <filter>detailed_fasta</filter>
+        </data>
+        <data name="not_pres" format="fasta" from_work_dir="mirna_results_*/not_pres_*.fa" label="${tool.name} on ${on_string}: not detected precursors">
+            <filter>detailed_fasta</filter>
+        </data>
+        <data name="not_star" format="fasta" from_work_dir="mirna_results_*/not_star_*.fa" label="${tool.name} on ${on_string}: not detected star">
+            <filter>detailed_fasta</filter>
+        </data>
+        <data name="novel_mature" format="fasta" from_work_dir="mirna_results_*/novel_mature_*.fa" label="${tool.name} on ${on_string}: novel mature">
+            <filter>detailed_fasta</filter>
+        </data>
+        <data name="novel_pres" format="fasta" from_work_dir="mirna_results_*/novel_pres_*.fa" label="${tool.name} on ${on_string}: novel precursors">
+            <filter>detailed_fasta</filter>
+        </data>
+        <data name="novel_star" format="fasta" from_work_dir="mirna_results_*/novel_star_*.fa" label="${tool.name} on ${on_string}: novel star">
+            <filter>detailed_fasta</filter>
+        </data>
     </outputs>
     <tests>
         <test>
@@ -175,18 +180,66 @@
             <param name="mature_this" value="mature_ref_this_species.fa"/>
             <param name="mature_other" value="mature_ref_other_species.fa"/>
             <param name="precursors" value="precursors_ref_this_species.fa"/>
-
+
             <output name="tab_results" file="result.csv" compare="sim_size"/>
-            <output name="prec_acc" file="survey.csv" compare="sim_size"/>
+            <output name="pred_acc" file="survey.csv" compare="sim_size"/>
             <output name="bed_out" file="result.bed" compare="sim_size"/>
             <output name="mrd_out" file="output.mrd" compare="sim_size"/>
         </test>
+        <test>
+            <param name="reads" value="reads_collapsed.fa"/>
+            <param name="genome" value="cel_cluster.fa"/>
+            <param name="mappings" value="reads_collapsed_vs_genome.arf"/>
+            <param name="mature_this" value="mature_ref_this_species.fa"/>
+            <param name="mature_other" value="mature_ref_other_species.fa"/>
+            <param name="precursors" value="precursors_ref_this_species.fa"/>
+            <param name="detailed_fasta" value="true"/>
+
+            <output name="tab_results" file="result.csv" compare="sim_size"/>
+            <output name="pred_acc" file="survey.csv" compare="sim_size"/>
+            <output name="bed_out" file="result.bed" compare="sim_size"/>
+            <output name="mrd_out" file="output.mrd" compare="sim_size"/>
+
+            <output name="known_mature" file="detailed_fasta/known_mature.fa" compare="sim_size"/>
+            <output name="known_pres" file="detailed_fasta/known_pres.fa" compare="sim_size"/>
+            <output name="known_star" file="detailed_fasta/known_star.fa" compare="sim_size"/>
+            <output name="not_mature" file="detailed_fasta/not_mature.fa" compare="sim_size"/>
+            <output name="not_pres" file="detailed_fasta/not_pres.fa" compare="sim_size"/>
+            <output name="not_star" file="detailed_fasta/not_star.fa" compare="sim_size"/>
+            <output name="novel_mature" file="detailed_fasta/novel_mature.fa" compare="sim_size"/>
+            <output name="novel_pres" file="detailed_fasta/novel_pres.fa" compare="sim_size"/>
+            <output name="novel_star" file="detailed_fasta/novel_star.fa" compare="sim_size"/>
+        </test>
     </tests>
     <help>
 <![CDATA[
-**What MiRDeep2 does**
+
+**What it does**

 MiRDeep2 is a software package for identification of novel and known miRNAs in deep sequencing data. Furthermore, it can be used for miRNA expression profiling across samples.
+
+**Input**
+
+A FASTA file with deep sequencing reads, a FASTA file of the corresponding genome, a file of reads mapped to the genome in miRDeep2 ARF format, an optional FASTA file with known miRNAs of the species being analyzed and an optional FASTA file of known miRNAs of related species.
+
+ARF format:
+ARF is a proprietary file format generated and processed by miRDeep2. It contains information about reads mapped to a reference genome. Each line in such a file contains 13 columns:
+
+1. read identifier
+2. length of read sequence
+3. start position in read sequence that is mapped
+4. end position in read sequence that is mapped
+5. read sequence
+6. identifier of the genome part to which a read is mapped. This is either a scaffold id or a chromosome name
+7. length of the genome sequence a read is mapped to
+8. start position in the genome where a read is mapped
+9. end position in the genome where a read is mapped
+10. genome sequence to which a read is mapped
+11. genome strand information. Plus means the read is aligned to the sense-strand of the genome. Minus means it is aligned to the antisense-strand of the genome.
+12. number of mismatches in the read mapping
+13. edit string that indicates matches by lowercase 'm' and mismatches by uppercase 'M'
+
+
 ]]>
     </help>
     <citations>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/detailed_fasta/known_mature.fa	Thu Apr 05 08:38:04 2018 -0400
@@ -0,0 +1,14 @@
+>chrII:11534525-11540624_11
+TCACCGGGTGAACACTTGCAGT
+>chrII:11534525-11540624_17
+TCACCGGGTGTACATCAGCTAA
+>chrII:11534525-11540624_9
+TCACCGGGTGAAAATTCGCATG
+>chrII:11534525-11540624_15
+TCACCGGGTGTAAATCAGCTTG
+>chrII:11534525-11540624_19
+GGTGGTTTTTCTCTGCAGTGATA
+>chrII:11534525-11540624_13
+TCACCGGGAGAAAAACTGGAGT
+>chrII:11534525-11540624_12
+TCACCGGGTGAACACTTGCAGT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/detailed_fasta/known_pres.fa	Thu Apr 05 08:38:04 2018 -0400
@@ -0,0 +1,14 @@
+>chrII:11534525-11540624_11
+TGTGGGTGTCCGTTGCGGTGCTACATTCTCTAATCTGTATCACCGGGTGAACACTTGCAGT
+>chrII:11534525-11540624_17
+AGTGGATGTATGCCATGATGATAAGATATCAGAAATCCTATCACCGGGTGTACATCAGCTAA
+>chrII:11534525-11540624_9
+CGCCAATTTTCGCTTCAGTGCTAGACCATCCAAAGTGTCTATCACCGGGTGAAAATTCGCATG
+>chrII:11534525-11540624_15
+AGCTGATTTCGTCTTGGTAATAAGCTCGTCATTGAGATTATCACCGGGTGTAAATCAGCTTG
+>chrII:11534525-11540624_19
+GGTGGTTTTTCTCTGCAGTGATAGATACTTCTAACAACTCGCTATCACCGGGTGAAAAATCACCTA
+>chrII:11534525-11540624_13
+TCCGGTTTTTTCCGTGGTGATAACGCATCCAAAAGTCTCTATCACCGGGAGAAAAACTGGAGT
+>chrII:11534525-11540624_12
+TCACCGGGTGAACACTTGCAGTGGTCCTCGTGGTTTCTCTGTGAGCCAGGTCCTGTTCCGGTTTTTTCCGTGGTGATA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/detailed_fasta/known_star.fa	Thu Apr 05 08:38:04 2018 -0400
@@ -0,0 +1,14 @@
+>chrII:11534525-11540624_11
+TGTGGGTGTCCGTTGCGGTGCTA
+>chrII:11534525-11540624_17
+AGTGGATGTATGCCATGATGATA
+>chrII:11534525-11540624_9
+CGCCAATTTTCGCTTCAGTGCTA
+>chrII:11534525-11540624_15
+AGCTGATTTCGTCTTGGTAATA
+>chrII:11534525-11540624_19
+TCACCGGGTGAAAAATCACCTA
+>chrII:11534525-11540624_13
+TCCGGTTTTTTCCGTGGTGATA
+>chrII:11534525-11540624_12
+TGTTCCGGTTTTTTCCGTGGTGATA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/detailed_fasta/novel_mature.fa	Thu Apr 05 08:38:04 2018 -0400
@@ -0,0 +1,2 @@
+>chrII:11534525-11540624_7
+TCACCGGGTGGAAACTAGCAGT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/detailed_fasta/novel_pres.fa	Thu Apr 05 08:38:04 2018 -0400
@@ -0,0 +1,2 @@
+>chrII:11534525-11540624_7
+TGCTGGTTTCTTCCACAGTGGTACTTTCCATTAGAACTATCACCGGGTGGAAACTAGCAGT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/detailed_fasta/novel_star.fa	Thu Apr 05 08:38:04 2018 -0400
@@ -0,0 +1,2 @@
+>chrII:11534525-11540624_7
+TGCTGGTTTCTTCCACAGTGGTA
--- a/tool_dependencies.xml	Mon Jan 26 09:56:49 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="perl" version="5.18.1">
-        <repository changeset_revision="a1a111b9faa5" name="package_perl_5_18" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="bowtie" version="0.12.7">
-        <repository changeset_revision="f54826948b0b" name="package_bowtie_0_12_7" owner="devteam" toolshed="https://testtoolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="vienna_rna" version="1.8.5">
-        <repository changeset_revision="263ded3da06c" name="package_vienna_rna_1_8" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="pdf_api2" version="2.023">
-        <repository changeset_revision="356e5d565b9f" name="package_perl_pdf_api2_2_023" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="randfold" version="2.0">
-        <repository changeset_revision="d44a316336e5" name="package_randfold_2_0" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="mirdeep2_quantifier" version="2.0">
-        <repository changeset_revision="f3fa4faa3ab3" name="mirdeep2_quantifier" owner="rnateam" toolshed="https://testtoolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="mirdeep2" version="2.0">
-        <install version="1.0">
-            <actions>
-                <action type="download_by_url">https://raw.githubusercontent.com/bgruening/download_store/master/miRDeep2/miRDeep2.tar.gz</action>
-                <action type="move_directory_files">
-                    <source_directory>.</source_directory>
-                    <destination_directory>$INSTALL_DIR</destination_directory>
-                </action>
-                <action type="set_environment">
-                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR</environment_variable>
-                </action>
-            </actions>
-        </install>
-        <readme>
-
-MiRDeep2 is a software package for identification of novel and known miRNAs in deep sequencing data. Furthermore, it can be used for miRNA expression profiling across samples.
-
-        </readme>
-    </package>
-</tool_dependency>