Mercurial > repos > glogobyte > isoread

--- a/isoread.xml	Sun Dec 05 13:13:33 2021 +0000
+++ b/isoread.xml	Sun Dec 05 13:13:59 2021 +0000
@@ -1,80 +1,163 @@
-<tool id="ArmDB" name="ArmDB: Generation of Pre-miRNA arm reference DB (5p and 3p arms)" version="0.1.0">
+
+<tool id="IsoRead" name="IsoRead: miR and isomiR identification and classification" version="0.1.0">
   <requirements>
-    <requirement type="package" version="2.28.0">Bedtools</requirement>
-     <requirement type="package" version="3.7.4">python</requirement>
+   <requirement type="package" version="1.7">fpdf</requirement>
+   <requirement type="package" version="0.8">logomaker</requirement>
+   <requirement type="package" version="3.7.4">python</requirement>
+   <requirement type="package" version="1.17.3">numpy</requirement>
+   <requirement type="package" version="3.1.2">matplotlib</requirement>
+   <requirement type="package" version="1.0.3">pandas</requirement>
   </requirements>
   <command>
-    #if $mir_input.analysis == "1":
-     #set path=$mir_input.genome1.fields.dbkey
-     python $__tool_directory__/pre_mirbase.py -pos $positions -tool_dir $__tool_directory__ -gff3 "$path" -gen $mir_input.genome1.fields.value
-    #end if
-    #if $mir_input.analysis == "2":
-     #set path=$mir_input.genome2.fields.value
-     python $__tool_directory__/pre_mirgene.py -pos $positions -tool_dir $__tool_directory__ -sym "$path"
+    #set controls=[]
+    #for $input in $control#
+    $controls.extend([str($input.element_identifier),str($input)])
+    #end for#
+    #set treateds=[]
+    #for $input in $treated#
+    $treateds.extend([str($input.element_identifier),str($input)])
+    #end for#
+    #if $mir_input.database == "1":
+      #if $f.fil == "1":
+        #set path=$mir_input.genome1.fields.path
+        python -W ignore $__tool_directory__/mirbase_opt.py -con $controls -tre $treateds -analysis $analysis -tool_dir $__tool_directory__ -gen "$path" -f "$mir_input.database" -percentage "-1" -counts "-1" -name1 "$fal1" -name2 "$fal2"
+      #end if
+      #if $f.fil == "2":
+        #set path=$mir_input.genome1.fields.path
+        python -W ignore $__tool_directory__/mirbase_opt.py -con $controls -tre $treateds -analysis $analysis -tool_dir $__tool_directory__ -gen "$path" -f "$mir_input.database" -percentage "$f.fil1" -counts "$f.fil2" -name1 "$fal1" -name2 "$fal2"
+      #end if
+    #else:
+      #if $f.fil == "1":
+        #set path=$mir_input.genome2.fields.value
+        python -W ignore $__tool_directory__/mirgene_with_new_names.py -con $controls -tre $treateds -analysis $analysis -tool_dir $__tool_directory__ -gen "$path" -f "$mir_input.database" -percentage "-1" -counts "-1" -name1 "$fal1" -name2 "$fal2"
+      #end if
+      #if $f.fil == "2":
+        #set path=$mir_input.genome2.fields.value
+        python -W ignore $__tool_directory__/mirgene_with_new_names.py -con $controls -tre $treateds -analysis $analysis -tool_dir $__tool_directory__ -gen "$path" -f "$mir_input.database" -percentage "$f.fil1" -counts "$f.fil2" -name1 "$fal1" -name2 "$fal2"
+      #end if
     #end if
+
   </command>
   <inputs>
-   <conditional name="mir_input">
-    <param name="analysis" type="select" label="Choose reference Mir database" help="Choose which database prefer to be used.">
+    <param name="analysis" type="select" label="Discover miR with templated or/and non-templated isomiRs" help="Choose the category of miRNAs for detection">
+      <option value="1" selected="true">Detection of only templated miRNAs</option>
+      <option value="2">Detection of templated and non-templated miRNAs</option>
+    </param>
+    <conditional name="mir_input">
+     <param name="database" type="select" label="Reference miRNA DB" help="Choose which database prefer to be used.">
       <option value="1" selected="true">MirBase</option>
       <option value="2">MirGene</option>
-    </param>
-    <when value="1">
-        <param name="genome1" type="select" label="Choose organism" help="If your genome coordinates of interest is not listed, contact the Galaxy team">
-          <options from_data_table="mirbase" />
-        </param>
-    </when>
-    <when value="2">
-      <param name="genome2" type="select" label="Choose organism" help="If your genome coordinates of interest is not listed, contact the" >
-        <options from_data_table="mirgene" />
-      </param>
-    </when>
-   </conditional>
-   <param name="positions" type="integer" min="0" max="8" value="1" label="Custom Arm length" help="Choose how many nucleotides you want to extend the 5p and 3p mature sequences to create the refernce arms" />
+     </param>
+     <when value="1">
+        <param name="genome1" type="select" label="Reference miRNAs (organism)" help="If your genome coordinates of interest are not listed, contact the Galaxy team">
+         <options from_data_table="mirbase" />
+        </param>
+     </when>
+     <when value="2">
+        <param name="genome2" type="select" label="Reference miRNAs (organism)" help="If your genome coordinates of interest are not listed, contact the Galaxy team">
+         <options from_data_table="mirgene" />
+        </param>
+     </when>
+    </conditional>
+    <param name="fal1" type="text" value="FactorLevel" label="Specify a factor level, typical values could be 'tumor', 'normal', 'treated' or 'control'"/>
+    <param name="control" format="sam" type="data" multiple="True" label="Select BAM files of the factor level samples" />
+    <param name="fal2" type="text" value="FactorLevel" label="Specify a factor level, typical values could be 'tumor', 'normal', 'treated' or 'control'"/>
+    <param name="treated" format="sam" type="data" multiple="True" label="Select BAM files of the factor level samples" />
+    <conditional name="f">
+     <param name="fil" type="select" label="Filter low counts" help="Treat genes with very low expression as unexpressed and filter out">
+      <option value="1" selected="true">No</option>
+      <option value="2">Yes</option>
+     </param>
+     <when value="2">
+      <param name="fil1" type="integer" value="0" label="Minimum percentage of the samples" help="Filter out all genes that do not meet the Minimum counts in at least this many samples of every category"/>
+      <param name="fil2" type="integer" value="0" label="Minimum counts" help="Filter out all genes that do not meet this minimum count"/>
+     </when>
+     <when value="1">
+     </when>
+    </conditional>
+    <param name="db" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Output Database files" />
+    <param name="cmatrix" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Output Matrix files, one for each factor level"  />
+    <param name="c_files" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Output Count tables, one for each sample" />
   </inputs>
   <outputs>
-    <data name="shifted_gff1" format="txt" label="Custom Bed file" from_work_dir="$__tool_directory__/shifted_mirnas.bed">
-         <filter>mir_input['analysis']== "1" </filter>
-    </data>
-    <data name="normal_gff1" format="fasta" label="Official Bed file" from_work_dir="$__tool_directory__/original_mirnas.bed" >
-       	 <filter>mir_input['analysis']== "1" </filter>
-    </data>
-    <data name="custom reference genome1" format="fasta" label="Custom reference arms" from_work_dir="$__tool_directory__/new_ref.fa" >
-       	 <filter>mir_input['analysis']== "1" </filter>
-    </data>
-
-    <data name="shifted_gff" format="txt" label="Mature and star miRNA sequences" from_work_dir="$__tool_directory__/shifted_mirnas.bed">
-       	 <filter>mir_input['analysis']== "2" </filter>
-    </data>
-    <data name="normal_gff" format="fasta" label="Primary transcripts with flanks" from_work_dir="$__tool_directory__/original_mirnas.bed" >
-         <filter>mir_input['analysis']== "2" </filter>
-    </data>
-    <data name="custom reference genome" format="fasta" label="Custom reference arms" from_work_dir="$__tool_directory__/new_ref.fa" >
-    	 <filter>mir_input['analysis']== "2" </filter>
-    </data>
-
+    <collection name="list_output1" type="list" label="Database ${fal1} templated" >
+        <discover_datasets pattern="__name__" format="tabular" directory="split1"  />
+        <filter>db == 1 and (analysis == "1" or analysis == "2")</filter>
+    </collection>
+    <collection name="list_output2" type="list" label="Database ${fal2} templated" >
+        <discover_datasets pattern="__name__" format="tabular" directory="split2" />
+        <filter>db == 1 and (analysis == "1" or analysis == "2")</filter>
+    </collection>
+    <collection name="list_output3" type="list" label="Database ${fal1} non-templated" >
+        <discover_datasets pattern="__name__" format="tabular" directory="split3" />
+        <filter>db == 1 and analysis == "2"</filter>
+    </collection>
+    <collection name="list_output4" type="list" label="Database ${fal2} non-templated" >
+        <discover_datasets pattern="__name__" format="tabular" directory="split4" />
+        <filter>db == 1 and analysis == "2"</filter>
+    </collection>
+
+    <collection name="Counts" type="list" label="Count Matrices" >
+        <discover_datasets pattern="__name__" format="tabular" directory="Counts" />
+        <filter>cmatrix==1</filter>
+    </collection>
+    <collection name="list_output9" type="list" label="Count files ${fal1} for Differential Expression" >
+        <discover_datasets pattern="__name__" format="tabular" directory="Diff/temp_con"  />
+        <filter>c_files==1 and (analysis == "1")</filter>
+    </collection>
+    <collection name="list_output10" type="list" label="Count files ${fal2} for Differential Expression" >
+        <discover_datasets pattern="__name__" format="tabular" directory="Diff/temp_tre" />
+        <filter>c_files==1 and (analysis == "1")</filter>
+    </collection>
+    <collection name="list_output11" type="list" label="Count files ${fal1} for Differential Expression" >
+        <discover_datasets pattern="__name__" format="tabular" directory="Diff/n_temp_con" />
+        <filter>c_files==1 and analysis == "2"</filter>
+    </collection>
+    <collection name="list_output12" type="list" label="Count files ${fal2} for Differential Expression" >
+        <discover_datasets pattern="__name__" format="tabular" directory="Diff/n_temp_tre" />
+        <filter>c_files==1 and analysis == "2"</filter>
+    </collection>
+
+    <data name="Results non templated treated1" format="pdf" label="IsomiR Profile Report" from_work_dir="$__tool_directory__/report1.pdf" />
   </outputs>
   <help><![CDATA[
-.. class:: infomark
-
-**What it does**
-
------
+  **Inputs**
+
+  IsoRead takes SAM files from an alignment tool and RefSeq miRNA sequences from miRBase or MirGeneDB as input. It discovers miRs and their isoforms across samples and classifies them into two main types: templated isomiRs (_t) and non-templated isomiRs (_nont). The coordinates in the name extension show the sequence offset relative to the RefSeq sequence.

-**Inputs**
+  **Examples**
+
+  ============================= ========================
+  **RefSeq miRNA**              **Sequence**
+  ----------------------------- ------------------------
+  hsa-let-7c-5p_chr21(+)        TGAGGTAGTAGGTTGTATGGTT
+  **Templated miRNA**           **Sequence**
+  hsa-let-7c-5p_chr21(+)_t_0_-1 TGAGGTAGTAGGTTGTATGGT
+  **Non Templated miRNA**       **Sequence**
+  hsa-let-7c-5p_nont_0_+2_AT    TGAGGTAGTAGGTTGTATGGTTAT
+  ============================= ========================
+
+  **Outputs**

-- Genome coordinates in General Feature Format (GFF3) or FASTA files of RefSeq miRNAs
-
-ArmDB takes genome coordinates (GFF3) files from MirBase or FASTA files from MirGene as input. GFF3 files and FASTA files are downloaded directly from the databases.
+  As output, the tool produces tabular files listing all the detected miRs and isomiRs per category for every SAM file, which serve as a database for the user, and two types of count matrices containing the copy number of each entry. One count matrix type is compatible with the DESeq2 and edgeR tools, so that the user can continue with differential expression analysis. The other count matrix type, generated per miR category (RefSeq miRs, templated isomiRs, and non-templated isomiRs) and per group, is enriched with read sequences. Apart from the tabular files, the tool produces a PDF report file with several custom graphs and statistical information about the isomiR profiles in the user data.

-.. image:: armdb.png
+  **Examples**

-**Outputs**
+  Database file
+
+  .. image:: isoread.png

-- ArmDB generates FASTA files containing the custom reference arms that are produced with the user-defined nucleotides extension of the RefSeq sequences (up to 8 nt)
+  Count matrix enriched with read sequences

- ]]></help>
-  <citations>
-     <citation type="doi">10.3390/cancers13225663</citation>
-  </citations>
+  =============================== ====================== =========== ===========
+  **Name**                        **Sequence**           **Sample1** **Sample2**
+  ------------------------------- ---------------------- ----------- -----------
+  hsa-miR-320a-3p_chr8(-)         AAAAGCTGGGTTGAGAGGGCGA 260         609
+  hsa-miR-320a-3p_chr8(-)_t_+1_+1 AAAGCTGGGTTGAGAGGGCGAA 192         30
+  =============================== ====================== =========== ===========
+
+
+
+  ]]></help>
 </tool>
+