Mercurial > repos > galaxyp > proteomics_moff

--- a/moff.xml	Thu Jan 18 00:51:33 2018 -0500
+++ b/moff.xml	Wed Feb 14 07:19:27 2018 -0500
@@ -1,4 +1,4 @@
-<tool id="proteomics_moff" name="moFF" version="@VERSION@">
+<tool id="proteomics_moff" name="moFF" version="@VERSION@.1">
     <description>extracts MS1 intensities from spectrum files</description>
     <macros>
         <token name="@VERSION@">1.2.1</token>
@@ -77,36 +77,36 @@
         <token name="@WRANGLE_IDENT_INPUT_SINGLE@"><![CDATA[
             mkdir ./ident_inputs &&
             #if $task.ident_input.input_type_selector == "ps":
-                ln -s '$task.ident_input.ident_input_file' './ident_inputs/$task.ident_input.ident_input_file.display_name' &&
+                cp '$task.ident_input.ident_input_file' './ident_inputs/$task.ident_input.ident_input_file.display_name' &&
             #else
+                cp '$task.ident_input.ident_input_file' ./tempfile1.tab &&
                 ## optionally remove first line
                 #if $task.ident_input.remove_header:
-                    sed -i '1d' '$task.ident_input.ident_input_file' &&
+                    sed -i '1d' ./tempfile1.tab &&
                 #end if
                 ## header row with correct names: "peptide", "prot", "mod_peptide", "rt", "mz", "mass", and "charge"
-                echo -e "peptide\tprot\tmod_peptide\trt\tmz\tmass\tcharge" > tempfile.tab &&
-                awk 'BEGIN{OFS="\t"; FS="\t"}{print \$pep,\$prot,\$mod,\$rt,\$mz,\$mass,\$charge}' pep="${task.ident_input.peptide}" prot="$task.ident_input.prot" mod="$task.ident_input.mod_peptide" rt="$task.ident_input.rt" mz="$task.ident_input.mz" mass="$task.ident_input.mass" charge="$task.ident_input.charge" '$task.ident_input.ident_input_file' >> tempfile.tab &&
-                mv tempfile.tab '$task.ident_input.ident_input_file' &&
-                ln -s '$task.ident_input.ident_input_file' './ident_inputs/$task.ident_input.ident_input_file.display_name' &&
+                echo -e "peptide\tprot\tmod_peptide\trt\tmz\tmass\tcharge" > ./tempfile2.tab &&
+                awk 'BEGIN{OFS="\t"; FS="\t"}{print \$pep,\$prot,\$mod,\$rt,\$mz,\$mass,\$charge}' pep="${task.ident_input.peptide}" prot="$task.ident_input.prot" mod="$task.ident_input.mod_peptide" rt="$task.ident_input.rt" mz="$task.ident_input.mz" mass="$task.ident_input.mass" charge="$task.ident_input.charge" ./tempfile1.tab >> ./tempfile2.tab &&
+                mv ./tempfile2.tab './ident_inputs/$task.ident_input.ident_input_file.display_name' &&
             #end if
         ]]></token>
         <token name="@WRANGLE_IDENT_INPUT_MULTIPLE@"><![CDATA[
             mkdir ./ident_inputs &&
             #if $task.ident_input.input_type_selector == "ps":
                 #for $key in $task.ident_input.ident_input_file.keys():
-                    ln -s '${task.ident_input.ident_input_file[$key]}' './ident_inputs/${task.ident_input.ident_input_file[$key].display_name}' &&
+                    cp '${task.ident_input.ident_input_file[$key]}' './ident_inputs/${task.ident_input.ident_input_file[$key].display_name}' &&
                 #end for
             #else
                 #for $key in $task.ident_input.ident_input_file.keys():
+                    cp '${task.ident_input.ident_input_file[$key]}' './tempfile${key}_1.tab' &&
                     ## optionally remove first line
                     #if $task.ident_input.remove_header:
-                        sed -i '1d' '$task.ident_input.ident_input_file[$key]' &&
+                        sed -i '1d' './tempfile${key}_1.tab' &&
                     #end if
                     ## header row with correct names: "peptide", "prot", "mod_peptide", "rt", "mz", "mass", and "charge"
-                    echo -e "peptide\tprot\tmod_peptide\trt\tmz\tmass\tcharge" > tempfile.tab &&
-                    awk 'BEGIN{OFS="\t"; FS="\t"}{print \$pep,\$prot,\$mod,\$rt,\$mz,\$mass,\$charge}' pep="${task.ident_input.peptide}" prot="$task.ident_input.prot" mod="$task.ident_input.mod_peptide" rt="$task.ident_input.rt" mz="$task.ident_input.mz" mass="$task.ident_input.mass" charge="$task.ident_input.charge" '$filename' >> tempfile.tab &&
-                    mv tempfile.tab '$task.ident_input.ident_input_file[$key]' &&
-                    ln -s '$task.ident_input.ident_input_file[$key]' './ident_inputs/$task.ident_input.ident_input_file[$key].display_name' &&
+                    echo -e "peptide\tprot\tmod_peptide\trt\tmz\tmass\tcharge" > './tempfile${key}_2.tab' &&
+                    awk 'BEGIN{OFS="\t"; FS="\t"}{print \$pep,\$prot,\$mod,\$rt,\$mz,\$mass,\$charge}' pep="${task.ident_input.peptide}" prot="$task.ident_input.prot" mod="$task.ident_input.mod_peptide" rt="$task.ident_input.rt" mz="$task.ident_input.mz" mass="$task.ident_input.mass" charge="$task.ident_input.charge" './tempfile${key}_1.tab' >> './tempfile${key}_2.tab' &&
+                    mv './tempfile${key}_2.tab' './ident_inputs/$task.ident_input.ident_input_file[$key].display_name' &&
                 #end for
             #end if
         ]]></token>
@@ -135,6 +135,7 @@
     <requirements>
         <requirement type="package" version="@VERSION@">moff</requirement>
     </requirements>
+    <version_command>echo @VERSION@</version_command>
     <command detect_errors="aggressive"><![CDATA[
         mkdir ./out &&
         #if $task.task_selector == "moff":
@@ -188,7 +189,7 @@
     <inputs>
         <conditional name="task">
             <param name="task_selector" type="select" label="Choose which module to run">
-                <option value="moff" selected="true">Apex intensity</option>
+                <option value="moff">Apex intensity</option>
                 <option value="mbr">Match between runs</option>
                 <option value="all">All (match-between-runs followed by quantitation)</option>
             </param>
@@ -201,8 +202,6 @@
                     help="Specify rt window for xic in minutes." />
                 <param argument="--rt_p" type="float" value="1" label="Time window for the peak"
                     help="Specify the time windows for the peak in minutes." />
-                <param argument="--rt_p_match" type="float" value="1.5" label="Time window for the matched peak"
-                    help="Specify the time windows for the matched peak in minutes." />
                 <param argument="--peptide_summary" type="boolean" value="true" label="Output the peptide summary?"/>
             </when>
             <when value="mbr">
@@ -225,29 +224,32 @@
         </conditional>
     </inputs>
     <outputs>
-        <data format="tabular" name="output_table" label="${tool.name} quantification: ${on_string}">
+        <data format="tabular" name="output_table" label="${tool.name} on ${on_string}: quantification">
             <filter>task['task_selector']=='moff'</filter>
         </data>
-        <data format="txt" name="output_logs" label="${tool.name} log: ${on_string}">
+        <data format="txt" name="output_logs" label="${tool.name} on ${on_string}: log">
             <filter>task['task_selector']=='moff'</filter>
         </data>
-        <collection name="ident_output" type="list" label="${tool.name} quantification: ${on_string}">
-            <filter>task['task_selector']=='all' or task['task_selector']=='mbr'</filter>
-            <!--discover datasets method -->
+        <collection name="ident_output" type="list" label="${tool.name} on ${on_string}: quantification">
+            <filter>task['task_selector']=='all'</filter>
             <discover_datasets pattern="(?P&lt;designation&gt;.*)\.txt" directory="out" format="tabular"/>
         </collection>
-        <collection name="log_output" type="list" label="${tool.name} logs: ${on_string}">
+        <collection name="ident_output_mbr" type="list" label="${tool.name} on ${on_string}: matched">
+            <filter>task['task_selector']=='mbr'</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.txt" directory="out" format="tabular"/>
+        </collection>
+        <collection name="log_output" type="list" label="${tool.name} on ${on_string}: logs">
             <filter>task['task_selector']=='all' or task['task_selector']=='mbr'</filter>
             <discover_datasets pattern="(?P&lt;designation&gt;.*)\.log" directory="out" format="txt"/>
         </collection>
-        <data format="tabular" name="output_peptide_summary" label="${tool.name} peptide summary: ${on_string}">
-            <filter>task['peptide_summary']</filter>
+        <data format="tabular" name="output_peptide_summary" label="${tool.name} on ${on_string}: peptide summary">
+            <filter>(task['task_selector']=='all' or task['task_selector']=='moff') and task['peptide_summary']</filter>
         </data>
     </outputs>
     <tests>
         <!-- test moff_all -->
         <test>
-            <param name="task_selector" value="all"/>
+            <param name="task|task_selector" value="all"/>
             <param name="ident_input|input_type_selector" value="ps"/>
             <param name="ident_input_file">
                 <collection type="list">
@@ -296,7 +298,7 @@
         </test>
         <!-- test moff alone -->
         <test>
-            <param name="task_selector" value="moff"/>
+            <param name="task|task_selector" value="moff"/>
             <param name="ident_input|input_type_selector" value="ps"/>
             <param name="ident_input_file" value="input/test.tabular" ftype="tabular"/>
             <param name="msms_input|input_type_selector" value="mzml"/>
@@ -311,7 +313,7 @@
         </test>
         <!-- test the generic input -->
         <test>
-            <param name="task_selector" value="moff"/>
+            <param name="task|task_selector" value="moff"/>
             <param name="ident_input|input_type_selector" value="generic"/>
             <param name="ident_input_file" value="input/test.tabular" ftype="tabular"/>
             <param name="remove_header" value="true"/>
@@ -332,9 +334,65 @@
                 </assert_contents>
             </output>
         </test>
+        <test>
+            <param name="task|task_selector" value="all"/>
+            <param name="ident_input|input_type_selector" value="generic"/>
+            <param name="ident_input_file">
+                <collection type="list">
+                    <element name="mbr_test1" value="input/mbr_test1.tabular"/>
+                    <element name="mbr_test2" value="input/mbr_test2.tabular"/>
+                </collection>
+            </param>
+            <param name="remove_header" value="true"/>
+            <param name="peptide" value="3"/>
+            <param name="prot" value="2"/>
+            <param name="mod_peptide" value="7"/>
+            <param name="rt" value="13"/>
+            <param name="mz" value="14"/>
+            <param name="mass" value="17"/>
+            <param name="charge" value="15"/>
+            <param name="msms_input|input_type_selector" value="mzml"/>
+            <param name="inputraw">
+                <collection type="list">
+                    <element name="mbr_test1" value="input/mbr_test1.mzml"/>
+                    <element name="mbr_test2" value="input/mbr_test2.mzml"/>
+                </collection>
+            </param>
+            <param name="peptide_summary" value="true"/>
+            <output name="output_peptide_summary" ftype="tabular">
+                <assert_contents>
+                    <has_text text="sumIntensity_mbr_test1"/>
+                    <has_text text="sumIntensity_mbr_test2"/>
+                </assert_contents>
+            </output>
+            <output_collection name="ident_output" type="list">
+                <element name="mbr_test1_match_moff_result">
+                    <assert_contents>
+                        <has_text text="NH2-QVEEAVQSDDK-COOH"/>
+                    </assert_contents>
+                </element>
+                <element name="mbr_test2_match_moff_result">
+                    <assert_contents>
+                        <has_text text="NH2-RDVGINNTVK-COOH"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="log_output" type="list">
+                <element name="mbr_test1_match__moff">
+                    <assert_contents>
+                        <has_line line="peptide at line 200 -->  MZ: 783.4200 RT: 134.6997 matched (yes=1/no=0): 0"/>
+                    </assert_contents>
+                </element>
+                <element name="mbr_test2_match__moff">
+                    <assert_contents>
+                        <has_line line="peptide at line 132 -->  MZ: 767.8700 RT: 98.1975 matched (yes=1/no=0): 0"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
         <!-- test mbr -->
         <test>
-            <param name="task_selector" value="mbr"/>
+            <param name="task|task_selector" value="mbr"/>
             <param name="ident_input|input_type_selector" value="ps"/>
             <param name="ident_input_file">
                 <collection type="list">
@@ -343,7 +401,7 @@
                 </collection>
             </param>
             <param name="ext" value="tabular"/>
-            <output_collection name="ident_output" type="list" count="2">
+            <output_collection name="ident_output_mbr" type="list" count="2">
                 <element name="mbr_test1_match">
                     <assert_contents>
                         <has_text text="NH2-QVEEAVQSDDK-COOH"/>
@@ -378,6 +436,9 @@
 If both match between runs and apex intensity are desired, it is best to run them both at once (i.e., run the 'All' module).
 The MBR module is mainly useful for observing the intermediate steps of the algorithm - its outputs are not able to be used as inputs in moFF or in other tools.

+If quantification of multiple files without MBR is desired, the apex intensity module may be run in batch mode on multiple files or on a dataset collection.
+In either case, moFF must be given the paired files at the same time, so the best method is to construct a dataset collection in which the raw and identification files are in the same order.
+

 *Inputs:*