Mercurial > repos > greg > cwpair2

--- a/cwpair2.xml	Wed Dec 02 16:13:59 2015 -0500
+++ b/cwpair2.xml	Wed Dec 02 16:14:07 2015 -0500
@@ -11,9 +11,9 @@
             #for $i in $input:
                  --input "${i}" "${i.hid}"
             #end for
-            --method $method
             --up_distance $up_distance
             --down_distance $down_distance
+            --method $method
             --binsize $binsize
             --threshold_format $threshold_format_cond.threshold_format
             #if str($threshold_format_cond.threshold_format) == "absolute_threshold":
@@ -22,20 +22,19 @@
                 --relative_threshold $threshold_format_cond.relative_threshold
             #end if
             --output_files $output_files
-            --sort_score $sort_score
             --statistics_output "$statistics_output"
         ]]>
     </command>
     <inputs>
         <param  name="input" type="data" format="gff" multiple="True" label="Find matched pairs on" />
+        <param name="up_distance" type="integer" value="50" min="0" label="Distance upstream from a peak to allow a pair" />
+        <param name="down_distance" type="integer" value="100" min="0" label="Distance downstream from a peak to allow a pair" />
         <param name="method" type="select" label="Method of finding a match">
             <option value="mode" selected="True">Mode</option>
             <option value="closest">Closest</option>
             <option value="largest">Largest</option>
             <option value="all">All</option>
         </param>
-        <param name="up_distance" type="integer" value="50" min="0" label="Distance upstream from a peak to allow a pair" />
-        <param name="down_distance" type="integer" value="100" min="0" label="Distance downstream from a peak to allow a pair" />
         <param name="binsize" type="integer" value="1" min="0" label="Width of bins for frequency plots and mode calculation" help="Value 1 implies no bins" />
         <conditional name="threshold_format_cond">
             <param name="threshold_format" type="select" label="Filter using">
@@ -50,197 +49,111 @@
             </when>
         </conditional>
         <param name="output_files" type="select" label="Restrict output to" help="Statistics will always be generated." >
-            <option value="simple" selected="True">matched pairs only (S)</option>
-            <option value="simple_orphan">matched pairs and orphans only (O,S)</option>
-            <option value="simple_orphan_detail">matched pairs, orphans and details only (D,O,S)</option>
-            <option value="all">no restrictions (output everything: C,D,F,O,P,S)</option>
-        </param>
-        <param name="sort_score" type="select" label="Sort output by score?">
-            <option value="no" selected="True">No</option>
-            <option value="asc">Yes, in ascending order</option>
-            <option value="desc">Yes, in descending order</option>
+            <option value="all" selected="True">no restrictions (output everything: C,D,F,O,P,MP)</option>
+            <option value="matched_pair">matched pairs only (MP)</option>
+            <option value="matched_pair_orphan">matched pairs and orphans only (O,MP)</option>
+            <option value="matched_pair_orphan_detail">matched pairs, orphans and details only (D,O,MP)</option>
         </param>
     </inputs>
     <outputs>
-        <collection name="closest_C_pdf" type="list" label="closest C: ${tool.name} on ${on_string}">
-            <filter>output_files == "all" and method in ["all", "closest"]</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="closest_C" ext="pdf" visible="false" />
-        </collection>
-        <collection name="largest_C_pdf" type="list" label="largest C: ${tool.name} on ${on_string}">
-            <filter>output_files == "all" and method in ["all", "largest"]</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="largest_C" ext="pdf" visible="false" />
-        </collection>
-        <collection name="mode_C_pdf" type="list" label="mode C: ${tool.name} on ${on_string}">
-            <filter>output_files == "all" and method in ["all", "mode"]</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="mode_C" ext="pdf" visible="false" />
-        </collection>
-        <collection name="closest_D" type="list" label="closest D: ${tool.name} on ${on_string}">
-            <filter>output_files in ["all", "simple_orphan_detail"] and method in ["all", "closest"]</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="closest_D" ext="tabular" visible="false" />
-        </collection>
-        <collection name="largest_D" type="list" label="largest D: ${tool.name} on ${on_string}">
-            <filter>output_files in ["all", "simple_orphan_detail"] and method in ["all", "largest"]</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="largest_D" ext="tabular" visible="false" />
-        </collection>
-        <collection name="mode_D" type="list" label="mode D: ${tool.name} on ${on_string}">
-            <filter>output_files in ["all", "simple_orphan_detail"] and method in ["all", "mode"]</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="mode_D" ext="tabular" visible="false" />
-        </collection>
-        <collection name="closest_F_pdf" type="list" label="closest F: ${tool.name} on ${on_string}">
-            <filter>output_files == "all" and method in ["all", "closest"]</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="closest_F" ext="pdf" visible="false" />
-        </collection>
-        <collection name="largest_F_pdf" type="list" label="largest F: ${tool.name} on ${on_string}">
-            <filter>output_files == "all" and method in ["all", "largest"]</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="largest_F" ext="pdf" visible="false" />
-        </collection>
-        <collection name="mode_F_pdf" type="list" label="mode F: ${tool.name} on ${on_string}">
-            <filter>output_files == "all" and method in ["all", "mode"]</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="mode_F" ext="pdf" visible="false" />
+        <data name="statistics_output" format="tabular" label="Statistics Table: ${tool.name} on ${on_string}" />
+        <collection name="H" type="list" label="Statistics Histogram: ${tool.name} on ${on_string}">
+            <filter>output_files == "all"</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="H" ext="pdf" visible="false" />
         </collection>
-        <collection name="closest_O" type="list" label="closest O: ${tool.name} on ${on_string}">
-            <filter>output_files in ["all", "simple_orphan", "simple_orphan_detail"] and method in ["all", "closest"]</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="closest_O" ext="tabular" visible="false" />
-        </collection>
-        <collection name="largest_O" type="list" label="largest O: ${tool.name} on ${on_string}">
-            <filter>output_files in ["all", "simple_orphan", "simple_orphan_detail"] and method in ["all", "largest"]</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="largest_O" ext="tabular" visible="false" />
-        </collection>
-        <collection name="mode_O" type="list" label="mode O: ${tool.name} on ${on_string}">
-            <filter>output_files in ["all", "simple_orphan", "simple_orphan_detail"] and method in ["all", "mode"]</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="mode_O" ext="tabular" visible="false" />
-        </collection>
-        <collection name="closest_P_pdf" type="list" label="closest P: ${tool.name} on ${on_string}">
-            <filter>output_files == "all" and method in ["all", "closest"]</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="closest_P" ext="pdf" visible="false" />
+        <collection name="D" type="list" label="Data D: ${tool.name} on ${on_string}">
+            <filter>output_files in ["all", "matched_pair_orphan_detail"]</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="data_D" ext="tabular" visible="false" />
         </collection>
-        <collection name="largest_P_pdf" type="list" label="largest P: ${tool.name} on ${on_string}">
-            <filter>output_files == "all" and method in ["all", "largest"]</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="largest_P" ext="pdf" visible="false" />
-        </collection>
-        <collection name="mode_P_pdf" type="list" label="mode P: ${tool.name} on ${on_string}">
-            <filter>output_files == "all" and method in ["all", "mode"]</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="mode_P" ext="pdf" visible="false" />
+        <collection name="O" type="list" label="Data O: ${tool.name} on ${on_string}">
+            <filter>output_files in ["all", "matched_pair_orphan", "matched_pair_orphan_detail"]</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="data_O" ext="tabular" visible="false" />
         </collection>
-        <collection name="closest_S" type="list" label="closest S: ${tool.name} on ${on_string}">
-            <filter>method in ["all", "closest"]</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="closest_S" ext="gff" visible="false" />
+        <collection name="MP" type="list" label="Data MP: ${tool.name} on ${on_string}">
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="data_MP" ext="gff" visible="false" />
         </collection>
-        <collection name="largest_S" type="list" label="largest S: ${tool.name} on ${on_string}">
-            <filter>method in ["all", "largest"]</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="largest_S" ext="gff" visible="false" />
-        </collection>
-        <collection name="mode_S" type="list" label="mode S: ${tool.name} on ${on_string}">
-            <filter>method in ["all", "mode"]</filter>
-            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="mode_S" ext="gff" visible="false" />
-        </collection>
-        <data name="statistics_output" format="tabular" label="Statistics: ${tool.name} on ${on_string}" />
     </outputs>
     <tests>
         <test>
             <param name="input" value="cwpair2_input1.gff" />
-            <param name="method" value="all" />
             <param name="up_distance" value="25" />
             <param name="down_distance" value="100" />
+            <param name="method" value="all" />
             <param name="binsize" value="1" />
             <param name="threshold_format" value="relative_threshold" />
             <param name="relative_threshold" value="0.0" />
-            <param name="output_files" value="simple" />
-            <param name="sort_score" value="asc" />
-            <output_collection name="closest_S" type="list">
-                <element name="closest_S_data_1_f0u25d100b1" file="closest_s_output1.gff" ftype="gff" />
+            <param name="output_files" value="matched_pair" />
+            <output name="statistics_output" file="statistics1.tabular" ftype="tabular" />
+            <output_collection name="MP" type="list">
+                <element name="data_MP_closest_f0u25d100_on_data_1" file="closest_mp_output1.gff" ftype="gff" />
+                <element name="data_MP_largest_f0u25d100_on_data_1" file="largest_mp_output1.gff" ftype="gff" />
+                <element name="data_MP_mode_f0u25d100_on_data_1" file="mode_mp_output1.gff" ftype="gff" />
             </output_collection>
-            <output_collection name="largest_S" type="list">
-                <element name="largest_S_data_1_f0u25d100b1" file="largest_s_output1.gff" ftype="gff" />
-            </output_collection>
-            <output_collection name="mode_S" type="list">
-                <element name="mode_S_data_1_f0u25d100b1" file="mode_s_output1.gff" ftype="gff" />
-            </output_collection>
-            <output name="statistics_output" file="statistics1.tabular" ftype="tabular" />
         </test>
         <test>
             <param name="input" value="cwpair2_input1.gff" />
-            <param name="method" value="all" />
             <param name="up_distance" value="50" />
             <param name="down_distance" value="100" />
+            <param name="method" value="all" />
             <param name="binsize" value="1" />
             <param name="threshold_format" value="relative_threshold" />
             <param name="relative_threshold" value="0.0" />
             <param name="output_files" value="all" />
-            <param name="sort_score" value="no" />
-            <output_collection name="closest_D" type="list">
-                <element name="closest_D_data_1_f0u50d100b1" file="closest_d_output2.tabular" ftype="tabular" />
-            </output_collection>
-            <output_collection name="closest_F" type="list">
-                <element name="closest_F_data_1_f0u50d100b1" file="closest_f_output2.pdf" ftype="pdf" compare="sim_size" />
-            </output_collection>
-            <output_collection name="closest_O" type="list">
-                <element name="closest_O_data_1_f0u50d100b1" file="closest_o_output2.tabular" ftype="tabular" />
-            </output_collection>
-            <output_collection name="closest_S" type="list">
-                <element name="closest_S_data_1_f0u50d100b1" file="closest_s_output2.gff" ftype="gff" />
-            </output_collection>
-            <output_collection name="largest_D" type="list">
-                <element name="largest_D_data_1_f0u50d100b1" file="largest_d_output2.tabular" ftype="tabular" />
-            </output_collection>
-            <output_collection name="largest_F" type="list">
-                <element name="largest_F_data_1_f0u50d100b1" file="largest_f_output2.pdf" ftype="pdf" compare="sim_size" />
-            </output_collection>
-            <output_collection name="largest_O" type="list">
-                <element name="largest_O_data_1_f0u50d100b1" file="largest_o_output2.tabular" ftype="tabular" />
+            <output name="statistics_output" file="statistics2.tabular" ftype="tabular" />
+            <output_collection name="H" type="list">
+                <element name="histogram_C_mode_f0u50d100_on_data_1" file="mode_c_output2.pdf" ftype="pdf" compare="sim_size" />
+                <element name="histogram_F_closest_f0u50d100_on_data_1" file="closest_f_output2.pdf" ftype="pdf" compare="sim_size" />
+                <element name="histogram_F_largest_f0u50d100_on_data_1" file="largest_f_output2.pdf" ftype="pdf" compare="sim_size" />
+                <element name="histogram_F_mode_f0u50d100_on_data_1" file="mode_f_output2.pdf" ftype="pdf" compare="sim_size" />
+                <element name="histogram_P_mode_f0u50d100_on_data_1" file="mode_p_output2.pdf" ftype="pdf" compare="sim_size" />
             </output_collection>
-            <output_collection name="largest_S" type="list">
-                <element name="largest_S_data_1_f0u50d100b1" file="largest_s_output2.gff" ftype="gff" />
-            </output_collection>
-            <output_collection name="mode_C" type="list">
-                <element name="mode_C_data_1_f0u50d100b1" file="mode_c_output2.pdf" ftype="pdf" compare="sim_size" />
-            </output_collection>
-            <output_collection name="mode_D" type="list">
-                <element name="mode_D_data_1_f0u50d100b1" file="mode_d_output2.tabular" ftype="tabular" />
+            <output_collection name="D" type="list">
+                <element name="data_D_closest_f0u50d100_on_data_1" file="closest_d_output2.tabular" ftype="tabular" />
+                <element name="data_D_largest_f0u50d100_on_data_1" file="largest_d_output2.tabular" ftype="tabular" />
+                <element name="data_D_mode_f0u50d100_on_data_1" file="mode_d_output2.tabular" ftype="tabular" />
             </output_collection>
-            <output_collection name="mode_F" type="list">
-                <element name="mode_F_data_1_f0u50d100b1" file="mode_f_output2.pdf" ftype="pdf" compare="sim_size" />
-            </output_collection>
-            <output_collection name="mode_O" type="list">
-                <element name="mode_O_data_1_f0u50d100b1" file="mode_o_output2.tabular" ftype="tabular" />
+            <output_collection name="O" type="list">
+                <element name="data_O_closest_f0u50d100_on_data_1" file="closest_o_output2.tabular" ftype="tabular" />
+                <element name="data_O_largest_f0u50d100_on_data_1" file="largest_o_output2.tabular" ftype="tabular" />
+                <element name="data_O_mode_f0u50d100_on_data_1" file="mode_o_output2.tabular" ftype="tabular" />
             </output_collection>
-            <output_collection name="mode_P" type="list">
-                <element name="mode_P_data_1_f0u50d100b1" file="mode_p_output2.pdf" ftype="pdf" compare="sim_size" />
+            <output_collection name="MP" type="list">
+                <element name="data_MP_closest_f0u50d100_on_data_1" file="closest_mp_output2.gff" ftype="gff" />
+                <element name="data_MP_largest_f0u50d100_on_data_1" file="largest_mp_output2.gff" ftype="gff" />
+                <element name="data_MP_mode_f0u50d100_on_data_1" file="mode_mp_output2.gff" ftype="gff" />
             </output_collection>
-            <output_collection name="mode_S" type="list">
-                <element name="mode_S_data_1_f0u50d100b1" file="mode_s_output2.gff" ftype="gff" />
-            </output_collection>
-            <output name="statistics_output" file="statistics2.tabular" ftype="tabular" />
         </test>
     </tests>
     <help>
 **What it does**

-Takes a list of called peaks on both strands and produces a list of matched pairs and a list of unmatched orphans
-using a specified method for finding matched pairs.  Methods for finding matched pairs are mode, closest, largest
-or all (where the analysis is run for each method).  A statistics dataset is generated and a collection of datasets
+Takes a list of called peaks on both strands and produces lists of matched pairs and unmatched peaks using a
+specified method for finding matched pairs.  Methods for finding matched pairs are mode, closest, largest or
+all (where the analysis is run for each method).  A statistics dataset is generated and a collection of datasets
 is produced for each method as follows.

+**Data Files**
+
+* **closest/largest/mode MP** - the Matched Pairs in gff format
+* **closest/largest/mode O** - the Orphans in tabular format
+* **closest/largest/mode D** - the Details in tabular format
+
+**Statistics Files**
+
 * **closest/largest/mode C** - the stastics graph in pdf format
-* **closest/largest/mode D** - the details in tabular format
+* **closest/largest/mode P** - the preview plots graph in pdf format
 * **closest/largest/mode F** - the final plots graph in pdf format
-* **closest/largest/mode O** - the orphans in tabular format
-* **closest/largest/mode P** - the preview plots graph in pdf format
-* **closest/largest/mode S** - the matched pairs in gff format

 -----

 **Options**

-* **Output files** - Restrict output dataset collections to matched pairs only or one of several combinations of collection types.
 * **Method of finding match** - Method of finding matched pair, mode, closest, largest, or all (run with each method).
 * **Distance upstream from a peak to allow a pair** - Distance upstream from a Watson peak to allow a Crick pair.
 * **Distance downstream from a peak to allow a pair** - Distance downstream from a Watson peak to allow a Crick pair.
 * **Percentage of the 95 percentile value to filter below** - Percentage of the 95 percentile value below which to filter when using a relative threshold.
 * **Absolute value to filter below** - Absolute value below which to filter when using an absolute threshold.
-* **Sort output by chromosomes in** - Output will be sorted by chromsome in the specified order.
-* **Sort output by score?** - If yes, output will be sorted by score in the specified order.
-* **Summary output format** - Format for summary output.
+* **Output files** - Restrict output dataset collections to matched pairs only or one of several combinations of collection types.

     </help>
     <expand macro="citations" />