comparison subtract_query.xml @ 0:980bf1f6f37b draft default tip

Uploaded
author slegras
date Mon, 27 Jul 2015 08:14:06 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:980bf1f6f37b
1 <tool id="subtract_query1" name="Subtract Whole Dataset" version="0.1">
2 <description>from another dataset</description>
3 <!-- Runs subtract_query.py on input1, input2, the two column bounds and the output path; dataset paths are single-quoted so the shell passes each one as a single argument. The ignore-empty-end-cols flag is appended only when the boolean param renders as 'true'. --> <command interpreter="python">
4 subtract_query.py '$input1' '$input2' $begin_col $end_col '$output'
5 #if str($ignore_empty_end_cols) == 'true':
6 --ignore-empty-end-cols
7 #end if
8
9 </command>
10 <inputs> <!-- input2 is declared before input1 so the form labels read "Subtract [input2] from [input1]". -->
11 <param name="input2" type="data" format="txt" label="Subtract" help="Second dataset" />
12 <param name="input1" type="data" format="txt" label="from" help="First dataset" />
13 <param name="begin_col" type="data_column" data_ref="input1" force_select="False" label="Restrict subtraction between 'begin column'" /> <!-- Column choices come from input1; the tests pass "None" here, i.e. no restriction. -->
14 <param name="end_col" type="data_column" data_ref="input1" force_select="False" label="and 'end column'" help="Specifying columns for restricting subtraction is available only for tabular formatted datasets" />
15 <param name="ignore_empty_end_cols" type="boolean" label="Ignore empty columns and whitespace at end of line when subtracting" /> <!-- The command template compares this value against the string 'true'. -->
16 </inputs>
17 <outputs> <!-- The result takes both its datatype and its metadata from input1, the dataset being subtracted from. -->
18 <data name="output" format_source="input1" metadata_source="input1" /> <!-- format_source names the input explicitly; the legacy format="input" did not say which of the two inputs supplies the datatype. -->
19 </outputs>
20 <tests>
21 <!-- Subtract 2 non-tabular files with no column restrictions. -->
22 <!-- DISABLED: this test fails under the test framework even though the tool works when run interactively; the cause has not been identified.
23 <test>
24 <param name="input1" value="1.txt" />
25 <param name="input2" value="2.txt" />
26 <param name="begin_col" value="None" />
27 <param name="end_col" value="None" />
28 <output name="output" file="subtract-query-1.dat" />
29 </test>
30 -->
31 <!-- Subtract 2 tabular files with no column restrictions. -->
32 <test>
33 <param name="input1" value="eq-showbeginning.dat" />
34 <param name="input2" value="eq-showtail.dat" />
35 <param name="begin_col" value="None" />
36 <param name="end_col" value="None" />
37 <output name="output" file="subtract-query-2.dat" />
38 </test>
39 <!-- Subtract 2 tabular files with column restrictions. -->
40 <test>
41 <param name="input1" value="eq-showbeginning.dat" />
42 <param name="input2" value="eq-removebeginning.dat" />
43 <param name="begin_col" value="c1" />
44 <param name="end_col" value="c3" />
45 <output name="output" file="subtract-query-3.dat" />
46 </test>
47 <!-- Subtract a non-tabular file from a tabular file with no column restrictions. -->
48 <test>
49 <param name="input1" value="eq-showbeginning.dat" />
50 <param name="input2" value="2.txt" />
51 <param name="begin_col" value="None" />
52 <param name="end_col" value="None" />
53 <output name="output" file="subtract-query-4.dat" />
54 </test>
55 <!-- Subtract 2 tabular files with no column restrictions, ignoring empty end columns. The expected output is the same file (subtract-query-2.dat) used by the first tabular test above; only input1 differs (eq-showbeginning_e.dat). -->
56 <test>
57 <param name="input1" value="eq-showbeginning_e.dat" />
58 <param name="input2" value="eq-showtail.dat" />
59 <param name="begin_col" value="None" />
60 <param name="end_col" value="None" />
61 <param name="ignore_empty_end_cols" value="true" />
62 <output name="output" file="subtract-query-2.dat" />
63 </test>
64 </tests>
65 <help>
66
67 .. class:: infomark
68
69 **TIP:** This tool complements the tool in the **Operate on Genomic Intervals** tool set which subtracts the intervals of two datasets.
70
71
72 -----
73
74 **Syntax**
75
76 This tool subtracts an entire dataset from another dataset.
77
78 - Any text format is valid.
79 - If both dataset formats are tabular, you may restrict the subtraction to specific columns **contained in both datasets** and the resulting dataset will include only the columns specified.
80 - The begin column must be less than or equal to the end column. If it is not, begin column is switched with end column.
81 - If begin column is specified but end column is not, end column will default to begin column (and vice versa).
82 - All blank and comment lines are skipped and not included in the resulting dataset (comment lines are lines beginning with a # character).
83 - Duplicate lines are eliminated from both datasets prior to subtraction. If any duplicate lines were eliminated from the first dataset, the number is displayed in the resulting history item.
84
85 -----
86
87 **Example**
88
89 If this is the **First dataset**::
90
91 chr1 4225 19670
92 chr10 6 8
93 chr1 24417 24420
94 chr6_hla_hap2 0 150
95 chr2 1 5
96 chr10 2 10
97 chr1 30 55
98 chrY 1 20
99 chr1 1225979 42287290
100 chr10 7 8
101
102 and this is the **Second dataset**::
103
104 chr1 4225 19670
105 chr10 6 8
106 chr1 24417 24420
107 chr6_hla_hap2 0 150
108 chr2 1 5
109 chr1 30 55
110 chrY 1 20
111 chr1 1225979 42287290
112
113 Subtracting the **Second dataset** from the **First dataset** (including all columns) will yield::
114
115 chr10 7 8
116 chr10 2 10
117
118 Conversely, subtracting the **First dataset** from the **Second dataset** (including all columns) will result in an empty dataset.
119
120 Subtracting the **Second dataset** from the **First dataset** (restricting to columns c1 and c2) will yield::
121
122 chr10 7
123 chr10 2
124
125 </help>
126 </tool>