Mercurial > repos > bcrain-completegenomics > testing2
changeset 12:ed3c76be8a41 draft
Uploaded
author | bcrain-completegenomics |
---|---|
date | Tue, 12 Jun 2012 13:25:24 -0400 |
parents | 8c5691a9e67f |
children | 63541ee5f319 |
files | cgatools/tools/cgatools/join.xml |
diffstat | 1 files changed, 63 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
--- a/cgatools/tools/cgatools/join.xml Tue Jun 12 13:20:49 2012 -0400 +++ b/cgatools/tools/cgatools/join.xml Tue Jun 12 13:25:24 2012 -0400 @@ -8,15 +8,25 @@ <command> <!--run executable--> cgatools join --beta - --input $input1 - --input $input2 + --input $inputA + --input $inputB --output $output --output-mode $outmode $dump --select $col - #for $m in $matched <!--get all matched columns--> - --match ${m.match} + #for $m in $matches <!--get all matched columns--> + --match ${m.match} #end for + #if $range_overlap.range == 'yes' + #for $o in $range_overlap.overlaps <!--get all matched columns--> + --overlap ${o.overlap} + #end for + --overlap-mode $range_overlap.overlapmode + --overlap-fraction-A $range_overlap.fractionA + --boundary-uncertainty-A $range_overlap.boundaryA + --overlap-fraction-B $range_overlap.fractionB + --boundary-uncertainty-B $range_overlap.boundaryB + #end if </command> <outputs> @@ -25,7 +35,7 @@ <inputs> <!--form field to select input file A--> - <param name="input1" type="data" format="tabular" label="Select first input file (A)"> + <param name="inputA" type="data" format="tabular" label="Select input file A "> <validator type="unspecified_build" /> <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" metadata_name="dbkey" metadata_column="0" @@ -33,20 +43,15 @@ </param> <!--form field to select input file B--> - <param name="input2" type="data" format="tabular" label="Select second input file (B)"> + <param name="inputB" type="data" format="tabular" label="Select input file B "> <validator type="unspecified_build" /> <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" metadata_name="dbkey" metadata_column="0" message="cgatools is not currently available for this build."/> </param> - <!--form field to specify columns to match--> - <repeat name="matched" title="Matched column"> - <param name="match" type="text" label="Enter column A:column B"/> - </repeat> - <!--form field to specify columns to print--> - 
<param name="col" type="text" value="A.*,B.*" label="Specify columns to print from file A and B in format A.col_name1,A.col_name2,B.col_name1" /> + <param name="col" type="text" value="A.*,B.*" size="40" label="Specify columns for output" help="The default value A.*,B.* prints all columns from both files, other selections enter in the format A.col_name1,A.col_name3,B.col_name1" /> <!--form field to select output-mode--> <param name="outmode" type="select" label="Select output mode"> @@ -60,8 +65,40 @@ <option value="--always-dump" selected="true">print all records of A even if not matched in B</option> <option value="">print only records of A that are matched in B</option> </param> + + <!--form field to specify columns to match--> + <repeat name="matches" title="Exact match column"> + <param name="match" type="text" size="40" label="Enter column:column" help="Enter column_from_A:column_from_B, e.g. chromosome:chromosome"/> + </repeat> + + <conditional name="range_overlap"> + <param name="range" type="select" label="Do you want to match columns by overlapping range?"> + <option value="no">no</option> + <option value="yes">yes</option> + </param> + + <when value="yes"> + <!--form field to specify columns to overlap--> + <repeat name="overlaps" title="Range column"> + <param name="overlap" type="text" size="40" label="Enter column[,column]:column[,column]" help="Enter range_start_from_A[,range_stop_from_A]:range_start_from_B[,range_stop_from_B], e.g. 
begin,end:begin,end (overlapping range of positions) or begin,end:position"/> + </repeat> + + <!--form field to select overlap-mode--> + <param name="overlapmode" type="select" label="Select overlap mode"> + <option value="strict" selected="true">strict (overlap if A.begin<B.end and B.begin<A.end)</option> + <option value="allow-abutting-points">allow-abutting-points (overlap if A.begin<B.end and B.begin<A.end, or if A.begin<=B.end and B.begin<=A.end and either A or B has zero length.)</option> + </param> + + <!--form fields to overlap options--> + <param name="fractionA" type="integer" value="0" label="Minimum fraction of A region overlap " /> + <param name="boundaryA" type="integer" value="0" label="Boundary uncertainty for A for overlap filtering " help="Records failing the following boundary-uncertainty calculation are not included in the output: overlap length >= overlap-fraction-A * (A-range-length - boundary-uncertainty-A)"/> + + <param name="fractionB" type="integer" value="0" label="Minimum fraction of B region overlap " /> + <param name="boundaryB" type="integer" value="0" label="Boundary uncertainty for B for overlap filtering " help="Records failing the following boundary-uncertainty calculation are not included in the output: overlap length >= overlap-fraction-B * (B-range-length - boundary-uncertainty-B)"/> + </when> + </conditional> </inputs> - + <help> **What it does** @@ -107,7 +144,19 @@ separated by a colon. --overlap arg - + Overlap specification. An overlap specification consists of a range + definition for files A and B, separated by a colon. A range definition + may be two columns, in which case they are interpreted as the beginning + and end of the range. Or it may be one column, in which case the range + is defined as the 1-base range starting at the given value. The records + from the two files must overlap in order to be considered for output. 
+ Two ranges are considered to overlap if the overlap is at least one + base long, or if one of the ranges is length 0 and the ranges overlap + or abut. For example, "begin,end:offset" will match wherever end-begin + > 0, begin<offset+1, and end>offset, or wherever end-begin = 0, + begin<=offset+1, and end>=offset. + + -m [ --output-mode ] arg (=full) Output mode, one of the following: full Print an output record for each match found between