changeset 12:ed3c76be8a41 draft

Uploaded
author bcrain-completegenomics
date Tue, 12 Jun 2012 13:25:24 -0400
parents 8c5691a9e67f
children 63541ee5f319
files cgatools/tools/cgatools/join.xml
diffstat 1 files changed, 63 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/cgatools/tools/cgatools/join.xml	Tue Jun 12 13:20:49 2012 -0400
+++ b/cgatools/tools/cgatools/join.xml	Tue Jun 12 13:25:24 2012 -0400
@@ -8,15 +8,25 @@
 
   <command> <!--run executable-->
 		cgatools join --beta 
-		--input $input1 
-		--input $input2 
+		--input $inputA 
+		--input $inputB 
 		--output $output 
 		--output-mode $outmode 
 		$dump 
 		--select $col
-		#for $m in $matched <!--get all matched columns-->
-		--match ${m.match}
+		#for $m in $matches <!--get all matched columns-->
+			--match ${m.match}
 		#end for
+		#if $range_overlap.range == 'yes'
+			#for $o in $range_overlap.overlaps <!--get all overlap range columns-->
+				--overlap ${o.overlap}
+			#end for
+			--overlap-mode $range_overlap.overlapmode
+			--overlap-fraction-A $range_overlap.fractionA
+			--boundary-uncertainty-A $range_overlap.boundaryA
+			--overlap-fraction-B $range_overlap.fractionB
+			--boundary-uncertainty-B $range_overlap.boundaryB
+		#end if
   </command>
 
   <outputs>
@@ -25,7 +35,7 @@
   
   <inputs>
    	<!--form field to select input file A-->
-    <param name="input1" type="data" format="tabular" label="Select first input file (A)">
+    <param name="inputA" type="data" format="tabular" label="Select input file A ">
       <validator type="unspecified_build" />
 			<validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
 				metadata_name="dbkey" metadata_column="0"
@@ -33,20 +43,15 @@
     </param>
     
   	<!--form field to select input file B-->
-    <param name="input2" type="data" format="tabular" label="Select second input file (B)">
+    <param name="inputB" type="data" format="tabular" label="Select input file B ">
       <validator type="unspecified_build" />
 			<validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
 				metadata_name="dbkey" metadata_column="0"
 				message="cgatools is not currently available for this build."/>
     </param>
     
-  	<!--form field to specify columns to match-->
-    <repeat name="matched" title="Matched column">
-      <param name="match" type="text" label="Enter column A:column B"/>
-    </repeat>
-
   	<!--form field to specify columns to print-->
-    <param name="col" type="text" value="A.*,B.*" label="Specify columns to print from file A and B in format A.col_name1,A.col_name2,B.col_name1" />
+    <param name="col" type="text" value="A.*,B.*" size="40" label="Specify columns for output" help="The default value A.*,B.* prints all columns from both files. For other selections, enter columns in the format A.col_name1,A.col_name3,B.col_name1" />
 
   	<!--form field to select output-mode-->
 		<param name="outmode" type="select" label="Select output mode">
@@ -60,8 +65,40 @@
 			<option value="--always-dump" selected="true">print all records of A even if not matched in B</option>
 			<option value="">print only records of A that are matched in B</option>
 		</param>
+
+  	<!--form field to specify columns to match-->
+    <repeat name="matches" title="Exact match column">
+      <param name="match" type="text" size="40" label="Enter column:column" help="Enter column_from_A:column_from_B, e.g. chromosome:chromosome"/>
+    </repeat>
+    
+    <conditional name="range_overlap">
+    	<param name="range" type="select" label="Do you want to match columns by overlapping range?">
+    		<option value="no">no</option>
+    		<option value="yes">yes</option>
+    	</param>
+    	<when value="no" /> <!--no range matching: no extra form fields; the command template emits overlap options only for 'yes'-->
+    	<when value="yes">
+				<!--repeatable form field: each entry is passed to cgatools as one overlap specification-->
+				<repeat name="overlaps" title="Range column">
+					<param name="overlap" type="text" size="40" label="Enter column&#91;,column&#93;:column&#91;,column&#93;" help="Enter range_start_from_A&#91;,range_stop_from_A&#93;:range_start_from_B&#91;,range_stop_from_B&#93;, e.g. begin,end:begin,end (overlapping range of positions) or begin,end:position"/>
+				</repeat>
+
+				<!--form field to select overlap-mode; labels spell out the comparison each mode applies-->
+				<param name="overlapmode" type="select" label="Select overlap mode">
+					<option value="strict" selected="true">strict (overlap if A.begin&lt;B.end and B.begin&lt;A.end)</option>
+					<option value="allow-abutting-points">allow-abutting-points (overlap if A.begin&lt;B.end and B.begin&lt;A.end, or if A.begin&lt;=B.end and B.begin&lt;=A.end and either A or B has zero length.)</option>
+				</param>
+
+				<!--form fields for overlap filtering options: fractions are floats so values such as 0.5 are accepted, boundary uncertainties stay integers-->
+				<param name="fractionA" type="float" value="0" label="Minimum fraction of A region overlap " help="Enter a fraction, e.g. 0.5"/>
+				<param name="boundaryA" type="integer" value="0" label="Boundary uncertainty for A for overlap filtering " help="Records failing the following boundary-uncertainty calculation are not included in the output: overlap length >= overlap-fraction-A * (A-range-length - boundary-uncertainty-A)"/>
+				
+				<param name="fractionB" type="float" value="0" label="Minimum fraction of B region overlap " help="Enter a fraction, e.g. 0.5"/>
+				<param name="boundaryB" type="integer" value="0" label="Boundary uncertainty for B for overlap filtering "  help="Records failing the following boundary-uncertainty calculation are not included in the output: overlap length >= overlap-fraction-B * (B-range-length - boundary-uncertainty-B)"/>
+    	</when>
+		</conditional>
   </inputs>
-
+  
   <help>
   
 **What it does**
@@ -107,7 +144,19 @@
 		      separated by a colon.
 		
 		  --overlap arg
-						
+		      Overlap specification. An overlap specification consists of a range 
+		      definition for files A and B, separated by a colon. A range definition 
+		      may be two columns, in which case they are interpreted as the beginning
+		      and end of the range. Or it may be one column, in which case the range 
+		      is defined as the 1-base range starting at the given value. The records
+		      from the two files must overlap in order to be considered for output. 
+		      Two ranges are considered to overlap if the overlap is at least one 
+		      base long, or if one of the ranges is length 0 and the ranges overlap 
+		      or abut. For example, "begin,end:offset" will match wherever end-begin 
+		      &gt; 0, begin&lt;offset+1, and end&gt;offset, or wherever end-begin = 0, 
+		      begin&lt;=offset+1, and end&gt;=offset.
+
+
 		  -m [ --output-mode ] arg (=full)
 		      Output mode, one of the following:
 		        full        Print an output record for each match found between