changeset 0:84f5e02c76f3 draft default tip

Uploaded
author qfab
date Wed, 21 May 2014 03:30:16 -0400
parents
children
files overlapselect_wrapper.sh overlapselect_wrapper.xml test-data/NM_001206.gp test-data/mrna.psl test-data/psl_over_NM_001206gp.psl tool_dependencies.xml
diffstat 6 files changed, 213 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/overlapselect_wrapper.sh	Wed May 21 03:30:16 2014 -0400
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+echo $@ 
+overlapSelect $@
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/overlapselect_wrapper.xml	Wed May 21 03:30:16 2014 -0400
@@ -0,0 +1,147 @@
+<tool id="overlapselect_wrapper" name="overlap Select" version="1.0">
+  <requirements> <!-- resolved against the overlapSelect package declared in tool_dependencies.xml -->
+    <requirement type="package" version="latest">overlapSelect</requirement>
+  </requirements>
+
+  <!-- Summary line, then the Cheetah template that assembles the wrapper call: option flags first, then select file, input file and output file. -->
+  <description>UCSC Overlap Select</description>
+  <command interpreter="bash">
+	overlapselect_wrapper.sh
+#if $optionSpec.parType == "specify":
+    #if $str($optionSpec.strand) == "true":
+        -strand
+    #end if
+    #if $str($optionSpec.oppositeStrand) == "true":
+        -oppositeStrand
+    #end if
+    #if $str($optionSpec.nonOverlapping) == "true":
+        -nonOverlapping
+    #end if
+    #if $str($optionSpec.mergeOutput) == "true":
+        -mergeOutput
+    #end if
+    -overlapBases=$optionSpec.overlapBases
+    -overlapThreshold=$optionSpec.overlapThreshold
+    -overlapThresholdCeil=$optionSpec.overlapThresholdCeil
+    #if $str($optionSpec.selectFormat) == "bed":
+        -selectFmt=bed
+    #end if
+    #if $str($optionSpec.inFormat) == "bed":
+        -inFmt=bed
+    #end if
+    #if $str($optionSpec.selectFormat) == "tab":
+        -selectCoordCols=$optionSpec.selColStart
+    #end if
+    #if $str($optionSpec.inFormat) == "tab":
+        -inCoordCols=$optionSpec.inColStart
+    #end if
+#else
+    -selectFmt=bed -inFmt=bed
+#end if
+    $selectFile
+    $inFile
+    $outFile
+  </command>
+  <inputs> <!-- two datasets (selectFile = reference, inFile = query) plus an optional group of overlapSelect switches -->
+    <param name="inFile" type="data" format="Tabular" metadata_name="dbkey" label="Select the query (input) file" />
+    <param name="selectFile" type="data" format="Tabular" metadata_name="dbkey" label="Select the reference (select) file" />
+    <conditional name="optionSpec">
+      <param name="parType" type="select" label="Use the default parameters or specify them yourself?" help="">
+        <option value="default">Use Default Parameters</option>
+        <option value="specify">Specify Parameters</option>
+      </param>
+      <when value="default"> <!-- no extra inputs: the command template supplies fixed format options -->
+      </when>
+      <when value="specify">
+        <param name="selectFormat" type="select" label="Reference (select) File Format -selectFmt">
+                          <option value="bed">Bed</option>
+                          <option value="tab">Tabular</option>
+        </param>
+        <param name="inFormat" type="select" label="Query (input) File Format -inFmt">
+                          <option value="bed">Bed</option>
+                          <option value="tab">Tabular</option>
+        </param>
+        <param name="selColStart" type="integer" value="0" label="Reference (select) File is unstranded with chr starting at this column (0 based - i.e. 0=col1), followed by start and end (-1 to disable) -selectCoordCols" />
+        <param name="inColStart" type="integer" value="0" label="Query (input) File is unstranded with chr starting at this column (0 based - i.e. 0=col1), followed by start and end (-1 to disable) -inCoordCols" />
+        <param name="strand" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Only search on same strand -strand" />
+        <param name="oppositeStrand" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Only search on opposite strand -oppositeStrand" />
+        <param name="nonOverlapping" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Select non-overlapping instead of overlapping records" />
+        <param name="mergeOutput" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Merge Output -mergeOutput" />
+        <param name="overlapBases" type="integer" value="-1" label="Number of overlap minimum (-overlapBases) (-1 to disable)" />
+        <param name="overlapThreshold" type="float" value="0.0" label="Minimum fraction overlap" help="minimum fraction of an inFile record that must be overlapped by a single select record to be considered
+        overlapping.  Note that this is only coverage by a single select record, not total coverage" />
+        <param name="overlapThresholdCeil" type="float" value="1.1" label="Maximum fraction overlap" help="select only inFile records with less than this amount of overlap with a single record, provided they are selected by other criteria." />
+
+      </when>  <!-- specify -->
+    </conditional>  <!-- optionSpec -->
+  </inputs>
+  <outputs> <!-- single result dataset, declared as interval and titled after both input datasets -->
+    <data name="outFile" format="interval" label="Overlapselect on ${selectFile.name} vs ${inFile.name} " />
+  </outputs>
+  <tests>
+    <test> <!-- the expected output holds PSL rows and overlapSelect copies records from inFile, so mrna.psl is the inFile and NM_001206.gp the selector -->
+      <param name="inFile" value="mrna.psl"/>
+      <param name="selectFile" value="NM_001206.gp"/>
+      <output name="outFile" file="psl_over_NM_001206gp.psl"/>
+    </test>
+  </tests>
+  
+<help>
+**Overlap Select Help**
+
+
+**Options**
+
+* selectCds - Use only CDS in the selectFile
+
+* selectRange - Use entire range instead of blocks from records in the selectFile.
+
+* inFmt=fmt - specify inFile format, same values as -selectFmt.
+
+* inCoordCols=spec - inFile is tab-separated with coordinates specified by spec, in the same format as -selectCoordCols.
+
+* inCds - Use only CDS in the inFile
+
+* inRange - Use entire range instead of blocks of records in the inFile.
+
+* nonOverlapping - select non-overlapping instead of overlapping records
+
+* strand - must be on the same strand to be considered overlapping
+
+* oppositeStrand - must be on the opposite strand to be considered overlapping
+
+* excludeSelf - don't compare records with the same coordinates and name. Warning: using only one of -inCds or -selectCds will result in different coordinates for the same record.
+
+* idMatch - only select overlapping records if they have the same id
+
+* aggregate - instead of computing overlap bases on individual select entries, compute it based on the total number of inFile bases overlapped by selectFile records. -overlapSimilarity and -mergeOutput will not work with this option.
+
+* overlapThreshold=0.0 - minimum fraction of an inFile record that must be overlapped by a single select record to be considered overlapping.  Note that this is only coverage by a single select record, not total coverage.
+
+* overlapThresholdCeil=1.1 - select only inFile records with less than this amount of overlap with a single record, provided they are selected by other criteria.
+
+* overlapSimilarity=0.0 - minimum fraction of both the inFile record and the select record that must overlap. Note that this is only coverage by a single select record, and that it is bidirectional: inFile and selectFile must each overlap by this amount. A value of 1.0 will select identical records (or CDS, if both CDS options are specified). Not currently supported with -aggregate.
+
+* overlapSimilarityCeil=1.1 - select only inFile records with less than this amount of similarity with a single record, provided they are selected by other criteria.
+
+* overlapBases=-1 - minimum number of bases of overlap; a value less than 0 disables this check.
+
+* statsOutput - output overlap statistics instead of selected records. If no overlap criteria are specified, all overlapping entries are reported; otherwise only the pairs passing the criteria are reported. This results in a tab-separated file with the columns: inId selectId inOverlap selectOverlap overBases, where inOverlap is the fraction of the inFile record overlapped by the selectFile record and selectOverlap is the fraction of the select record overlapped by inFile records.  With -aggregate, output is: inId inOverlap inOverBases inBases
+
+* statsOutputAll - like -statsOutput, however output all inFile records, including those that are not overlapped.
+
+* statsOutputBoth - like -statsOutput, however output all selectFile and inFile records, including those that are not overlapped.
+
+* mergeOutput - output file will be a merge of the input file with the selectFile records that selected it.  The format is inRec selectRec. If multiple select records hit, inRec is repeated. This will increase
+  the memory required. Not supported with -nonOverlapping or -aggregate.
+      
+* idOutput - output a tab-separated file of pairs of inId selectId. With -aggregate, only a single column of inId is written.
+
+* dropped=file  - output rows that were dropped to this file.
+
+* verbose=n - a verbose level greater than 1 prints some details.
+
+
+  </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/NM_001206.gp	Wed May 21 03:30:16 2014 -0400
@@ -0,0 +1,1 @@
+NM_001206	chr9	-	68456943	68486659	68459810	68485398	3	68456943,68484893,68485500,	68460040,68485487,68486659,	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mrna.psl	Wed May 21 03:30:16 2014 -0400
@@ -0,0 +1,15 @@
+1015	3	0	0	0	0	3	41777	-	BC040840	1033	0	1018	chr9	136372045	68288114	68330909	4	342,176,21,479,	15,357,533,554,	68288114,68301248,68329949,68330430,
+3480	1	0	0	0	0	24	89947	+	BC038225	3507	10	3491	chr9	136372045	68331023	68424451	25	275,142,53,163,135,141,162,72,256,155,114,95,133,174,170,124,123,126,45,96,105,120,180,96,226,	10,285,427,480,643,778,919,1081,1153,1409,1564,1678,1773,1906,2080,2250,2374,2497,2623,2668,2764,2869,2989,3169,3265,	68331023,68336338,68339957,68349344,68350525,68352793,68354456,68358234,68370000,68372080,68377281,68386776,68387480,68390548,68390828,68395517,68396055,68416178,68418639,68419100,68419647,68419953,68422148,68422410,68424225,
+2883	2	0	0	0	0	21	86071	+	BC017666	2900	0	2885	chr9	136372045	68331037	68419993	22	261,142,53,163,135,141,162,72,256,155,114,95,133,174,170,124,123,126,45,96,105,40,	0,261,403,456,619,754,895,1057,1129,1385,1540,1654,1749,1882,2056,2226,2350,2473,2599,2644,2740,2845,	68331037,68336338,68339957,68349344,68350525,68352793,68354456,68358234,68370000,68372080,68377281,68386776,68387480,68390548,68390828,68395517,68396055,68416178,68418639,68419100,68419647,68419953,
+5903	3	0	0	0	0	24	89947	+	AB011166	5906	0	5906	chr9	136372045	68331055	68426908	25	243,142,53,163,135,141,162,72,256,155,114,95,133,174,170,124,123,126,45,96,105,120,180,96,2683,	0,243,385,438,601,736,877,1039,1111,1367,1522,1636,1731,1864,2038,2208,2332,2455,2581,2626,2722,2827,2947,3127,3223,	68331055,68336338,68339957,68349344,68350525,68352793,68354456,68358234,68370000,68372080,68377281,68386776,68387480,68390548,68390828,68395517,68396055,68416178,68418639,68419100,68419647,68419953,68422148,68422410,68424225,
+2098	2	0	0	0	0	15	57736	+	AK055825	2257	0	2100	chr9	136372045	68331083	68390919	16	215,142,53,163,135,52,88,162,72,256,155,114,95,133,174,91,	0,215,357,410,573,708,760,848,1010,1082,1338,1493,1607,1702,1835,2009,	68331083,68336338,68339957,68349344,68350525,68352793,68352846,68354456,68358234,68370000,68372080,68377281,68386776,68387480,68390548,68390828,
+3388	3	0	0	0	0	24	89947	+	AJ310550	3410	0	3391	chr9	136372045	68331113	68424451	25	185,142,53,163,135,141,162,72,256,155,114,95,133,174,170,124,123,126,45,96,105,120,180,96,226,	0,185,327,380,543,678,819,981,1053,1309,1464,1578,1673,1806,1980,2150,2274,2397,2523,2568,2664,2769,2889,3069,3165,	68331113,68336338,68339957,68349344,68350525,68352793,68354456,68358234,68370000,68372080,68377281,68386776,68387480,68390548,68390828,68395517,68396055,68416178,68418639,68419100,68419647,68419953,68422148,68422410,68424225,
+1246	0	0	0	0	0	0	0	+	BC035661	1300	0	1246	chr9	136372045	68382909	68384155	1	1246,	0,	68382909,
+1710	6	0	0	1	4	0	0	+	BC020393	1747	0	1720	chr9	136372045	68386410	68388126	2	937,779,	0,941,	68386410,68387347,
+2694	8	0	0	1	5	2	5	+	AK093849	2707	0	2707	chr9	136372045	68392171	68394878	4	1263,111,673,655,	0,1268,1379,2052,	68392171,68393434,68393546,68394223,
+2694	8	0	0	1	5	2	5	+	AX748336	2707	0	2707	chr9	136372045	68392171	68394878	4	1263,111,673,655,	0,1268,1379,2052,	68392171,68393434,68393546,68394223,
+3164	6	0	0	2	4	3	3672	+	BX537694	3192	0	3174	chr9	136372045	68406049	68412891	6	117,1163,468,858,270,294,	0,119,1282,1752,2610,2880,	68406049,68406166,68410997,68411465,68412324,68412597,
+733	1	0	0	0	0	0	0	+	BC034441	773	0	734	chr9	136372045	68426170	68426904	1	734,	0,	68426170,
+4830	8	0	0	3	12	12	24878	-	D31716	4859	9	4859	chr9	136372045	68456943	68486659	14	488,38,168,632,615,544,3,16,584,574,19,465,131,561,	0,489,527,695,1327,1943,2487,2490,2506,3090,3664,3693,4158,4289,	68456943,68457434,68457473,68457642,68458275,68458890,68459435,68459439,68459456,68484893,68485468,68485500,68485966,68486098,
+1339	1	0	0	1	10	4	16	-	S72504	1359	9	1359	chr9	136372045	68485303	68486659	5	164,19,465,131,561,	0,164,193,658,789,	68485303,68485468,68485500,68485966,68486098,
+1815	2	0	0	0	0	3	63052	-	AK124136	1817	0	1817	chr9	136372045	68266032	68330901	4	558,136,171,952,	0,558,694,865,	68266032,68288320,68290349,68329949,
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/psl_over_NM_001206gp.psl	Wed May 21 03:30:16 2014 -0400
@@ -0,0 +1,2 @@
+4830	8	0	0	3	12	12	24878	-	D31716	4859	9	4859	chr9	136372045	68456943	68486659	14	488,38,168,632,615,544,3,16,584,574,19,465,131,561,	0,489,527,695,1327,1943,2487,2490,2506,3090,3664,3693,4158,4289,	68456943,68457434,68457473,68457642,68458275,68458890,68459435,68459439,68459456,68484893,68485468,68485500,68485966,68486098,
+1339	1	0	0	1	10	4	16	-	S72504	1359	9	1359	chr9	136372045	68485303	68486659	5	164,19,465,131,561,	0,164,193,658,789,	68485303,68485468,68485500,68485966,68486098,
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Wed May 21 03:30:16 2014 -0400
@@ -0,0 +1,44 @@
+<?xml version="1.0"?>
+<tool_dependency>
+<package name="overlapSelect" version="latest">
+	<install version="1.0">
+	<actions> <!-- fetch the prebuilt UCSC binary for this platform, move it into $INSTALL_DIR/bin and prepend that directory to PATH; POSIX [ ] tests keep the script usable when sh is not bash -->
+	<action type="shell_command">
+		mkdir -p $INSTALL_DIR/bin;
+		mkdir -p overlapSelect;
+
+		if [ "$(uname -m)" = "x86_64" ] ; then
+		if [ "$(uname)" = "Linux" ] ; then
+		echo "We are in first"
+		wget -nH --cut-dirs=3 -O overlapSelect/overlapSelect http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/overlapSelect
+		fi
+		if [ "$(uname)" = "Darwin" ]; then
+		echo "We are in second"
+		wget -nH --cut-dirs=3 -O overlapSelect/overlapSelect http://hgdownload.cse.ucsc.edu/admin/exe/macOSX.x86_64/overlapSelect
+		fi
+		fi
+		if [ "$(uname -m)" = "i386" ] ; then
+		if [ "$(uname)" = "Darwin" ]; then
+		echo "We are in third"
+		wget -nH --cut-dirs=3 -O overlapSelect/overlapSelect http://hgdownload.cse.ucsc.edu/admin/exe/macOSX.i386/overlapSelect
+		fi
+		fi
+
+		chmod u+x overlapSelect/overlapSelect;
+	</action>
+	<action type="move_file"> <!-- move the binary itself (not its directory) so it lands directly in bin/ -->
+		<source>overlapSelect/overlapSelect</source>
+		<destination>$INSTALL_DIR/bin</destination>
+	</action>
+	<action type="set_environment">
+		<environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
+	</action>
+	</actions>
+	</install>
+	<readme>
+	The tool dependency script downloads the prebuilt executable available at http://hgdownload.cse.ucsc.edu/admin/exe/ .
+	The script tests whether the operating system is Mac or Linux.
+	Compiling this tool from source requires the installation of the mysql-dev libraries.
+	</readme>
+</package>
+</tool_dependency>