# HG changeset patch
# User qfab
# Date 1400657416 14400
# Node ID 84f5e02c76f3e88f1a5ca1fd4e005a9408d06ec2
Uploaded
diff -r 000000000000 -r 84f5e02c76f3 overlapselect_wrapper.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/overlapselect_wrapper.sh Wed May 21 03:30:16 2014 -0400
@@ -0,0 +1,4 @@
+#!/bin/sh
+# Print the received arguments, then pass them to overlapSelect unchanged; "$@" keeps each argument as a single word.
+echo "$@"
+overlapSelect "$@"
diff -r 000000000000 -r 84f5e02c76f3 overlapselect_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/overlapselect_wrapper.xml Wed May 21 03:30:16 2014 -0400
@@ -0,0 +1,147 @@
+
+
+ overlapSelect
+
+
+
+ UCSC Overlap Select
+
+ overlapselect_wrapper.sh
+#if $optionSpec.parType == "specify":
+ #if $str($optionSpec.strand) == "true":
+ -strand
+ #end if
+ #if $str($optionSpec.oppositeStrand) == "true":
+ -oppositeStrand
+ #end if
+ #if $str($optionSpec.nonOverlapping) == "true":
+ -nonOverlapping
+ #end if
+ #if $str($optionSpec.mergeOutput) == "true":
+ -mergeOutput
+ #end if
+ -overlapBases=$optionSpec.overlapBases
+ -overlapThreshold=$optionSpec.overlapThreshold
+ -overlapThresholdCeil=$optionSpec.overlapThresholdCeil
+ #if $str($optionSpec.selectFormat) == "bed":
+ -selectFmt=bed
+ #end if
+ #if $str($optionSpec.inFormat) == "bed":
+ -inFmt=bed
+ #end if
+ #if $str($optionSpec.selectFormat) == "tab":
+ -selectCoordCols=$optionSpec.selColStart
+ #end if
+ #if $str($optionSpec.inFormat) == "tab":
+ -inCoordCols=$optionSpec.inColStart
+ #end if
+#else
+ -selectFmt=bed -inFmt=bed ## no options specified: pass both files as BED; each option needs its leading dash
+#end if
+ $selectFile
+ $inFile
+ $outFile
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**Overlap Select Help**
+
+
+**Options**
+
+* selectCds - Use only CDS in the selectFile
+
+* selectRange - Use entire range instead of blocks from records in the selectFile.
+
+* inFmt=fmt - specify inFile format, same values as -selectFmt.
+
+* inCoordCols=spec - inFile is tab-separated with coordinates specified by spec, in format described above.
+
+* inCds - Use only CDS in the inFile
+
+* inRange - Use entire range instead of blocks of records in the inFile.
+
+* nonOverlapping - select non-overlapping instead of overlapping records
+
+* strand - must be on the same strand to be considered overlapping
+
+* oppositeStrand - must be on the opposite strand to be considered overlapping
+
+* excludeSelf - don't compare records with the same coordinates and name. Warning: using only one of -inCds or -selectCds will result in different coordinates for the same record.
+
+* idMatch - only select overlapping records if they have the same id
+
+* aggregate - instead of computing overlap bases on individual select entries, compute it based on the total number of inFile bases overlapped by selectFile records. -overlapSimilarity and -mergeOutput will not work with this option.
+
+* overlapThreshold=0.0 - minimum fraction of an inFile record that must be overlapped by a single select record to be considered overlapping. Note that this is only coverage by a single select record, not total coverage.
+
+* overlapThresholdCeil=1.1 - select only inFile records with less than this amount of overlap with a single record, provided they are selected by other criteria.
+
+* overlapSimilarity=0.0 - minimum fraction of both the inFile and select records that must overlap. Note that this is only coverage by a single select record, and it is bidirectional: inFile and selectFile must each overlap by this amount. A value of 1.0 will select identical records (or CDS if both CDS options are specified). Not currently supported with -aggregate.
+
+* overlapSimilarityCeil=1.1 - select only inFile records with less than this amount of similarity with a single record, provided they are selected by other criteria.
+
+* overlapBases=-1 - minimum number of bases of overlap, lt 0 disables.
+
+* statsOutput - output overlap statistics instead of selected records. If no overlap criteria are specified, all overlapping entries are reported; otherwise only the pairs passing the criteria are reported. This results in a tab-separated file with the columns: inId selectId inOverlap selectOverlap overBases, where inOverlap is the fraction of the inFile record overlapped by the selectFile record and selectOverlap is the fraction of the select record overlapped by inFile records. With -aggregate, output is: inId inOverlap inOverBases inBases
+
+* statsOutputAll - like -statsOutput, however output all inFile records, including those that are not overlapped.
+
+* statsOutputBoth - like -statsOutput, however output all selectFile and inFile records, including those that are not overlapped.
+
+* mergeOutput - output file will be a merge of the input file with the selectFile records that selected it. The format is inRec selectRec. If multiple select records hit, inRec is repeated. This will increase
+ the memory required. Not supported with -nonOverlapping or -aggregate.
+
+* idOutput - output a tab-separated file of pairs of inId selectId. With -aggregate, only a single column of inId is written.
+
+* dropped=file - output rows that were dropped to this file.
+
+* verbose=n - verbose gt 1 prints some details.
+
+
+
+
+
diff -r 000000000000 -r 84f5e02c76f3 test-data/NM_001206.gp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/NM_001206.gp Wed May 21 03:30:16 2014 -0400
@@ -0,0 +1,1 @@
+NM_001206 chr9 - 68456943 68486659 68459810 68485398 3 68456943,68484893,68485500, 68460040,68485487,68486659,
diff -r 000000000000 -r 84f5e02c76f3 test-data/mrna.psl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mrna.psl Wed May 21 03:30:16 2014 -0400
@@ -0,0 +1,15 @@
+1015 3 0 0 0 0 3 41777 - BC040840 1033 0 1018 chr9 136372045 68288114 68330909 4 342,176,21,479, 15,357,533,554, 68288114,68301248,68329949,68330430,
+3480 1 0 0 0 0 24 89947 + BC038225 3507 10 3491 chr9 136372045 68331023 68424451 25 275,142,53,163,135,141,162,72,256,155,114,95,133,174,170,124,123,126,45,96,105,120,180,96,226, 10,285,427,480,643,778,919,1081,1153,1409,1564,1678,1773,1906,2080,2250,2374,2497,2623,2668,2764,2869,2989,3169,3265, 68331023,68336338,68339957,68349344,68350525,68352793,68354456,68358234,68370000,68372080,68377281,68386776,68387480,68390548,68390828,68395517,68396055,68416178,68418639,68419100,68419647,68419953,68422148,68422410,68424225,
+2883 2 0 0 0 0 21 86071 + BC017666 2900 0 2885 chr9 136372045 68331037 68419993 22 261,142,53,163,135,141,162,72,256,155,114,95,133,174,170,124,123,126,45,96,105,40, 0,261,403,456,619,754,895,1057,1129,1385,1540,1654,1749,1882,2056,2226,2350,2473,2599,2644,2740,2845, 68331037,68336338,68339957,68349344,68350525,68352793,68354456,68358234,68370000,68372080,68377281,68386776,68387480,68390548,68390828,68395517,68396055,68416178,68418639,68419100,68419647,68419953,
+5903 3 0 0 0 0 24 89947 + AB011166 5906 0 5906 chr9 136372045 68331055 68426908 25 243,142,53,163,135,141,162,72,256,155,114,95,133,174,170,124,123,126,45,96,105,120,180,96,2683, 0,243,385,438,601,736,877,1039,1111,1367,1522,1636,1731,1864,2038,2208,2332,2455,2581,2626,2722,2827,2947,3127,3223, 68331055,68336338,68339957,68349344,68350525,68352793,68354456,68358234,68370000,68372080,68377281,68386776,68387480,68390548,68390828,68395517,68396055,68416178,68418639,68419100,68419647,68419953,68422148,68422410,68424225,
+2098 2 0 0 0 0 15 57736 + AK055825 2257 0 2100 chr9 136372045 68331083 68390919 16 215,142,53,163,135,52,88,162,72,256,155,114,95,133,174,91, 0,215,357,410,573,708,760,848,1010,1082,1338,1493,1607,1702,1835,2009, 68331083,68336338,68339957,68349344,68350525,68352793,68352846,68354456,68358234,68370000,68372080,68377281,68386776,68387480,68390548,68390828,
+3388 3 0 0 0 0 24 89947 + AJ310550 3410 0 3391 chr9 136372045 68331113 68424451 25 185,142,53,163,135,141,162,72,256,155,114,95,133,174,170,124,123,126,45,96,105,120,180,96,226, 0,185,327,380,543,678,819,981,1053,1309,1464,1578,1673,1806,1980,2150,2274,2397,2523,2568,2664,2769,2889,3069,3165, 68331113,68336338,68339957,68349344,68350525,68352793,68354456,68358234,68370000,68372080,68377281,68386776,68387480,68390548,68390828,68395517,68396055,68416178,68418639,68419100,68419647,68419953,68422148,68422410,68424225,
+1246 0 0 0 0 0 0 0 + BC035661 1300 0 1246 chr9 136372045 68382909 68384155 1 1246, 0, 68382909,
+1710 6 0 0 1 4 0 0 + BC020393 1747 0 1720 chr9 136372045 68386410 68388126 2 937,779, 0,941, 68386410,68387347,
+2694 8 0 0 1 5 2 5 + AK093849 2707 0 2707 chr9 136372045 68392171 68394878 4 1263,111,673,655, 0,1268,1379,2052, 68392171,68393434,68393546,68394223,
+2694 8 0 0 1 5 2 5 + AX748336 2707 0 2707 chr9 136372045 68392171 68394878 4 1263,111,673,655, 0,1268,1379,2052, 68392171,68393434,68393546,68394223,
+3164 6 0 0 2 4 3 3672 + BX537694 3192 0 3174 chr9 136372045 68406049 68412891 6 117,1163,468,858,270,294, 0,119,1282,1752,2610,2880, 68406049,68406166,68410997,68411465,68412324,68412597,
+733 1 0 0 0 0 0 0 + BC034441 773 0 734 chr9 136372045 68426170 68426904 1 734, 0, 68426170,
+4830 8 0 0 3 12 12 24878 - D31716 4859 9 4859 chr9 136372045 68456943 68486659 14 488,38,168,632,615,544,3,16,584,574,19,465,131,561, 0,489,527,695,1327,1943,2487,2490,2506,3090,3664,3693,4158,4289, 68456943,68457434,68457473,68457642,68458275,68458890,68459435,68459439,68459456,68484893,68485468,68485500,68485966,68486098,
+1339 1 0 0 1 10 4 16 - S72504 1359 9 1359 chr9 136372045 68485303 68486659 5 164,19,465,131,561, 0,164,193,658,789, 68485303,68485468,68485500,68485966,68486098,
+1815 2 0 0 0 0 3 63052 - AK124136 1817 0 1817 chr9 136372045 68266032 68330901 4 558,136,171,952, 0,558,694,865, 68266032,68288320,68290349,68329949,
diff -r 000000000000 -r 84f5e02c76f3 test-data/psl_over_NM_001206gp.psl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/psl_over_NM_001206gp.psl Wed May 21 03:30:16 2014 -0400
@@ -0,0 +1,2 @@
+4830 8 0 0 3 12 12 24878 - D31716 4859 9 4859 chr9 136372045 68456943 68486659 14 488,38,168,632,615,544,3,16,584,574,19,465,131,561, 0,489,527,695,1327,1943,2487,2490,2506,3090,3664,3693,4158,4289, 68456943,68457434,68457473,68457642,68458275,68458890,68459435,68459439,68459456,68484893,68485468,68485500,68485966,68486098,
+1339 1 0 0 1 10 4 16 - S72504 1359 9 1359 chr9 136372045 68485303 68486659 5 164,19,465,131,561, 0,164,193,658,789, 68485303,68485468,68485500,68485966,68486098,
diff -r 000000000000 -r 84f5e02c76f3 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Wed May 21 03:30:16 2014 -0400
@@ -0,0 +1,44 @@
+
+
+
+
+
+
+ mkdir -p "$INSTALL_DIR/bin";
+ mkdir -p overlapSelect;
+ # Download the prebuilt overlapSelect binary matching this OS and CPU; POSIX [ ] tests are used so the checks also work under plain sh.
+ if [ "$(uname -m)" = "x86_64" ] ; then
+   if [ "$(uname)" = "Linux" ] ; then
+     echo "We are in first"
+     wget -nH --cut-dirs=3 -O overlapSelect/overlapSelect http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/overlapSelect
+   fi
+   if [ "$(uname)" = "Darwin" ]; then
+     echo "We are in second"
+     wget -nH --cut-dirs=3 -O overlapSelect/overlapSelect http://hgdownload.cse.ucsc.edu/admin/exe/macOSX.x86_64/overlapSelect
+   fi
+ fi
+ if [ "$(uname -m)" = "i386" ] ; then
+   if [ "$(uname)" = "Darwin" ]; then
+     echo "We are in third"
+     wget -nH --cut-dirs=3 -O overlapSelect/overlapSelect http://hgdownload.cse.ucsc.edu/admin/exe/macOSX.i386/overlapSelect
+   fi
+ fi
+ [ -s overlapSelect/overlapSelect ] || { echo "overlapSelect download failed or platform is unsupported"; exit 1; }  # refuse to install a missing or empty binary
+ chmod u+x overlapSelect/overlapSelect;
+
+
+ overlapSelect
+ $INSTALL_DIR/bin
+
+
+ $INSTALL_DIR/bin
+
+
+
+
+ The tool dependencies script downloads the executable available at http://hgdownload.cse.ucsc.edu/admin/exe/ .
+ The script tests whether the operating system is either Mac or Linux.
+ Compiling this tool from source requires the installation of the mysql-dev libraries.
+
+
+