Mercurial > repos > qfab > overlapselect
changeset 0:84f5e02c76f3 draft default tip
Uploaded
author | qfab |
---|---|
date | Wed, 21 May 2014 03:30:16 -0400 |
parents | |
children | |
files | overlapselect_wrapper.sh overlapselect_wrapper.xml test-data/NM_001206.gp test-data/mrna.psl test-data/psl_over_NM_001206gp.psl tool_dependencies.xml |
diffstat | 6 files changed, 213 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/overlapselect_wrapper.sh Wed May 21 03:30:16 2014 -0400
@@ -0,0 +1,9 @@
+#!/bin/sh
+#
+# Thin wrapper around UCSC overlapSelect: prints the arguments it received,
+# then runs overlapSelect with exactly those arguments.
+
+# Quote "$@" so every argument is passed through as a single word; unquoted $@
+# is re-split on whitespace and glob-expanded, which breaks paths with spaces.
+echo "$@"
+overlapSelect "$@"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/overlapselect_wrapper.xml Wed May 21 03:30:16 2014 -0400
@@ -0,0 +1,150 @@
+<tool id="overlapselect_wrapper" name="overlap Select" version="1.0">
+    <!-- Galaxy wrapper around the UCSC overlapSelect command-line tool. -->
+    <requirements>
+        <requirement type="package" version="latest">overlapSelect</requirement>
+    </requirements>
+
+    <description>UCSC Overlap Select</description>
+
+    <!-- Arguments handed to the wrapper script: options first (each with its own leading dash), then selectFile, inFile, outFile. -->
+    <command interpreter="bash">
+        overlapselect_wrapper.sh
+#if $optionSpec.parType == "specify":
+    #if $str($optionSpec.strand) == "true":
+        -strand
+    #end if
+    #if $str($optionSpec.oppositeStrand) == "true":
+        -oppositeStrand
+    #end if
+    #if $str($optionSpec.nonOverlapping) == "true":
+        -nonOverlapping
+    #end if
+    #if $str($optionSpec.mergeOutput) == "true":
+        -mergeOutput
+    #end if
+    -overlapBases=$optionSpec.overlapBases
+    -overlapThreshold=$optionSpec.overlapThreshold
+    -overlapThresholdCeil=$optionSpec.overlapThresholdCeil
+    #if $str($optionSpec.selectFormat) == "bed":
+        -selectFmt=bed
+    #end if
+    #if $str($optionSpec.inFormat) == "bed":
+        -inFmt=bed
+    #end if
+    #if $str($optionSpec.selectFormat) == "tab":
+        -selectCoordCols=$optionSpec.selColStart
+    #end if
+    #if $str($optionSpec.inFormat) == "tab":
+        -inCoordCols=$optionSpec.inColStart
+    #end if
+#else
+    -selectFmt=bed -inFmt=bed
+#end if
+        $selectFile
+        $inFile
+        $outFile
+    </command>
+    <inputs>
+        <param name="inFile" type="data" format="tabular" metadata_name="dbkey" label="Select the query (input) file" />
+        <param name="selectFile" type="data" format="tabular" metadata_name="dbkey" label="Select the reference (select) file" />
+        <conditional name="optionSpec">
+            <param name="parType" type="select" label="Use default parameters or specify them?" help="">
+                <option value="default">Use Default Parameters</option>
+                <option value="specify">Specify Parameters</option>
+            </param>
+            <when value="default">
+            </when>
+            <when value="specify">
+                <param name="selectFormat" type="select" label="Reference File Format -selectFmt">
+                    <option value="bed">Bed</option>
+                    <option value="tab">Tabular</option>
+                </param>
+                <param name="inFormat" type="select" label="Query File Format -inFmt">
+                    <option value="bed">Bed</option>
+                    <option value="tab">Tabular</option>
+                </param>
+                <param name="selColStart" type="integer" value="0" label="Reference File is unstranded with chr starting at this column (0 based - i.e. 0=col1), followed by start and end (-1 to disable) -selectCoordCols" />
+                <param name="inColStart" type="integer" value="0" label="Query File is unstranded with chr starting at this column (0 based - i.e. 0=col1), followed by start and end (-1 to disable) -inCoordCols" />
+                <param name="strand" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Only search on same strand -strand" />
+                <param name="oppositeStrand" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Only search on opposite strand -oppositeStrand" />
+                <param name="nonOverlapping" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Select non-overlapping instead of overlapping records" />
+                <param name="mergeOutput" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Merge Output -mergeOutput" />
+                <param name="overlapBases" type="integer" value="-1" label="Number of overlap minimum (-overlapBases) (-1 to disable)" />
+                <param name="overlapThreshold" type="float" value="0.0" label="Minimum fraction overlap" help="minimum fraction of an inFile record that must be overlapped by a single select record to be considered overlapping. Note that this is only coverage by a single select record, not total coverage" />
+                <param name="overlapThresholdCeil" type="float" value="1.1" label="Maximum fraction overlap" help="select only inFile records with less than this amount of overlap with a single record, provided they are selected by other criteria." />
+            </when> <!-- specify -->
+        </conditional> <!-- optionSpec -->
+    </inputs>
+    <outputs>
+        <data format="interval" name="outFile" label="Overlapselect on ${selectFile.name} vs ${inFile.name} "/>
+    </outputs>
+    <tests>
+        <!--
+            The expected output holds PSL rows that also appear in mrna.psl, and the help text below
+            describes the output as selected inFile records, so mrna.psl is used as inFile here.
+            NOTE(review): default mode passes bed formats; these fixtures look like PSL and genePred,
+            so this test may also need format options the wrapper does not expose yet. TODO confirm.
+        -->
+        <test>
+            <param name="inFile" value="mrna.psl"/>
+            <param name="selectFile" value="NM_001206.gp"/>
+            <output name="outFile" value="psl_over_NM_001206gp.psl"/>
+        </test>
+    </tests>
+
+<help>
+**Overlap Select Help**
+
+
+**Options**
+
+* selectCds - Use only CDS in the selectFile.
+
+* selectRange - Use entire range instead of blocks from records in the selectFile.
+
+* inFmt=fmt - specify inFile format, same values as -selectFmt.
+
+* inCoordCols=spec - inFile is tab-separated with coordinates specified by spec, in format described above.
+
+* inCds - Use only CDS in the inFile.
+
+* inRange - Use entire range instead of blocks of records in the inFile.
+
+* nonOverlapping - select non-overlapping instead of overlapping records.
+
+* strand - must be on the same strand to be considered overlapping.
+
+* oppositeStrand - must be on the opposite strand to be considered overlapping.
+
+* excludeSelf - don't compare records with the same coordinates and name. Warning: using only one of -inCds or -selectCds will result in different coordinates for the same record.
+
+* idMatch - only select overlapping records if they have the same id.
+
+* aggregate - instead of computing overlap bases on individual select entries, compute it based on the total number of inFile bases overlapped by selectFile records. -overlapSimilarity and -mergeOutput will not work with this option.
+
+* overlapThreshold=0.0 - minimum fraction of an inFile record that must be overlapped by a single select record to be considered overlapping. Note that this is only coverage by a single select record, not total coverage.
+
+* overlapThresholdCeil=1.1 - select only inFile records with less than this amount of overlap with a single record, provided they are selected by other criteria.
+
+* overlapSimilarity=0.0 - minimum fraction of inFile and select records that must overlap. Note that this is only coverage by a single select record, and it is bidirectional: inFile and selectFile must overlap by this amount. A value of 1.0 will select identical records (or CDS if both CDS options are specified). Not currently supported with -aggregate.
+
+* overlapSimilarityCeil=1.1 - select only inFile records with less than this amount of similarity with a single record, provided they are selected by other criteria.
+
+* overlapBases=-1 - minimum number of bases of overlap; a value less than 0 disables this check.
+
+* statsOutput - output overlap statistics instead of selected records. If no overlap criteria is specified, all overlapping entries are reported. Otherwise only the pairs passing the criteria are reported. This results in a tab-separated file with the columns: inId selectId inOverlap selectOverlap overBases, where inOverlap is the fraction of the inFile record overlapped by the selectFile record and selectOverlap is the fraction of the select record overlapped by inFile records. With -aggregate, output is: inId inOverlap inOverBases inBases.
+
+* statsOutputAll - like -statsOutput, however output all inFile records, including those that are not overlapped.
+
+* statsOutputBoth - like -statsOutput, however output all selectFile and inFile records, including those that are not overlapped.
+
+* mergeOutput - output file will be a merge of the input file with the selectFile records that selected it. The format is inRec selectRec. If multiple select records hit, inRec is repeated. This will increase the memory required. Not supported with -nonOverlapping or -aggregate.
+
+* idOutput - output a tab-separated file of pairs of inId selectId. With -aggregate, only a single column of inId is written.
+
+* dropped=file - output rows that were dropped to this file.
+
+* verbose=n - values greater than 1 print some details.
+
+</help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/NM_001206.gp Wed May 21 03:30:16 2014 -0400 @@ -0,0 +1,1 @@ +NM_001206 chr9 - 68456943 68486659 68459810 68485398 3 68456943,68484893,68485500, 68460040,68485487,68486659,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mrna.psl Wed May 21 03:30:16 2014 -0400 @@ -0,0 +1,15 @@ +1015 3 0 0 0 0 3 41777 - BC040840 1033 0 1018 chr9 136372045 68288114 68330909 4 342,176,21,479, 15,357,533,554, 68288114,68301248,68329949,68330430, +3480 1 0 0 0 0 24 89947 + BC038225 3507 10 3491 chr9 136372045 68331023 68424451 25 275,142,53,163,135,141,162,72,256,155,114,95,133,174,170,124,123,126,45,96,105,120,180,96,226, 10,285,427,480,643,778,919,1081,1153,1409,1564,1678,1773,1906,2080,2250,2374,2497,2623,2668,2764,2869,2989,3169,3265, 68331023,68336338,68339957,68349344,68350525,68352793,68354456,68358234,68370000,68372080,68377281,68386776,68387480,68390548,68390828,68395517,68396055,68416178,68418639,68419100,68419647,68419953,68422148,68422410,68424225, +2883 2 0 0 0 0 21 86071 + BC017666 2900 0 2885 chr9 136372045 68331037 68419993 22 261,142,53,163,135,141,162,72,256,155,114,95,133,174,170,124,123,126,45,96,105,40, 0,261,403,456,619,754,895,1057,1129,1385,1540,1654,1749,1882,2056,2226,2350,2473,2599,2644,2740,2845, 68331037,68336338,68339957,68349344,68350525,68352793,68354456,68358234,68370000,68372080,68377281,68386776,68387480,68390548,68390828,68395517,68396055,68416178,68418639,68419100,68419647,68419953, +5903 3 0 0 0 0 24 89947 + AB011166 5906 0 5906 chr9 136372045 68331055 68426908 25 243,142,53,163,135,141,162,72,256,155,114,95,133,174,170,124,123,126,45,96,105,120,180,96,2683, 0,243,385,438,601,736,877,1039,1111,1367,1522,1636,1731,1864,2038,2208,2332,2455,2581,2626,2722,2827,2947,3127,3223, 68331055,68336338,68339957,68349344,68350525,68352793,68354456,68358234,68370000,68372080,68377281,68386776,68387480,68390548,68390828,68395517,68396055,68416178,68418639,68419100,68419647,68419953,68422148,68422410,68424225, +2098 2 0 0 0 0 15 57736 + AK055825 2257 0 2100 chr9 136372045 68331083 68390919 16 215,142,53,163,135,52,88,162,72,256,155,114,95,133,174,91, 0,215,357,410,573,708,760,848,1010,1082,1338,1493,1607,1702,1835,2009, 
68331083,68336338,68339957,68349344,68350525,68352793,68352846,68354456,68358234,68370000,68372080,68377281,68386776,68387480,68390548,68390828, +3388 3 0 0 0 0 24 89947 + AJ310550 3410 0 3391 chr9 136372045 68331113 68424451 25 185,142,53,163,135,141,162,72,256,155,114,95,133,174,170,124,123,126,45,96,105,120,180,96,226, 0,185,327,380,543,678,819,981,1053,1309,1464,1578,1673,1806,1980,2150,2274,2397,2523,2568,2664,2769,2889,3069,3165, 68331113,68336338,68339957,68349344,68350525,68352793,68354456,68358234,68370000,68372080,68377281,68386776,68387480,68390548,68390828,68395517,68396055,68416178,68418639,68419100,68419647,68419953,68422148,68422410,68424225, +1246 0 0 0 0 0 0 0 + BC035661 1300 0 1246 chr9 136372045 68382909 68384155 1 1246, 0, 68382909, +1710 6 0 0 1 4 0 0 + BC020393 1747 0 1720 chr9 136372045 68386410 68388126 2 937,779, 0,941, 68386410,68387347, +2694 8 0 0 1 5 2 5 + AK093849 2707 0 2707 chr9 136372045 68392171 68394878 4 1263,111,673,655, 0,1268,1379,2052, 68392171,68393434,68393546,68394223, +2694 8 0 0 1 5 2 5 + AX748336 2707 0 2707 chr9 136372045 68392171 68394878 4 1263,111,673,655, 0,1268,1379,2052, 68392171,68393434,68393546,68394223, +3164 6 0 0 2 4 3 3672 + BX537694 3192 0 3174 chr9 136372045 68406049 68412891 6 117,1163,468,858,270,294, 0,119,1282,1752,2610,2880, 68406049,68406166,68410997,68411465,68412324,68412597, +733 1 0 0 0 0 0 0 + BC034441 773 0 734 chr9 136372045 68426170 68426904 1 734, 0, 68426170, +4830 8 0 0 3 12 12 24878 - D31716 4859 9 4859 chr9 136372045 68456943 68486659 14 488,38,168,632,615,544,3,16,584,574,19,465,131,561, 0,489,527,695,1327,1943,2487,2490,2506,3090,3664,3693,4158,4289, 68456943,68457434,68457473,68457642,68458275,68458890,68459435,68459439,68459456,68484893,68485468,68485500,68485966,68486098, +1339 1 0 0 1 10 4 16 - S72504 1359 9 1359 chr9 136372045 68485303 68486659 5 164,19,465,131,561, 0,164,193,658,789, 68485303,68485468,68485500,68485966,68486098, +1815 2 0 0 0 0 3 63052 - AK124136 1817 0 1817 
chr9 136372045 68266032 68330901 4 558,136,171,952, 0,558,694,865, 68266032,68288320,68290349,68329949,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/psl_over_NM_001206gp.psl Wed May 21 03:30:16 2014 -0400 @@ -0,0 +1,2 @@ +4830 8 0 0 3 12 12 24878 - D31716 4859 9 4859 chr9 136372045 68456943 68486659 14 488,38,168,632,615,544,3,16,584,574,19,465,131,561, 0,489,527,695,1327,1943,2487,2490,2506,3090,3664,3693,4158,4289, 68456943,68457434,68457473,68457642,68458275,68458890,68459435,68459439,68459456,68484893,68485468,68485500,68485966,68486098, +1339 1 0 0 1 10 4 16 - S72504 1359 9 1359 chr9 136372045 68485303 68486659 5 164,19,465,131,561, 0,164,193,658,789, 68485303,68485468,68485500,68485966,68486098,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Wed May 21 03:30:16 2014 -0400
@@ -0,0 +1,52 @@
+<?xml version="1.0"?>
+<!-- Galaxy tool dependency recipe: fetches the prebuilt UCSC overlapSelect binary and puts it on PATH. -->
+<tool_dependency>
+    <package name="overlapSelect" version="latest">
+        <install version="1.0">
+            <actions>
+                <!-- Pick the download matching the host OS and CPU architecture; stop with a message if none matches. -->
+                <action type="shell_command">
+                    mkdir -p $INSTALL_DIR/bin;
+                    mkdir -p overlapSelect;
+
+                    if [ "$(uname -m)" = "x86_64" ] ; then
+                        if [ "$(uname)" = "Linux" ] ; then
+                            echo "We are in first"
+                            wget -nH --cut-dirs=3 -O overlapSelect/overlapSelect http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/overlapSelect
+                        fi
+                        if [ "$(uname)" = "Darwin" ] ; then
+                            echo "We are in second"
+                            wget -nH --cut-dirs=3 -O overlapSelect/overlapSelect http://hgdownload.cse.ucsc.edu/admin/exe/macOSX.x86_64/overlapSelect
+                        fi
+                    fi
+                    if [ "$(uname -m)" = "i386" ] ; then
+                        if [ "$(uname)" = "Darwin" ] ; then
+                            echo "We are in third"
+                            wget -nH --cut-dirs=3 -O overlapSelect/overlapSelect http://hgdownload.cse.ucsc.edu/admin/exe/macOSX.i386/overlapSelect
+                        fi
+                    fi
+
+                    if [ ! -f overlapSelect/overlapSelect ] ; then
+                        echo "No prebuilt overlapSelect binary is available for this platform"
+                        exit 1
+                    fi
+
+                    chmod u+x overlapSelect/overlapSelect;
+                </action>
+                <!-- Move the downloaded file itself, not its parent directory, so the executable sits directly in the directory added to PATH below. -->
+                <action type="move_file">
+                    <source>overlapSelect/overlapSelect</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <action type="set_environment">
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+            The tool dependencies script downloads the prebuilt executable available at http://hgdownload.cse.ucsc.edu/admin/exe/ .
+            The script checks whether the operating system is Mac or Linux.
+            Compiling this tool from source would require installing the mysql-dev libraries.
+        </readme>
+    </package>
+</tool_dependency>