Mercurial > repos > yating-l > ucsc_blat
changeset 5:70d7377d5e24 draft
planemo upload commit 7856c637db5bd4ea0b8b4db63e242618421a9cc6-dirty
author | yating-l |
---|---|
date | Wed, 01 Feb 2017 17:16:02 -0500 |
parents | fbeff1df110b |
children | 6f06b6d68c0b |
files | README.rst blat.xml test-data/amaVit1_Gallus_gallus.psl test-data/amaVit1_Gallus_gallus_filtered.psl tool_dependencies.xml |
diffstat | 5 files changed, 146 insertions(+), 96 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.rst Wed Feb 01 17:16:02 2017 -0500 @@ -0,0 +1,14 @@ +Galaxy wrapper for UCSC BLAT alignment tool +=========================================== +BLAT (http://genome.ucsc.edu/goldenPath/help/blatSpec.html) is a bioinformatics software tool that performs rapid mRNA/DNA and cross-species protein alignments. + +Features +-------- +1. Perform rapid mRNA/DNA and cross-species protein alignments using blat +2. Sort the output file +3. Filter cDNA alignments in psl format using pslCDnaFilter + +Licence +------- +Please note that commercial download and installation of the Blat and In-Silico PCR software may be licensed through Kent Informatics (http://www.kentinformatics.com). +
--- a/blat.xml Tue Jan 31 18:31:42 2017 -0500 +++ b/blat.xml Wed Feb 01 17:16:02 2017 -0500 @@ -1,5 +1,6 @@ +<?xml version="1.0"?> <tool id="ucsc_blat" name="UCSC BLAT Alignment Tool" version="1.0"> - <description>Rapidly align sequences to the genome</description> + <description>Standalone blat sequence search command line tool</description> <requirements> <requirement type="package" version="1.0">ucsc_tools_340_for_BLAT</requirement> </requirements> @@ -13,8 +14,46 @@ -mask=$mask '${database}' '${query}' - '${output}' - + output + && sort -k 10,10 -k 12,12n output > '${output_sorted}' + && pslReps -minAli=0.25 '${output_sorted}' output.reps.psl output.reps.psr + && faPolyASizes '${query}' query.polyA + #if $filter_param.filter =="yes" + && pslCDnaFilter + #if $filter_param.assembly_type == "native" + -localNearBest=0.001 + #if $filter_param.assembly_category == "finished" + -minId=0.95 + -minCover=0.25 + #else if $filter_param.assembly_category == "well-ordered" + -minId=0.95 + -minCover=0.15 + #else + -minId=0.94 + -minAlnSize=80 + #end if + #else + -localNearBest=0.010 + #if $filter_param.assembly_category == "finished" + -minId=0.35 + -minCover=0.25 + #else if $filter_param.assembly_category == "well-ordered" + -minId=0.35 + -minCover=0.15 + #else + -minId=0.33 + -minAlnSize=80 + #end if + #end if + -minQSize=20 + -ignoreIntrons + -repsAsMatch + -ignoreNs + -bestOverlap + -polyASizes=query.polyA + output.reps.psl + '${output_filtered}' + #end if ]]></command> <inputs> <param type="data" name="database" format="fasta" /> @@ -40,93 +79,27 @@ <option value="out">out - mask according to database.out RepeatMasker .out file</option> <option value="file.out">file.out - mask database according to RepeatMasker file.out</option> </param> - <!--<conditional name="database" format="fasta"> - <param type="select" name="database_type" format="text" multiple="false" label="database type" help="Choose your database type, the default is dna"> - <option value="dna">DNA 
sequence</option> - <option value="prot">protein sequence</option> - <option value="dnax">DNA sequence translated in six frames to protein</option> + <conditional name="filter_param"> + <param name="filter" type="select" label="Filter BLAT results with pslCDnaFilter"> + <option value="no" selected="true">No</option> + <option value="yes">Yes</option> </param> - <when value="dna"> - <param type="integer" name="tileSize" value="11" min="1" max="12" label="tileSize" help="Sets the size of match that triggers an alignment. Usually between 8 and 12">tileSize</param> - <param name="minMatch" type="integer" value="2" label="Sets the number of tile matches. Usually set from 2 to 4. - Default is 2 for nucleotide, 1 for protein.">-minMatch</param> - <param name="minIdentity" type="integer" value="90" label="Sets minimum sequence identity (in percent). Default is - 90 for nucleotide searches, 25 for protein or translated - protein searches.">-minIdentity</param> - - </when> - <when value="prot"> - <param type="integer" name="tileSize" value="5" min="1" max="12" label="tileSize" help="Sets the size of match that triggers an alignment. Usually between 8 and 12">tileSize</param> - <param name="minMatch" type="integer" value="1" label="Sets the number of tile matches. Usually set from 2 to 4. - Default is 2 for nucleotide, 1 for protein.">-minMatch</param> - <param name="minIdentity" type="integer" value="25" label="Sets minimum sequence identity (in percent). 
Default is - 90 for nucleotide searches, 25 for protein or translated - protein searches.">-minIdentity</param> - </when> - </conditional> - <param type="select" name="query_type" format="text" multiple="false" label="query type" help="Choose your query type, the default is dna"> - <option value="dna">DNA sequence</option> - <option value="rna">RNA sequence</option> - <option value="prot">protein sequence</option> - <option value="dnax">DNA sequence translated in six frames to protein</option> - <option value="rnax">DNA sequence translated in three frames to protein</option> - </param> - <conditional name="settings"> - <param name="advanced" type="select" multiple="false" label="Specify advanced parameters"> - <option value="simple" selected="true">No, use program defaults. </option> - <option value="advanced">Yes, see full parameter list.</option> - </param> - <when value="advanced"> - <param name="mask" type="select" label="Mask out repeats" help="Alignments won't be started in masked region - but may extend through it in nucleotide searches. 
Masked areas - are ignored entirely in protein or translated searches."> - <option value="lower">lower - mask out lower-cased sequence</option> - <option value="upper">upper - mask out upper-cased sequence</option> - <option value="out">out - mask according to database.out RepeatMasker .out file</option> - <option value="file.out">file.out - mask database according to RepeatMasker file.out</option> - </param> - <param name="qmask" type="select" label="Mask out repeats in query sequence" help="Similar to -mask above, but - for query rather than target sequence."> - <option value="lower">lower - mask out lower-cased sequence</option> - <option value="upper">upper - mask out upper-cased sequence</option> - <option value="out">out - mask according to database.out RepeatMasker .out file</option> - <option value="file.out">file.out - mask database according to RepeatMasker file.out</option> - </param> - <param name="oneOff" type="integer" value="0" label="If set to 1, this allows one mismatch in tile and still triggers an alignment. Default is 0.">-oneOff</param> - <param name="minScore" type="integer" value="30" label="Sets minimum score. This is the matches minus the - mismatches minus some sort of gap penalty. Default is 30.">-minScore</param> - <param name="maxGap" type="integer" value="2" label="Sets the size of maximum gap between tiles in a clump. Usually - set from 0 to 3. Default is 2. Only relevent for minMatch > 1.">-maxGap</param> - <param name="minRepDivergence" type="integer" value="15" min="0" max="100" label="Minimum percent divergence of repeats to allow - them to be unmasked. Default is 15. Only relevant for - masking using RepeatMasker .out files.">-minRepDivergence</param> - <param name="noHead" type="boolean" value="false" label="Suppresses .psl header (so it's just a tab-separated file)." /> - <param name="dots" type="integer" value="0" label="Output dot every N sequences to show program's progress." 
/> - <param name="trimT" type="boolean" value="false" label="Trim leading poly-T." /> - <param name="trimHardA" type="boolean" value="false" label="Remove poly-A tail from qSize as well as alignments in - psl output." /> - <param name="fastMap" type="boolean" value="false" label="Run for fast DNA/DNA remapping - not allowing introns, - requiring high %ID. Query sizes must not exceed 5000." /> - <param name="fine" type="boolean" value="false" label="For high-quality mRNAs, look harder for small initial and - terminal exons. Not recommended for ESTs." /> - <param name="out" type="select" label="Output file format"> - <option value="psl" selected="true">psl - Default. Tab-separated format, no sequence</option> - <option value="pslx">pslx - Tab-separated format with sequence</option> - <option value="axt">axt - blastz-associated axt format</option> - <option value="maf">maf - multiz-associated maf format</option> - <option value="sim4">sim4 - similar to sim4 format</option> - <option value="wublast">wublast - similar to wublast format</option> - <option value="blast">blast - similar to NCBI blast format</option> - <option value="blast8">blast8- NCBI blast tabular format</option> - <option value="blast9">blast9 - NCBI blast tabular format with comments</option> - </param> - <param name="maxIntro" type="integer" value="750000" label="Sets maximum intron size. Default is 750000." /> - <param name="extendThroughN" type="boolean" value="false" label="Allows extension of alignment through large blocks of Ns." 
/> - </when> - </conditional>--> + <when value="yes"> + <param name="assembly_type" type="select" label="Choose your type of cDNA sequence"> + <option value="native">Same species</option> + <option value="xeno">Across species</option> + </param> + <param name="assembly_category" type="select" label="Choose your genome assembly category"> + <option value="finished">finished assemblies (high quality)</option> + <option value="well-ordered">well-ordered assemblies (well ordered, whole genome shotgun)</option> + <option value="low-coverage">low-coverage assemblies (low coverage (< 4x), lots of contigs, N50 scaffold size < 1mb)</option> + </param> + </when> + </conditional> </inputs> <outputs> - <data format="psl" name="output"></data> + <data format="psl" name="output_sorted"></data> + <data format="psl" name="output_filtered"></data> </outputs> <tests> <test> @@ -136,9 +109,64 @@ <param name="query_type" value="rnax" /> <param name="noHead" value="true" /> <param name="mask" value="lower" /> - <output name="output" value="amaVit1_Gallus_gallus.psl" /> + <param name="filter" value="yes" /> + <param name="assembly_type" value="xeno" /> + <param name="assembly_category" value="well-ordered" /> + <output name="output_sorted" value="amaVit1_Gallus_gallus.psl" /> + <output name="output_filtered" value="amaVit1_Gallus_gallus_filtered.psl" /> </test> - </tests> + </tests> + <help> + <![CDATA[ +BLAT +==== +BLAT is a bioinformatics software tool that performs rapid mRNA/DNA and cross-species protein alignments. + +blat (version: v340)- Standalone blat sequence search command line tool. +--------------------------------------------------------- +usage: +++++++ + blat database query [-ooc=11.ooc] output.psl +where: + database and query are each either a .fa, .nib or .2bit file, + or a list of these files with one file name per line. + -ooc=11.ooc tells the program to load over-occurring 11-mers from + an external file.
This will increase the speed + by a factor of 40 in many cases, but is not required. + output.psl is the name of the output file. +documentation: +++++++++++++++ +See Blat documentation (http://genome.ucsc.edu/goldenPath/help/blatSpec.html) +Source code: +++++++++++++ +http://hgdownload.cse.ucsc.edu/admin/exe/ +pslCDnaFilter (version: v340) +--------------------------- +Filter cDNA alignments in psl format. Filtering criteria are comparative, selecting near best in genome alignments for each given cDNA and non-comparative, based only on the quality of an individual alignment. +usage: +++++++ + pslCDnaFilter [options] inPsl outPsl +Source code: +++++++++++++ +http://hgdownload.cse.ucsc.edu/admin/exe/ + +Licence +======= +Please note that commercial download and installation of the Blat and In-Silico PCR software may be licensed through Kent Informatics (http://www.kentinformatics.com). +]]> +</help> +<citations> + <citation type="bibtex">@article{kent2002blat, + title={BLAT—the BLAST-like alignment tool}, + author={Kent, W James}, + journal={Genome research}, + volume={12}, + number={4}, + pages={656--664}, + year={2002}, + publisher={Cold Spring Harbor Lab} + }</citation> +</citations> </tool>
--- a/test-data/amaVit1_Gallus_gallus.psl Tue Jan 31 18:31:42 2017 -0500 +++ b/test-data/amaVit1_Gallus_gallus.psl Wed Feb 01 17:16:02 2017 -0500 @@ -1,12 +1,12 @@ 2134 377 0 62 6 361 28 33978 ++ NM_001006176 3248 296 3230 LMAW01000001.1 10443977 10027590 10064141 29 110,48,10,76,75,152,66,51,87,83,55,56,148,96,63,24,177,107,83,145,109,83,145,90,117,129,68,93,27, 296,406,454,502,578,653,844,910,961,1048,1131,1186,1242,1390,1486,1609,1741,1918,2025,2108,2253,2362,2445,2590,2680,2797,2926,3034,3203, 10027590,10031154,10031353,10031517,10033261,10033696,10034454,10035646,10043484,10043794,10059668,10059824,10059973,10060798,10060979,10061265,10061487,10061740,10061947,10062143,10062365,10062584,10062743,10063001,10063188,10063383,10063616,10063806,10064114, +1247 370 0 44 6 280 17 96639 +- NM_001012804 4592 121 2062 LMAW01000001.1 10443977 517465 615765 18 119,32,74,112,2,87,150,114,39,171,3,94,58,142,81,107,137,139, 121,240,272,355,467,469,598,757,1066,1129,1300,1303,1397,1455,1597,1679,1786,1923, 9828212,9828431,9869264,9881475,9881600,9882641,9893172,9893708,9899022,9904744,9904997,9908755,9909749,9911745,9915154,9915481,9920844,9926373, 420 114 0 0 4 99 3 7093 ++ NM_001031013 971 69 702 LMAW01000002.1 5883884 3610886 3618513 5 33,66,21,276,138, 69,138,232,285,564, 3610886,3610955,3617941,3618099,3618375, -189 73 0 0 4 773 4 13674 ++ NM_204766 5992 553 1588 LMAW01000003.1 4188689 2559457 2573393 5 84,66,33,13,66, 553,739,844,1457,1522, 2559457,2563244,2563349,2570767,2573327, -143 58 0 19 3 659 4 9089 ++ NM_204766 5992 574 1453 LMAW01000003.1 4188689 259524 268833 5 27,80,11,66,36, 574,700,780,847,1417, 259524,261255,261529,262379,268797, -3724 1107 0 326 14 609 44 22663 ++ NM_204766 5992 49 5815 LMAW01000002.1 5883884 103604 131424 45 51,78,132,3,152,2,29,75,90,63,1,95,81,3,136,119,150,2,169,180,123,48,90,117,123,134,241,261,177,146,90,72,267,129,81,2,196,348,116,193,106,123,177,99,87, 
49,154,256,388,391,543,545,574,691,781,844,845,967,1048,1051,1187,1306,1456,1458,1627,1807,1960,2008,2098,2215,2338,2472,2713,2974,3151,3298,3388,3493,3778,3943,4024,4026,4222,4696,4812,5051,5212,5335,5608,5728, 103604,104407,107764,107909,108002,108233,108257,109261,112047,112967,113036,113148,114257,114391,114866,116183,116438,116593,116908,117767,118030,118682,120119,120672,121329,122185,122724,123518,123945,124449,124904,125793,125898,126790,127405,127543,127717,128055,129238,129442,129761,130020,130254,131103,131337, -537 234 0 42 7 1113 7 34987 ++ NM_204766 5992 262 2188 LMAW01000001.1 10443977 9729332 9765132 8 108,153,66,93,39,171,99,84, 262,388,574,688,1414,1459,1981,2104, 9729332,9734141,9751019,9755220,9762042,9762555,9764665,9765048, -1247 370 0 44 6 280 17 96639 +- NM_001012804 4592 121 2062 LMAW01000001.1 10443977 517465 615765 18 119,32,74,112,2,87,150,114,39,171,3,94,58,142,81,107,137,139, 121,240,272,355,467,469,598,757,1066,1129,1300,1303,1397,1455,1597,1679,1786,1923, 9828212,9828431,9869264,9881475,9881600,9882641,9893172,9893708,9899022,9904744,9904997,9908755,9909749,9911745,9915154,9915481,9920844,9926373, 2710 404 0 198 24 2053 34 11593 +- NM_001292086 6644 10 5375 LMAW01000003.1 4188689 2820233 2835138 37 64,727,107,5,90,146,87,93,135,137,171,93,72,90,69,90,23,34,9,72,125,70,106,21,23,161,26,29,93,15,45,18,11,12,54,162,27, 10,74,801,908,913,1003,1149,1236,1329,1464,1617,1792,1885,2049,2163,2350,2918,3036,3070,3366,3445,3576,3647,3794,3985,4008,4236,4263,4476,4603,4688,4742,4882,4941,5031,5113,5348, 1353551,1355131,1356116,1356798,1356971,1357445,1359020,1359444,1359816,1360523,1360971,1361443,1365082,1365245,1365351,1365525,1366096,1366220,1366255,1366474,1366549,1366674,1366749,1366889,1367081,1367105,1367332,1367358,1367558,1367688,1367786,1367837,1367979,1368029,1368113,1368194,1368429, 200 50 0 0 0 0 1 24 +- NM_001292086 6644 481 731 LMAW01000003.1 4188689 2832850 2833124 2 110,140, 481,591, 1355565,1355699, 182 48 0 20 1 24 0 0 +- 
NM_001292086 6644 508 782 LMAW01000003.1 4188689 2832901 2833151 2 102,148, 508,634, 1355538,1355640, 92 25 0 0 0 0 1 183 +- NM_001292086 6644 562 679 LMAW01000003.1 4188689 2832851 2833151 2 43,74, 562,605, 1355538,1355764, 49 11 0 0 0 0 0 0 +- NM_001292086 6644 619 679 LMAW01000003.1 4188689 2833085 2833145 1 60, 619, 1355544, +3724 1107 0 326 14 609 44 22663 ++ NM_204766 5992 49 5815 LMAW01000002.1 5883884 103604 131424 45 51,78,132,3,152,2,29,75,90,63,1,95,81,3,136,119,150,2,169,180,123,48,90,117,123,134,241,261,177,146,90,72,267,129,81,2,196,348,116,193,106,123,177,99,87, 49,154,256,388,391,543,545,574,691,781,844,845,967,1048,1051,1187,1306,1456,1458,1627,1807,1960,2008,2098,2215,2338,2472,2713,2974,3151,3298,3388,3493,3778,3943,4024,4026,4222,4696,4812,5051,5212,5335,5608,5728, 103604,104407,107764,107909,108002,108233,108257,109261,112047,112967,113036,113148,114257,114391,114866,116183,116438,116593,116908,117767,118030,118682,120119,120672,121329,122185,122724,123518,123945,124449,124904,125793,125898,126790,127405,127543,127717,128055,129238,129442,129761,130020,130254,131103,131337, +537 234 0 42 7 1113 7 34987 ++ NM_204766 5992 262 2188 LMAW01000001.1 10443977 9729332 9765132 8 108,153,66,93,39,171,99,84, 262,388,574,688,1414,1459,1981,2104, 9729332,9734141,9751019,9755220,9762042,9762555,9764665,9765048, +189 73 0 0 4 773 4 13674 ++ NM_204766 5992 553 1588 LMAW01000003.1 4188689 2559457 2573393 5 84,66,33,13,66, 553,739,844,1457,1522, 2559457,2563244,2563349,2570767,2573327, +143 58 0 19 3 659 4 9089 ++ NM_204766 5992 574 1453 LMAW01000003.1 4188689 259524 268833 5 27,80,11,66,36, 574,700,780,847,1417, 259524,261255,261529,262379,268797,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/amaVit1_Gallus_gallus_filtered.psl Wed Feb 01 17:16:02 2017 -0500 @@ -0,0 +1,5 @@ +2134 377 0 62 6 361 28 33978 ++ NM_001006176 3248 296 3230 LMAW01000001.1 10443977 10027590 10064141 29 110,48,10,76,75,152,66,51,87,83,55,56,148,96,63,24,177,107,83,145,109,83,145,90,117,129,68,93,27, 296,406,454,502,578,653,844,910,961,1048,1131,1186,1242,1390,1486,1609,1741,1918,2025,2108,2253,2362,2445,2590,2680,2797,2926,3034,3203, 10027590,10031154,10031353,10031517,10033261,10033696,10034454,10035646,10043484,10043794,10059668,10059824,10059973,10060798,10060979,10061265,10061487,10061740,10061947,10062143,10062365,10062584,10062743,10063001,10063188,10063383,10063616,10063806,10064114, +1247 370 0 44 6 280 17 96639 +- NM_001012804 4592 121 2062 LMAW01000001.1 10443977 517465 615765 18 119,32,74,112,2,87,150,114,39,171,3,94,58,142,81,107,137,139, 121,240,272,355,467,469,598,757,1066,1129,1300,1303,1397,1455,1597,1679,1786,1923, 9828212,9828431,9869264,9881475,9881600,9882641,9893172,9893708,9899022,9904744,9904997,9908755,9909749,9911745,9915154,9915481,9920844,9926373, +420 114 0 0 4 99 3 7093 ++ NM_001031013 971 69 702 LMAW01000002.1 5883884 3610886 3618513 5 33,66,21,276,138, 69,138,232,285,564, 3610886,3610955,3617941,3618099,3618375, +2710 404 0 198 24 2053 34 11593 +- NM_001292086 6644 10 5375 LMAW01000003.1 4188689 2820233 2835138 37 64,727,107,5,90,146,87,93,135,137,171,93,72,90,69,90,23,34,9,72,125,70,106,21,23,161,26,29,93,15,45,18,11,12,54,162,27, 10,74,801,908,913,1003,1149,1236,1329,1464,1617,1792,1885,2049,2163,2350,2918,3036,3070,3366,3445,3576,3647,3794,3985,4008,4236,4263,4476,4603,4688,4742,4882,4941,5031,5113,5348, 
1353551,1355131,1356116,1356798,1356971,1357445,1359020,1359444,1359816,1360523,1360971,1361443,1365082,1365245,1365351,1365525,1366096,1366220,1366255,1366474,1366549,1366674,1366749,1366889,1367081,1367105,1367332,1367358,1367558,1367688,1367786,1367837,1367979,1368029,1368113,1368194,1368429, +3724 1107 0 326 14 609 44 22663 ++ NM_204766 5992 49 5815 LMAW01000002.1 5883884 103604 131424 45 51,78,132,3,152,2,29,75,90,63,1,95,81,3,136,119,150,2,169,180,123,48,90,117,123,134,241,261,177,146,90,72,267,129,81,2,196,348,116,193,106,123,177,99,87, 49,154,256,388,391,543,545,574,691,781,844,845,967,1048,1051,1187,1306,1456,1458,1627,1807,1960,2008,2098,2215,2338,2472,2713,2974,3151,3298,3388,3493,3778,3943,4024,4026,4222,4696,4812,5051,5212,5335,5608,5728, 103604,104407,107764,107909,108002,108233,108257,109261,112047,112967,113036,113148,114257,114391,114866,116183,116438,116593,116908,117767,118030,118682,120119,120672,121329,122185,122724,123518,123945,124449,124904,125793,125898,126790,127405,127543,127717,128055,129238,129442,129761,130020,130254,131103,131337,
--- a/tool_dependencies.xml Tue Jan 31 18:31:42 2017 -0500 +++ b/tool_dependencies.xml Wed Feb 01 17:16:02 2017 -0500 @@ -1,8 +1,11 @@ <?xml version="1.0"?> <tool_dependency> - <!-- UCSC tools: + <!-- required tools: - blat - - + - sort + - pslReps + - faPolyASizes + - pslCDnaFilter --> <package name="ucsc_tools_340_for_BLAT" version="1.0"> <install version="1.0">