# HG changeset patch # User iuc # Date 1423659163 18000 # Node ID b0e3a2df7897c7e2f8ca3e0d04102805f7e454ea Uploaded diff -r 000000000000 -r b0e3a2df7897 .shed.yml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.shed.yml Wed Feb 11 07:52:43 2015 -0500 @@ -0,0 +1,3 @@ +# repository published to https://toolshed.g2.bx.psu.edu/repos/iuc/pear +owner: iuc +name: pear diff -r 000000000000 -r b0e3a2df7897 pear.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pear.xml Wed Feb 11 07:52:43 2015 -0500 @@ -0,0 +1,204 @@ + + Paired-End read merger + + + pear + + + + + + + + + 0: + --max-asm-length $max_assembly_length + #end if + --min-asm-length $min_assembly_length + --min-trim-length $min_trim_length + --quality-theshold $quality_threshold + --max-uncalled-base $max_uncalled_base + --test-method $test_method + --empirical-freqs $empirical_freqs + -j "\${GALAXY_SLOTS:-8}" + --score-method $score_method + --cap $cap + $nbase +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 'assembled' in outputs + + + 'forward' in outputs + + + 'reverse' in outputs + + + 'discarded' in outputs + + + + + + + + + + + + + + + + Forward paired-end FASTQ file. + -r, --reverse-fastq Reverse paired-end FASTQ file. + +Optional: + -p, --p-value Specify a p-value for the statistical test. If the computed + p-value of a possible assembly exceeds the specified p-value + then paired-end read will not be assembled. Valid options + are: 0.0001, 0.001, 0.01, 0.05 and 1.0. Setting 1.0 disables + the test. (default: 0.01) + -v, --min-overlap Specify the minimum overlap size. The minimum overlap may be + set to 1 when the statistical test is used. However, further + restricting the minimum overlap size to a proper value may + reduce false-positive assembles. (default: 10) + -m, --max-assembly-length Specify the maximum possible length of the assembled + sequences. 
Setting this value to 0 disables the restriction + and assembled sequences may be arbitrarily long. (default: 0) + -n, --min-assembly-length Specify the minimum possible length of the assembled + sequences. Setting this value to 0 disables the restriction + and assembled sequences may be arbitrarily short. (default: + 50) + -t, --min-trim-length Specify the minimum length of reads after trimming the low + quality part (see option -q). (default: 1) + -q, --quality-threshold Specify the quality score threshold for trimming the low + quality part of a read. If the quality scores of two + consecutive bases are strictly less than the specified + threshold, the rest of the read will be trimmed. (default: + 0) + -u, --max-uncalled-base Specify the maximal proportion of uncalled bases in a read. + Setting this value to 0 will cause PEAR to discard all reads + containing uncalled bases. The other extreme setting is 1 + which causes PEAR to process all reads independent of the + number of uncalled bases. (default: 1) + -g, --test-method Specify the type of statistical test. Two options are + available. (default: 1) + 1: Given the minimum allowed overlap, test using the highest + OES. Note that due to its discrete nature, this test usually + yields a lower p-value for the assembled read than the cut- + off (specified by -p). For example, setting the cut-off to + 0.05 using this test, the assembled reads might have an + actual p-value of 0.02. + + 2: Use the acceptance probability (m.a.p). This test method + computes the same probability as test method 1. However, it + assumes that the minimal overlap is the observed overlap + with the highest OES, instead of the one specified by -v. + Therefore, this is not a valid statistical test and the + 'p-value' is in fact the maximal probability for accepting + the assembly. 
Nevertheless, we observed in practice that for + the case the actual overlap sizes are relatively small, test + 2 can correctly assemble more reads with only slightly + higher false-positive rate. + -e, --empirical-freqs Disable empirical base frequencies. (default: use empirical + base frequencies) + -s, --score-method Specify the scoring method. (default: 2) + 1. OES with +1 for match and -1 for mismatch. + 2: Assembly score (AS). Use +1 for match and -1 for mismatch + multiplied by base quality scores. + 3: Ignore quality scores and use +1 for a match and -1 for a + mismatch. + -c, --cap Specify the upper bound for the resulting quality score. If + set to zero, capping is disabled. (default: 40) + -z, --nbase When merging a base-pair that consists of two non-equal + bases out of which none is degenerate, set the merged base + to N and use the highest quality score of the two bases + + + + +]]> + + + 10.1093/bioinformatics/btt593 + + diff -r 000000000000 -r b0e3a2df7897 test-data/forward.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/forward.fastq Wed Feb 11 07:52:43 2015 -0500 @@ -0,0 +1,56 @@ +@EU861894-140/1 +CAACCATTCCACTCGCCTCTGCCGCACTCGAGGCCCCCAGTATGAGAGGCAGTTACGAGGTTGAACCTCAGAATCTCACCCCACACTTAAGGGCGCGCCT ++ +@#CAAF4FHFG;DIJJI?JG#GIIIDECD#4H*1IBIIIH#?IDID*DFIIF?C#HGBJICJBC#JEGEA>ECH#=7#DDF@##D#D<@C#CC@#C?D@, +@EU861894-138/1 +TCCGAGGTTGAGCCTCAGAATTTCACCCCCCACTTAACGGCCCGACTCCGCGCTCCTTACGCCCAGTAAGTTCGGACAACGCTGGCCCCCTACGTACTAC ++ +@CCFFFBFFDHFHJJJ+#C@B9DEFDD@ED#EDBD> +@EU861894-134/1 +TTGTGCAAGAGTCCTAATTGCTGCCCCCCGTAGGAGTCTGGGCCGTATCTCAGTCCCAATCCGGCTGTTCGTCCCCTCAGACCATGTACCCGTAGATGCC ++ +=CCFDFFFA?#BHIJJJCIJJJJD;EEJB#@2GFIE#>A;C@FCA@##BD>AD=D#D?##A, +@EU861894-132/1 +CGGAACACCCATGGCGAAGGCAGCTCGCTGGCCTGCAACTGACCCTGAGGCGCAAAAGCCGGGGGATCAAACAGGTTTATATACCCTGGTAGTCCCGAGC ++ +CCC4FFDFFFHH#AIJDEDBC#H;#E8A(?>##D +@EU861894-130/1 +TCGTCACTAACCGAAGAACCTTACCAGGGTTTGACATCCCCGGCCAGCACGAGAGATCGGGTGGTGCCCGCAAGGGGACCGGGTGACAGGTGTTGACTAG ++ 
+:B@FDFDFHCA@:CDFDEDAA#DDF:-D, +@EU861894-126/1 +GGAATGGTCGGTGTAGCGGTGAAATGCGTAGATATCGATCGGAACACCCATGGCTAAGGTAGCTTGCTGGCGTGCAACAGACGCTCAGGCGCAAAAGCCG ++ +BC?#FF=FBFHHHFGIGEIJ@IJJIFI@HJI9IJ;GHBGIIJGHDC<>#9E7@CG#D(BHED#=;CD:;#>C#C?B, +@EU861894-124/1 +CTGTGCCGCAGCTAACCCAAAAAGTATTCCGCCTGGGGAGTACGGCCGCAAGGTTGAAACTCAACGGAATTGACGGGGAACCGCACAAGCGGTGGACCAT ++ +CCCFFFFD,GBBD@8D>:BDD<###, +@EU861894-120/1 +CTGTCACCCGGTCCGCTTGCGGGGAACACCCGATCTCTCGCGCTGTCCGGGGATGTCAAACCCTGGTAATGTTCTTCCGTTAGTGACGAATTAATGCAGA ++ +@C@FAFFFAHHHFA#JJ#J:IJ?H:HJJH9IJ#9I?IG:J#*IEBIC;I?AEJDIJJHHCICCHDJJ;C##C2EF?B#C>?DD6(B+#(D@E:DCDCC#, +@EU861894-118/1 +CGAGCGCAACCCTGACCCAATGTTGGCAGCGTAAAGTCGGCAACTCTTTGGGAACTGCCCGTGGAAGCGGGAGGAAGGTGGGGATGACGTCAAATAGGCA ++ +@?;FF?EFFHHHH#HEAIGJ@J1JIJIJJGDGIF7GGH=J#JGIJJ#:GFJID#FJJ#JID4?#DBDJHEFID;EA5EDD;C?CCDCBDA>:CB>#DD#C +@EU861894-116/1 +GAGGCAGTTCCGAGGTTGAGCCTCAGAATTTCACCCCCCACTTAAGGGCCCGCCTACGCCCTATTTACGCCCATTAAATTCGGACAACGCTGGCCGCCTA ++ +C@@FFFD2GHHFFICFAGJJI3JJIIII8II#<*JJH#JJIID(EGEGJ;GE@D7IJHF#B;#C?H@GH;FCI#DD@DE6CCDCDDC<#ACD4 +@EU861894-114/1 +TTACTAGGGATTCCGGCTTCATGCAGGCGAGTTGCAGCCTGCAATCCGAACTGAGACCATATTTGTGCGATTGGCTCCTCTACGCGGCATAGCGTCGCGC ++ +@C:FFDD#BHHAAIJ(GIFGIJGHBHDI9ICHGE7JHEJI4IJBBIDIAHJIBCBFIF#J=BDFFDD8HCHJ8E:@@C#D##BA@DD5=E#A5B#D#B#F diff -r 000000000000 -r b0e3a2df7897 test-data/pear_assembled_results1.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/pear_assembled_results1.fastq Wed Feb 11 07:52:43 2015 -0500 @@ -0,0 +1,56 @@ +@EU861894-140/1 +CAACCATTCCACTCGCCTCTGCCGCACTCGAGGCCCCCAGTATGAGAGGCAGTTACGAGGTTGAGCCTCAGAATTTCTCCCCCCACTTAAGGGCCCGCCTACTCGCTCTTTACGACCAGTAAAATCGGTCAAAGCTCACCCCCTACGTATTACCGC ++ +@#CAAF4FHFG;DIJJI?JG#GIIIDECD#4H*1IBIIIH#?IDID*DFIIF?C#HjZmKE^ed@LEiedWfekA=7#iFmcD=W%Qc\g7ekeDjf_bU)G#EE=?'>:EHDG#CIHHJIH!#J:BI#JJH#IEGGGGCGJJH@*GJ;J#JJJD;EEiaAd4dFIG%ad^fciCdc6=ej_d^dlClbDAAT#7J#C>#J#F!#JDIBI(GGIIJCJHEH:JJ#IHJF=HJJHG+HIJI)kD_hQICPaCfaC`ERFfg@mAfjmeddfFPIIGI#HIG#HCI??JDIHIJBAG?F3?JJAJ*JJ!FG:?DD1HFDADF@@= 
+@EU861894-126/1 +GGAATGGTCGGTGTAGCGGTGAAATGCGTAGATATCGATCGGAACACCCATGGCGAAGGCAGCTCGCTGGCCTGCAACTGACGCTGAGGCGCAAAAGCCGGGGGACCAACCAGGATTAGATACTCTGGTAGTCCCGGCCGTAAACGATGAATA ++ +BC?#FF=FBFHHHFGIGEIJ@IJJIFI@HJI9IJ;GHBGIIJGE;g7adIEkLfk_gFbbC][aDfeDlYkQHFGG'(HG?#@F2JGIIEDCJG>!J3EFGEJ3EGJGJHAJHDHF?:CADF8C@ +@EU861894-124/1 +CTGTGCCGCAGCTAACCCAAAAAGTATTCCGCCTGGGGAGTACGGCCGCAAGGTTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATCTGCATTAATTCGTTACTAACCTAAGAACCTTAGCAGGGTTTGACATCCCCG ++ +CCCFFFFD,GBdlclfkD`kAiiCD5hZS##HGBGJFFI6HFG#IJF:JHI#FIEJFG2#HI#HJ:GL#NJJG#E@#<:J2JJCAIJEFHHJA2CH!422FF@@+ +@EU861894-120/1 +CTGTCACCCGGTCCGCTTGCGGGGAACACCCGATCTCTCGCGCTGTCCGGGGATGTCAAACCCTGGTAATTTTCTTCGGTTAGTGACGAATTAATGCACATGCTCCTCCCCTTGTGCGGGCCGCCGTCAATTCCTTTGAGTTTCAACCTTGCG ++ +@C@FAFFFAHHHFA#JJ#J:IJ?H:HJJH9IJ#9I?IG:J#*IEBIC;I?AEJJIJljHCkbbf[L^[C##jUYide=i_dheYBcSI(id^BdCll`CE7H=JGA!9G*#>E#HGC@4IF:#DEJCI#GJBAJJEJJGB4J;AJH#EF)#??BHHHHH;F=F=CC8 diff -r 000000000000 -r b0e3a2df7897 test-data/pear_unassembled_forward_results1.fastq diff -r 000000000000 -r b0e3a2df7897 test-data/reverse.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/reverse.fastq Wed Feb 11 07:52:43 2015 -0500 @@ -0,0 +1,56 @@ +@EU861894-140/2 +GCGGTAATACGTAGGGGGTGAGCTTTGACCGATTTTACTGGTCGTAAAGAGCGAGTAGGCGGGCCCTTAAGTGGGGGGAATAATTCTGACGCTCAACCTC ++ +;BCFDF#FHHFH)GJJHI##E'?=EE#G)JCB;A#C#E@###9CD#C<##@C +@EU861894-136/2 +TACCGCATGTGGATGTGCTCTGCCCTCAGAGCTTATCCAAAGCCTTCGGGCGCTTCGGGATGGGCACGCGGTCTACCAGCTATTTGGGTGGGTAACGTCC ++ +1B@FDFF:HHHA#AHGGGEG?@J<#J;JG*@HJJGCGGGGE>IFFDJJJCC#GCBHAFEJGIDEG#J;DBG#EDC)#C(GFD#C#@2?#D8CCD53D#AA +@EU861894-134/2 +GCCTTCGGGCACTTCGGGATGGGCCCGCGGCCTTTCAGCTAGTTGGGAGGGTACTGTCACATCAAGACTTCGACGGGTAGCTGCTCTGAGGGTTCGATCA ++ +@@+?+;FDDAHCHCIJFC#J7#I#?DDICIH;DBGD=6DD#DDDDDD####>#EA@@ +@EU861894-132/2 +GCGGCACAGGGGGGGTCGCTACCCCCAGCACCTAGTATTCATCGTTTACGGCCGGGAATACCAGCGTACCTAGTCCTGTTTGCTCCCCCGTCTTTTGCGC ++ +@1@DF=F)HH2H;JGJF3!FF+EIHH+#JC#BEHJ?JGBGFE8HDIFJJF?@GH#@B#B;ECEJ#C:I#D@C#C#A=DC#DDB!5FB+##AD' 
+@EU861894-130/2 +CAACATTGGGTGAGGGTTGCGCTTGTTGCATGACTTAACCCAACATCTCACGATAGTAGCTGACGACAACTGTGCAACCCCTGTCGACCGGTCCCCTTGC ++ +1@:DFDFDAH#+DGGG:EGFHI;+JJ::H##IF9CAI?IJ#I#D?GJDJBHF)#H##J@GJBADEGH;JC(!=3ACE>#DDE)C#AC +@EU861894-128/2 +CAAGGCCCCCAGTATGGGAGGCAGTTCCGAGGTTGAGCCTCAGAATCTCACCCCCCACTTCAGGGCCCGCCTTCGGTCTCATAACGCCCAGTAACTTCGG ++ +=@@FDADFH1DD?:GF!JJ*JAJJ?3F?GABJIHIDJ??ICH#GIH#IGIIE#I@BJIGD#J@IF#8E=C>D#A6##:ED#D')BBC@;&F?#80D;@@C +@EU861894-126/2 +TATTCATCGTTTACGGCCGGGACTACCAGAGTATCTAATCCTGGTTGGTCCCCCGGCTTTTGCGGCTCAGCGTCAGTTTCAGACCACCGAGCCGCCTTCG ++ +@C8FDAC:?FHDHJAHJGJGE3JEGFE3J!>GJCDEIIGJ2F@#?GH('GGFHFJ;JBCIBGB:#HFFD;DEEHE#BB#C#E#D>#!>A>#D#C##?DD' +@EU861894-124/2 +CGGGGATGTCAAACCCTGCTAAGGTTCTTAGGTTAGTAACGAATTAATGCAGATGCGTCACCGTTTGTGCGGGCCCCCGTTAATTCCTCAGCGTTTCTAC ++ +=C@F#DDDHGHAHB<:JH#IH#2GFJEIF#IHJ:FJI#GFH6IFFJGBGH##H9G5##JF#F;!HGJEGB#ADHH#ECA>(CAFD##D##@6CC5@?##' +@EU861894-122/2 +GTCCGAAGATACTGGGCGTAAAGAGCGCGTACGCGGCCCCTTAAGTGGAGGGTGAAATTCTGAGGCTCAACCTCGGAACTGCCACACATACTGTGGGCTT ++ ++@@FF224!HC2AJHHFEJIACJJ2J:<#@E#GJJC!JG#II)JIIF:#EIBII;JHC.HHFI@DGFDF/D@#D>EAF#C=HF#D##BC77:D#C?D:#A +@EU861894-120/2 +CGCAAGGTTGAAACTCAAAGGAATTGACGGCGGCCCGCACAAGGGGAGGAGCATGTGCGTTAATCCGTCACTAACCGAAGAAAGGTACCAGGGGATGGGA ++ +B1BFFDDBHHHHHB#*G9!AGJ=H7:C>JI#A):EF#GIB;DBEFBG=DFD5DH#!#A5#8?@@C##CC##' +@EU861894-118/2 +CCAGTGTAGCATGTGTGAAGCCCAGGGCGTAAGGGCCATGCCGATTTGACCTCCTCCCCATCTTCCTCCCGTTTACATGGGCACTTCCCAAAGAGTCGCC ++ +@CC++DD?ABHFHA+G#I#J3D2IJGJJCJHJHEJIJGJ4IJII#I?(EA!GH#CGG8E##5JHEHCD@BE#=@H@##CDBC:&DCD>D>?#C:C###C@ +@EU861894-116/2 +AATGACCGTACCACACGAAAAAGCCCTGGATAACTACGTGCCAGCAGCCGCGATAATACGTAGGGGGCCAACGTTGTCCTAAGTGTCTGGGCGTATAAAG ++ +BBCFFD#BFHHH:G#JE@F#J+JJJ@#CJ#HIIGGF#GIGEF=HJ?ECFJBE!BDIGE#EEI=>BHHC@D#G#=A#CFA#DECD##CDBD>#CC?#@#@D +@EU861894-114/2 +CCCTTACGCCCTGGGCCTGATACATGCTACAATGGCCGTTAATCAGAGCTACGCGATGCTACGAGGCGGAGCCAATCGCACAAGTAGGGTCACAGTTCGG ++ +8CC=F=F;HHHHHB??#)FE#HJA;J4BG>BI;@#JFI):GJ#D? + + + + +