# HG changeset patch
# User iuc
# Date 1423659163 18000
# Node ID b0e3a2df7897c7e2f8ca3e0d04102805f7e454ea
Uploaded
diff -r 000000000000 -r b0e3a2df7897 .shed.yml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml Wed Feb 11 07:52:43 2015 -0500
@@ -0,0 +1,3 @@
+# repository published to https://toolshed.g2.bx.psu.edu/repos/iuc/pear
+owner: iuc
+name: pear
diff -r 000000000000 -r b0e3a2df7897 pear.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pear.xml Wed Feb 11 07:52:43 2015 -0500
@@ -0,0 +1,204 @@
+
+ Paired-End read merger
+
+
+ pear
+
+
+
+
+
+
+
+
+ 0:
+ --max-asm-length $max_assembly_length
+ #end if
+ --min-asm-length $min_assembly_length
+ --min-trim-length $min_trim_length
+ --quality-theshold $quality_threshold
+ --max-uncalled-base $max_uncalled_base
+ --test-method $test_method
+ --empirical-freqs $empirical_freqs
+ -j "\${GALAXY_SLOTS:-8}"
+ --score-method $score_method
+ --cap $cap
+ $nbase
+]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 'assembled' in outputs
+
+
+ 'forward' in outputs
+
+
+ 'reverse' in outputs
+
+
+ 'discarded' in outputs
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Forward paired-end FASTQ file.
+ -r, --reverse-fastq Reverse paired-end FASTQ file.
+
+Optional:
+ -p, --p-value Specify a p-value for the statistical test. If the computed
+ p-value of a possible assembly exceeds the specified p-value
+ then the paired-end read will not be assembled. Valid options
+ are: 0.0001, 0.001, 0.01, 0.05 and 1.0. Setting 1.0 disables
+ the test. (default: 0.01)
+ -v, --min-overlap Specify the minimum overlap size. The minimum overlap may be
+ set to 1 when the statistical test is used. However, further
+ restricting the minimum overlap size to a proper value may
+ reduce false-positive assemblies. (default: 10)
+ -m, --max-assembly-length Specify the maximum possible length of the assembled
+ sequences. Setting this value to 0 disables the restriction
+ and assembled sequences may be arbitrarily long. (default: 0)
+ -n, --min-assembly-length Specify the minimum possible length of the assembled
+ sequences. Setting this value to 0 disables the restriction
+ and assembled sequences may be arbitrarily short. (default:
+ 50)
+ -t, --min-trim-length Specify the minimum length of reads after trimming the low
+ quality part (see option -q). (default: 1)
+ -q, --quality-threshold Specify the quality score threshold for trimming the low
+ quality part of a read. If the quality scores of two
+ consecutive bases are strictly less than the specified
+ threshold, the rest of the read will be trimmed. (default:
+ 0)
+ -u, --max-uncalled-base Specify the maximal proportion of uncalled bases in a read.
+ Setting this value to 0 will cause PEAR to discard all reads
+ containing uncalled bases. The other extreme setting is 1
+ which causes PEAR to process all reads independent of the
+ number of uncalled bases. (default: 1)
+ -g, --test-method Specify the type of statistical test. Two options are
+ available. (default: 1)
+ 1: Given the minimum allowed overlap, test using the highest
+ OES. Note that due to its discrete nature, this test usually
+ yields a lower p-value for the assembled read than the cut-
+ off (specified by -p). For example, setting the cut-off to
+ 0.05 using this test, the assembled reads might have an
+ actual p-value of 0.02.
+
+ 2: Use the acceptance probability (m.a.p). This test method
+ computes the same probability as test method 1. However, it
+ assumes that the minimal overlap is the observed overlap
+ with the highest OES, instead of the one specified by -v.
+ Therefore, this is not a valid statistical test and the
+ 'p-value' is in fact the maximal probability for accepting
+ the assembly. Nevertheless, we observed in practice that when
+ the actual overlap sizes are relatively small, test
+ 2 can correctly assemble more reads with only a slightly
+ higher false-positive rate.
+ -e, --empirical-freqs Disable empirical base frequencies. (default: use empirical
+ base frequencies)
+ -s, --score-method Specify the scoring method. (default: 2)
+ 1: OES with +1 for match and -1 for mismatch.
+ 2: Assembly score (AS). Use +1 for match and -1 for mismatch
+ multiplied by base quality scores.
+ 3: Ignore quality scores and use +1 for a match and -1 for a
+ mismatch.
+ -c, --cap Specify the upper bound for the resulting quality score. If
+ set to zero, capping is disabled. (default: 40)
+ -z, --nbase When merging a base-pair that consists of two non-equal
+ bases out of which none is degenerate, set the merged base
+ to N and use the highest quality score of the two bases.
+
+
+
+
+]]>
+
+
+ 10.1093/bioinformatics/btt593
+
+
diff -r 000000000000 -r b0e3a2df7897 test-data/forward.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/forward.fastq Wed Feb 11 07:52:43 2015 -0500
@@ -0,0 +1,56 @@
+@EU861894-140/1
+CAACCATTCCACTCGCCTCTGCCGCACTCGAGGCCCCCAGTATGAGAGGCAGTTACGAGGTTGAACCTCAGAATCTCACCCCACACTTAAGGGCGCGCCT
++
+@#CAAF4FHFG;DIJJI?JG#GIIIDECD#4H*1IBIIIH#?IDID*DFIIF?C#HGBJICJBC#JEGEA>ECH#=7#DDF@##D#D<@C#CC@#C?D@,
+@EU861894-138/1
+TCCGAGGTTGAGCCTCAGAATTTCACCCCCCACTTAACGGCCCGACTCCGCGCTCCTTACGCCCAGTAAGTTCGGACAACGCTGGCCCCCTACGTACTAC
++
+@CCFFFBFFDHFHJJJ+#C@B9DEFDD@ED#EDBD>
+@EU861894-134/1
+TTGTGCAAGAGTCCTAATTGCTGCCCCCCGTAGGAGTCTGGGCCGTATCTCAGTCCCAATCCGGCTGTTCGTCCCCTCAGACCATGTACCCGTAGATGCC
++
+=CCFDFFFA?#BHIJJJCIJJJJD;EEJB#@2GFIE#>A;C@FCA@##BD>AD=D#D?##A,
+@EU861894-132/1
+CGGAACACCCATGGCGAAGGCAGCTCGCTGGCCTGCAACTGACCCTGAGGCGCAAAAGCCGGGGGATCAAACAGGTTTATATACCCTGGTAGTCCCGAGC
++
+CCC4FFDFFFHH#AIJDEDBC#H;#E8A(?>##D
+@EU861894-130/1
+TCGTCACTAACCGAAGAACCTTACCAGGGTTTGACATCCCCGGCCAGCACGAGAGATCGGGTGGTGCCCGCAAGGGGACCGGGTGACAGGTGTTGACTAG
++
+:B@FDFDFHCA@:CDFDEDAA#DDF:-D,
+@EU861894-126/1
+GGAATGGTCGGTGTAGCGGTGAAATGCGTAGATATCGATCGGAACACCCATGGCTAAGGTAGCTTGCTGGCGTGCAACAGACGCTCAGGCGCAAAAGCCG
++
+BC?#FF=FBFHHHFGIGEIJ@IJJIFI@HJI9IJ;GHBGIIJGHDC<>#9E7@CG#D(BHED#=;CD:;#>C#C?B,
+@EU861894-124/1
+CTGTGCCGCAGCTAACCCAAAAAGTATTCCGCCTGGGGAGTACGGCCGCAAGGTTGAAACTCAACGGAATTGACGGGGAACCGCACAAGCGGTGGACCAT
++
+CCCFFFFD,GBBD@8D>:BDD<###,
+@EU861894-120/1
+CTGTCACCCGGTCCGCTTGCGGGGAACACCCGATCTCTCGCGCTGTCCGGGGATGTCAAACCCTGGTAATGTTCTTCCGTTAGTGACGAATTAATGCAGA
++
+@C@FAFFFAHHHFA#JJ#J:IJ?H:HJJH9IJ#9I?IG:J#*IEBIC;I?AEJDIJJHHCICCHDJJ;C##C2EF?B#C>?DD6(B+#(D@E:DCDCC#,
+@EU861894-118/1
+CGAGCGCAACCCTGACCCAATGTTGGCAGCGTAAAGTCGGCAACTCTTTGGGAACTGCCCGTGGAAGCGGGAGGAAGGTGGGGATGACGTCAAATAGGCA
++
+@?;FF?EFFHHHH#HEAIGJ@J1JIJIJJGDGIF7GGH=J#JGIJJ#:GFJID#FJJ#JID4?#DBDJHEFID;EA5EDD;C?CCDCBDA>:CB>#DD#C
+@EU861894-116/1
+GAGGCAGTTCCGAGGTTGAGCCTCAGAATTTCACCCCCCACTTAAGGGCCCGCCTACGCCCTATTTACGCCCATTAAATTCGGACAACGCTGGCCGCCTA
++
+C@@FFFD2GHHFFICFAGJJI3JJIIII8II#<*JJH#JJIID(EGEGJ;GE@D7IJHF#B;#C?H@GH;FCI#DD@DE6CCDCDDC<#ACD4
+@EU861894-114/1
+TTACTAGGGATTCCGGCTTCATGCAGGCGAGTTGCAGCCTGCAATCCGAACTGAGACCATATTTGTGCGATTGGCTCCTCTACGCGGCATAGCGTCGCGC
++
+@C:FFDD#BHHAAIJ(GIFGIJGHBHDI9ICHGE7JHEJI4IJBBIDIAHJIBCBFIF#J=BDFFDD8HCHJ8E:@@C#D##BA@DD5=E#A5B#D#B#F
diff -r 000000000000 -r b0e3a2df7897 test-data/pear_assembled_results1.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pear_assembled_results1.fastq Wed Feb 11 07:52:43 2015 -0500
@@ -0,0 +1,56 @@
+@EU861894-140/1
+CAACCATTCCACTCGCCTCTGCCGCACTCGAGGCCCCCAGTATGAGAGGCAGTTACGAGGTTGAGCCTCAGAATTTCTCCCCCCACTTAAGGGCCCGCCTACTCGCTCTTTACGACCAGTAAAATCGGTCAAAGCTCACCCCCTACGTATTACCGC
++
+@#CAAF4FHFG;DIJJI?JG#GIIIDECD#4H*1IBIIIH#?IDID*DFIIF?C#HjZmKE^ed@LEiedWfekA=7#iFmcD=W%Qc\g7ekeDjf_bU)G#EE=?'>:EHDG#CIHHJIH!#J:BI#JJH#IEGGGGCGJJH@*GJ;J#JJJD;EEiaAd4dFIG%ad^fciCdc6=ej_d^dlClbDAAT#7J#C>#J#F!#JDIBI(GGIIJCJHEH:JJ#IHJF=HJJHG+HIJI)kD_hQICPaCfaC`ERFfg@mAfjmeddfFPIIGI#HIG#HCI??JDIHIJBAG?F3?JJAJ*JJ!FG:?DD1HFDADF@@=
+@EU861894-126/1
+GGAATGGTCGGTGTAGCGGTGAAATGCGTAGATATCGATCGGAACACCCATGGCGAAGGCAGCTCGCTGGCCTGCAACTGACGCTGAGGCGCAAAAGCCGGGGGACCAACCAGGATTAGATACTCTGGTAGTCCCGGCCGTAAACGATGAATA
++
+BC?#FF=FBFHHHFGIGEIJ@IJJIFI@HJI9IJ;GHBGIIJGE;g7adIEkLfk_gFbbC][aDfeDlYkQHFGG'(HG?#@F2JGIIEDCJG>!J3EFGEJ3EGJGJHAJHDHF?:CADF8C@
+@EU861894-124/1
+CTGTGCCGCAGCTAACCCAAAAAGTATTCCGCCTGGGGAGTACGGCCGCAAGGTTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATCTGCATTAATTCGTTACTAACCTAAGAACCTTAGCAGGGTTTGACATCCCCG
++
+CCCFFFFD,GBdlclfkD`kAiiCD5hZS##HGBGJFFI6HFG#IJF:JHI#FIEJFG2#HI#HJ:GL#NJJG#E@#<:J2JJCAIJEFHHJA2CH!422FF@@+
+@EU861894-120/1
+CTGTCACCCGGTCCGCTTGCGGGGAACACCCGATCTCTCGCGCTGTCCGGGGATGTCAAACCCTGGTAATTTTCTTCGGTTAGTGACGAATTAATGCACATGCTCCTCCCCTTGTGCGGGCCGCCGTCAATTCCTTTGAGTTTCAACCTTGCG
++
+@C@FAFFFAHHHFA#JJ#J:IJ?H:HJJH9IJ#9I?IG:J#*IEBIC;I?AEJJIJljHCkbbf[L^[C##jUYide=i_dheYBcSI(id^BdCll`CE7H=JGA!9G*#>E#HGC@4IF:#DEJCI#GJBAJJEJJGB4J;AJH#EF)#??BHHHHH;F=F=CC8
diff -r 000000000000 -r b0e3a2df7897 test-data/pear_unassembled_forward_results1.fastq
diff -r 000000000000 -r b0e3a2df7897 test-data/reverse.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reverse.fastq Wed Feb 11 07:52:43 2015 -0500
@@ -0,0 +1,56 @@
+@EU861894-140/2
+GCGGTAATACGTAGGGGGTGAGCTTTGACCGATTTTACTGGTCGTAAAGAGCGAGTAGGCGGGCCCTTAAGTGGGGGGAATAATTCTGACGCTCAACCTC
++
+;BCFDF#FHHFH)GJJHI##E'?=EE#G)JCB;A#C#E@###9CD#C<##@C
+@EU861894-136/2
+TACCGCATGTGGATGTGCTCTGCCCTCAGAGCTTATCCAAAGCCTTCGGGCGCTTCGGGATGGGCACGCGGTCTACCAGCTATTTGGGTGGGTAACGTCC
++
+1B@FDFF:HHHA#AHGGGEG?@J<#J;JG*@HJJGCGGGGE>IFFDJJJCC#GCBHAFEJGIDEG#J;DBG#EDC)#C(GFD#C#@2?#D8CCD53D#AA
+@EU861894-134/2
+GCCTTCGGGCACTTCGGGATGGGCCCGCGGCCTTTCAGCTAGTTGGGAGGGTACTGTCACATCAAGACTTCGACGGGTAGCTGCTCTGAGGGTTCGATCA
++
+@@+?+;FDDAHCHCIJFC#J7#I#?DDICIH;DBGD=6DD#DDDDDD####>#EA@@
+@EU861894-132/2
+GCGGCACAGGGGGGGTCGCTACCCCCAGCACCTAGTATTCATCGTTTACGGCCGGGAATACCAGCGTACCTAGTCCTGTTTGCTCCCCCGTCTTTTGCGC
++
+@1@DF=F)HH2H;JGJF3!FF+EIHH+#JC#BEHJ?JGBGFE8HDIFJJF?@GH#@B#B;ECEJ#C:I#D@C#C#A=D#CCD>C#DDB!5FB+##AD'
+@EU861894-130/2
+CAACATTGGGTGAGGGTTGCGCTTGTTGCATGACTTAACCCAACATCTCACGATAGTAGCTGACGACAACTGTGCAACCCCTGTCGACCGGTCCCCTTGC
++
+1@:DFDFDAH#+DGGG:EGFHI;+JJ::H##IF9CAI?IJ#I#D?GJDJBHF)#H##J@GJBADEGH;JC(!=3ACE>#DDE)C#AC
+@EU861894-128/2
+CAAGGCCCCCAGTATGGGAGGCAGTTCCGAGGTTGAGCCTCAGAATCTCACCCCCCACTTCAGGGCCCGCCTTCGGTCTCATAACGCCCAGTAACTTCGG
++
+=@@FDADFH1DD?:GF!JJ*JAJJ?3F?GABJIHIDJ??ICH#GIH#IGIIE#I@BJIGD#J@IF#8E=C>D#A6##:ED#D')BBC@;&F?#80D;@@C
+@EU861894-126/2
+TATTCATCGTTTACGGCCGGGACTACCAGAGTATCTAATCCTGGTTGGTCCCCCGGCTTTTGCGGCTCAGCGTCAGTTTCAGACCACCGAGCCGCCTTCG
++
+@C8FDAC:?FHDHJAHJGJGE3JEGFE3J!>GJCDEIIGJ2F@#?GH('GGFHFJ;JBCIBGB:#HFFD;DEEHE#BB#C#E#D>#!>A>#D#C##?DD'
+@EU861894-124/2
+CGGGGATGTCAAACCCTGCTAAGGTTCTTAGGTTAGTAACGAATTAATGCAGATGCGTCACCGTTTGTGCGGGCCCCCGTTAATTCCTCAGCGTTTCTAC
++
+=C@F#DDDHGHAHB<:JH#IH#2GFJEIF#IHJ:FJI#GFH6IFFJGBGH##H9G5##JF#F;!HGJEGB#ADHH#ECA>(CAFD##D##@6CC5@?##'
+@EU861894-122/2
+GTCCGAAGATACTGGGCGTAAAGAGCGCGTACGCGGCCCCTTAAGTGGAGGGTGAAATTCTGAGGCTCAACCTCGGAACTGCCACACATACTGTGGGCTT
++
++@@FF224!HC2AJHHFEJIACJJ2J:<#@E#GJJC!JG#II)JIIF:#EIBII;JHC.HHFI@DGFDF/D@#D>EAF#C=HF#D##BC77:D#C?D:#A
+@EU861894-120/2
+CGCAAGGTTGAAACTCAAAGGAATTGACGGCGGCCCGCACAAGGGGAGGAGCATGTGCGTTAATCCGTCACTAACCGAAGAAAGGTACCAGGGGATGGGA
++
+B1BFFDDBHHHHHB#*G9!AGJ=H7:C>JI#A):EF#GIB;DBEFBG=DFD5DH#!#A5#8?@@C##CC##'
+@EU861894-118/2
+CCAGTGTAGCATGTGTGAAGCCCAGGGCGTAAGGGCCATGCCGATTTGACCTCCTCCCCATCTTCCTCCCGTTTACATGGGCACTTCCCAAAGAGTCGCC
++
+@CC++DD?ABHFHA+G#I#J3D2IJGJJCJHJHEJIJGJ4IJII#I?(EA!GH#CGG8E##5JHEHCD@BE#=@H@##CDBC:&DCD>D>?#C:C###C@
+@EU861894-116/2
+AATGACCGTACCACACGAAAAAGCCCTGGATAACTACGTGCCAGCAGCCGCGATAATACGTAGGGGGCCAACGTTGTCCTAAGTGTCTGGGCGTATAAAG
++
+BBCFFD#BFHHH:G#JE@F#J+JJJ@#CJ#HIIGGF#GIGEF=HJ?ECFJBE!BDIGE#EEI=>BHHC@D#G#=A#CFA#DECD##CDBD>#CC?#@#@D
+@EU861894-114/2
+CCCTTACGCCCTGGGCCTGATACATGCTACAATGGCCGTTAATCAGAGCTACGCGATGCTACGAGGCGGAGCCAATCGCACAAGTAGGGTCACAGTTCGG
++
+8CC=F=F;HHHHHB??#)FE#HJA;J4BG>BI;@#JFI):GJ#D?
+
+
+
+
+