# HG changeset patch # User pjbriggs # Date 1526310619 14400 # Node ID a3af1ff4cad1b821d17442b78246aaad82557144 # Parent 3f8bf1a0403b5c09c1811d6e308014ae5b34744a pal_finder 0.02.04.7 for testing. diff -r 3f8bf1a0403b -r a3af1ff4cad1 README.rst --- a/README.rst Thu Mar 22 07:21:26 2018 -0400 +++ b/README.rst Mon May 14 11:10:19 2018 -0400 @@ -63,7 +63,9 @@ 0.02.04.7 - Trap for errors in ``pal_finder_v0.02.04.pl`` resulting in bad ranges being supplied to ``primer3_core`` for some reads via - ``PRIMER_PRODUCT_RANGE_SIZE``. + ``PRIMER_PRODUCT_RANGE_SIZE`` (and enable 'bad' reads to be output + to a dataset); add new option to use a random subset of reads for + microsatellite detection. 0.02.04.6 - Update to get dependencies using ``conda`` when installed from the toolshed (this removes the explicit dependency on Perl 5.16 introduced in 0.02.04.2, as a result the outputs from the tool are diff -r 3f8bf1a0403b -r a3af1ff4cad1 pal_finder_macros.xml --- a/pal_finder_macros.xml Thu Mar 22 07:21:26 2018 -0400 +++ b/pal_finder_macros.xml Mon May 14 11:10:19 2018 -0400 @@ -14,6 +14,7 @@ + @@ -21,6 +22,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 3f8bf1a0403b -r a3af1ff4cad1 pal_finder_wrapper.sh --- a/pal_finder_wrapper.sh Thu Mar 22 07:21:26 2018 -0400 +++ b/pal_finder_wrapper.sh Mon May 14 11:10:19 2018 -0400 @@ -32,6 +32,7 @@ # -primers: run the 'primers' filter option # -occurrences: run the 'occurrences' filter option # -rankmotifs: run the 'rankmotifs' filter option +# --subset N: use a subset of reads of size N # # pal_finder is available from http://sourceforge.net/projects/palfinder/ # @@ -104,7 +105,8 @@ OUTPUT_ASSEMBLY= FILTERED_MICROSATS= FILTER_OPTIONS= -BAD_PRIMER_RANGES= +SUBSET= +RANDOM_SEED=568765 # # Collect command line arguments if [ $# -lt 2 ] ; then @@ -220,6 +222,10 @@ shift OUTPUT_ASSEMBLY=$1 ;; + --subset) + shift + SUBSET=$1 + ;; *) echo Unknown option: $1 >&2 exit 1 @@ -234,6 +240,25 @@ fatal "primer3_core not found" fi # +# Check the n-mers specification +if [ $MIN_6_MER_REPS -ne 0 ] ; then + if [ $MIN_5_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 5-mers cannot be zero if number of 6-mers is non-zero" + fi +fi +if [ $MIN_5_MER_REPS -ne 0 ] ; then + if [ $MIN_4_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 4-mers cannot be zero if number of 5-mers is non-zero" + fi +fi +if [ $MIN_4_MER_REPS -ne 0 ] ; then + if [ $MIN_3_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 3-mers cannot be zero if number of 4-mers is non-zero" + fi +fi +if [ $MIN_2_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 2-mer repeats cannot be zero" +fi # Set up the working dir if [ "$PLATFORM" == "Illumina" ] ; then # Paired end Illumina data as input @@ -253,6 +278,14 @@ PRIMER_MISPRIMING_LIBRARY=$(basename $PRIMER_MISPRIMING_LIBRARY) mkdir Output # +# Use a subset of reads +if [ ! -z "$SUBSET" ] ; then + echo "### Extracting subset of reads ###" + $(dirname $0)/fastq_subset.py -n $SUBSET -s $RANDOM_SEED $fastq_r1 $fastq_r2 + fastq_r1="subset_r1.fq" + fastq_r2="subset_r2.fq" +fi +# # Copy in the default config.txt file echo "### Creating config.txt file for pal_finder run ###" /bin/cp $PALFINDER_DATA_DIR/config.txt . @@ -311,8 +344,13 @@ fi tail -$MAX_LINES pal_finder.log # -# Check that log ends with "Done!!" message -if [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then +# Check for success/failure +if [ ! -z "$(tail -n 1 pal_finder.log | grep 'No microsatellites found in any reads. Ending script.')" ] ; then + # No microsatellites found + fatal ERROR pal_finder failed to locate any microsatellites + exit 1 +elif [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then + # Log doesn't end with "Done!!" (indicates failure) fatal ERROR pal_finder failed to complete successfully fi echo "### pal_finder finished ###" @@ -321,33 +359,38 @@ echo "### Checking for errors ###" if [ ! -z "$(grep 'primer3_core: Illegal element in PRIMER_PRODUCT_SIZE_RANGE' pal_finder.log)" ] ; then echo WARNING primer3 terminated prematurely due to bad product size ranges + $(find_bad_primer_ranges Output/pr3in.txt bad_primer_ranges.txt) + N_BAD_PRIMERS=$(cat bad_primer_ranges.txt | wc -l) if [ -z "$BAD_PRIMER_RANGES" ] ; then # No output file so report to stderr - cat >&2 <&2 - cat >&2 <"$BAD_PRIMER_RANGES" + /bin/mv bad_primer_ranges.txt "$BAD_PRIMER_RANGES" fi +else + N_BAD_PRIMERS=0 fi # # Sort microsat_summary output echo "### Sorting microsat summary output ###" head -n 7 Output/microsat_summary.txt | sort >microsat_summary.sorted +echo "readsWithBadRanges:"$'\t'"$((N_BAD_PRIMERS * 2))" >>microsat_summary.sorted grep "^$" Output/microsat_summary.txt>>microsat_summary.sorted grep "^Microsat Type" Output/microsat_summary.txt >>microsat_summary.sorted -tail -n +11 Output/microsat_summary.txt >>microsat_summary.sorted +tail -n +11 Output/microsat_summary.txt | sort -r -n -k 5 >>microsat_summary.sorted mv microsat_summary.sorted Output/microsat_summary.txt # # Sort PAL_summary output diff -r 3f8bf1a0403b -r a3af1ff4cad1 pal_finder_wrapper.xml --- a/pal_finder_wrapper.xml Thu Mar 22 07:21:26 2018 -0400 +++ b/pal_finder_wrapper.xml Mon May 14 11:10:19 2018 -0400 @@ -9,7 +9,7 @@ biopython pandaseq - @@ -91,6 +95,13 @@ label="Select FASTQ dataset collection with R1/R2 pair" /> + + + + + + + @@ -106,7 +117,7 @@ - + @@ -158,8 +169,9 @@ label="Maximum acceptable difference between melting temperatures of left and right primers (PRIMER_PAIR_MAX_DIFF_TM)" help="Temperature should be in degrees Celsius" /> + - + @@ -254,18 +266,76 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -300,26 +370,49 @@ ------------- +.. class:: infomark + +**Known issues** + .. class:: warning -**Known problems** - -.. class:: infomark - -**Bad primer product size ranges** +**Low number of reads used for microsatellite detection/bad primer product size ranges** For some datasets pal_finder may generate 'bad' product size ranges (where the lower limit exceeds the upper limit) for one or more reads, for input into -primer3_core. +primer3_core. In these cases primer3_core will terminate prematurely, which can +result in a substantially lower number of reads being used for microsatellite +detection and potentially sub-optimal primer design. + +The number of reads generating the bad size ranges are reported in the +*Summary of microsat types* output dataset as 'readsWithBadRanges'. Ideally +the reported value should be zero. + +The conditions which cause this issue within pal_finder are still unclear, +however we believe it to be associated with short or low quality reads. If this +problem affects your data then: + +* Ensure that the input data are sufficiently trimmed and filtered (using + e.g. the Trimmomatic tool) before rerunning pal_finder. -If this occurs then the tool will terminate with an error. A list of the reads -for which the bad ranges were generated can be found in the error message -which can be accessed via the 'bug' icon from a failed dataset. +* A list of read IDs for which pal_finder generates bad product size ranges can + be output by turning on *Output IDs for input reads which generate bad primer + ranges*. This outputs an additional dataset with a list of read IDs which can + be used to remove read pairs from the input Fastq files (using e.g. the *Filter + sequences by ID* tool) before rerunning pal_finder. + +.. class:: warning -The conditions which cause this error are unclear. However we believe it to be -associated with short or low quality reads. It is recommended that the input -data are sufficiently trimmed and filtered (using e.g. the Trimmomatic tool) -before rerunning pal_finder. +**Pal_finder takes a long time to run for large input datasets** + +pal_finder was originally developed using MiSeq data, and is not optimised for +working with the larger Fastqs that are output from other platforms such as +HiSeq and NextSeq. As a consequence pal_finder may take a very long time to +complete when operating on larger datasets. + +If this is a problem then the tool can be run using a subset of the input reads +by unchecking the *Use all reads...* option and entering either an integer number +of reads to use, or a decimal fraction (e.g. 0.5 will select 50% of the reads). ------------- diff -r 3f8bf1a0403b -r a3af1ff4cad1 pal_finder_wrapper_utils.sh --- a/pal_finder_wrapper_utils.sh Thu Mar 22 07:21:26 2018 -0400 +++ b/pal_finder_wrapper_utils.sh Mon May 14 11:10:19 2018 -0400 @@ -38,17 +38,21 @@ # sequence ids where the PRIMER_PRODUCT_SIZE_RANGE has # upper limit which is smaller than lower limit local pr3in=$1 + local outfile=$2 local pattern="^(SEQUENCE_ID|PRIMER_PRODUCT_SIZE_RANGE)" for line in $(grep -E "$pattern" $pr3in | sed 's/ /^/' | sed 'N;s/\n/*/') do # Loop over pairs of SEQUENCE_ID and PRIMER_PRODUCT_SIZE_RANGE # keywords in the primer3 input if [ ! -z "$(echo $line | grep ^SEQUENCE_ID)" ] ; then - # Extract the values + # Lines look like: + # SEQUENCE_ID=(AT_1_16)(AT_1_16)M00879:99:000000000-AH9KG:1:2107:10006:2535*PRIMER_PRODUCT_SIZE_RANGE=194-329^59-194 local size_range=$(echo $line | cut -d'*' -f2 | cut -d'=' -f2 | tr '^' ' ') local seq_id=$(echo $line | cut -d'*' -f1 | cut -d'=' -f2) - else - local size_range=$(echo $line | cut -d'*' -f1 | cut -d'=' -f2) + elif [ ! -z "$(echo $line | grep ^PRIMER_PRODUCT_SIZE_RANGE)" ] ; then + # Lines look like: + # PRIMER_PRODUCT_SIZE_RANGE=194-329^59-194*SEQUENCE_ID=(AT_1_16)(AT_1_16)M00879:99:000000000-AH9KG:1:2107:10006:2535 + local size_range=$(echo $line | cut -d'*' -f1 | cut -d'=' -f2 | tr '^' ' ') local seq_id=$(echo $line | cut -d'*' -f2 | cut -d'=' -f2) fi seq_id=$(echo $seq_id | cut -d')' -f3) @@ -58,14 +62,14 @@ for range in $(echo $size_range) ; do local lower=$(echo $range | cut -d'-' -f1) local upper=$(echo $range | cut -d'-' -f2) - if [ $lower -gt $upper ] ; then + if [ "$lower" -gt "$upper" ] ; then bad_range=yes break fi done # Report if the range is wrong if [ ! -z "$bad_range" ] ; then - echo "$seq_id ($size_range)" + echo "${seq_id}"$'\t'"(${size_range})" >>$outfile fi done } diff -r 3f8bf1a0403b -r a3af1ff4cad1 test-data/illuminaPE_bad_primer_read_ids.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_bad_primer_read_ids.out Mon May 14 11:10:19 2018 -0400 @@ -0,0 +1,1 @@ +M00879:99:000000000-AH9KG:1:2107:14372:5471 (74-71 77-74) diff -r 3f8bf1a0403b -r a3af1ff4cad1 test-data/illuminaPE_microsats_bad_ranges.out.re_match --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_microsats_bad_ranges.out.re_match Mon May 14 11:10:19 2018 -0400 @@ -0,0 +1,7 @@ +readPairID\ Motifs\(bases\)\ Bases\ in\ all\ Motifs\ Possible\ Extended\ Possible\ Spanning\ Primers\ found\ \(1\=y\,0\=n\)\ F\ Primer\ Name\ Forward\ Primer\ R\ Primer\ Name\ Reverse\ Primer\ Amplicon\ Motifs\ Number\ motif\ bases\ in\ amplicon\ Primers\ on\ sep\ reads\ Extend\ with\ primers\ Spand\ with\ primers\ Occurances\ of\ Forward\ Primer\ in\ Reads\ Occurances\ of\ Reverse\ Primer\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ PALs +M00879\:99\:000000000\-AH9KG\:1\:2107\:10006\:2535\ AT\(16\)\ AT\(16\)\ \ 32\ AT\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ +M00879\:99\:000000000\-AH9KG\:1\:2107\:10032\:7900\ .*\ \ 164\ \ \ 1\ test\_.*\ (CGAAAGATGCTATAGAAGCGATGGGG|TATCTATCTATCAATCCGCTCCCC)\ test\_.*\ (GGACATCGAGATAGAAAGGGGACCG|TGATTGGACATCGAGATAGAAAGGG)\ .*\ \ 80\ 1\ \ \ .*\ .*\ 1\ 1 +M00879\:99\:000000000\-AH9KG\:1\:2107\:10061\:6317\ .*\ \ 76\ \ \ 1\ test\_.*\ GAGAGAGTACATAGATATCTCACGGGGCG\ test\_.*\ GCAACGGCACAGATCTCTTCTACGG\ .*\ \ 22\ 1\ \ \ 1\ 1\ 1\ 1 +M00879\:99\:000000000\-AH9KG\:1\:2107\:10072\:8112\ .*\ \ 44\ \ \ 1\ test\_.*\ AGTTTGTTACAGGGCATGACAACGG\ test\_.*\ TCCTGTTATCTTCTTGTTGCTTGGC\ .*\ \ 22\ 1\ \ \ 1\ 1\ 1\ 1 +M00879\:99\:000000000\-AH9KG\:1\:2107\:10084\:6474\ .*\ \ 100\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ +M00879\:99\:000000000\-AH9KG\:1\:2107\:14372\:5471\ .*\ \ 68\ .*\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ diff -r 3f8bf1a0403b -r a3af1ff4cad1 test-data/illuminaPE_microsats_subset.out.re_match --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_microsats_subset.out.re_match Mon May 14 11:10:19 2018 -0400 @@ -0,0 +1,6 @@ +readPairID\ Motifs\(bases\)\ Bases\ in\ all\ Motifs\ Possible\ Extended\ Possible\ Spanning\ Primers\ found\ \(1\=y\,0\=n\)\ F\ Primer\ Name\ Forward\ Primer\ R\ Primer\ Name\ Reverse\ Primer\ Amplicon\ Motifs\ Number\ motif\ bases\ in\ amplicon\ Primers\ on\ sep\ reads\ Extend\ with\ primers\ Spand\ with\ primers\ Occurances\ of\ Forward\ Primer\ in\ Reads\ Occurances\ of\ Reverse\ Primer\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ PALs +ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:17449\:1584\ (AC|TG)\(36\)\ \ 36\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ +ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:5626\:1554\ AT\(14\)\ (AC|TG)\(16\)\ (AC|TG)\(16\)\ AT\(12\)\ \ 58\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ +ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:5879\:1238\ AT\(12\)\ \ 12\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ +ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ (AC|TG)\(12\)\ \ 12\ \ \ 1\ test\_.*\ AAGTACAGTGGGGAGGCTGG\ test\_.*\ TTTTCTACACAGCTCAAGTAGCCC\ (AC|TG)\(12\)\ \ 12\ 1\ \ \ 1\ 1\ 1\ 1 +ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8899\:1514\ (AC|TG)\(12\)\ (AC|TG)\(12\)\ \ 24\ \ \ 1\ test\_.*\ TCTTTATCTAAACACATCCTGAAATACC\ test\_.*\ AAACGCAATTATTTTGAGATGTCC\ (AC|TG)\(12\)\ (AC|TG)\(12\)\ \ 24\ 1\ \ \ 1\ 2\ 1\ 1 diff -r 3f8bf1a0403b -r a3af1ff4cad1 test-data/illuminaPE_r1_bad_ranges.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_r1_bad_ranges.fq Mon May 14 11:10:19 2018 -0400 @@ -0,0 +1,24 @@ +@M00879:99:000000000-AH9KG:1:2107:10006:2535 1:N:0:TAAGGCGA+TAGATCGC +ATATATATATATATATGTAGTATAATCTCACTGGAAAGGAGACGAAAACGAGCAGCTCCGAGCTTTCGACTTTATTTCAAGTCATCTTCAGGGCAACTGACAAATTTTTGTGTAGCAATAGTATATAGACACCAGACGAGATTCCTGACCTCACATCTGGGAGG ++ +CCCCCFGGGGGGGGGGGGGGGFCGGGGGGFGCG@FFGCDGGGGGGGGEGGGDGGFDCFGGGCDGGGGGGGGGGGGGGGGDGFGFGFGFFGGG@FEGGDEFGGGGGEFGGGGGGGFGFGGGGGGAFE?9FGGGGGGGG@F7F7AFDGGFFGFFGGG9EE9D8?>8 +@M00879:99:000000000-AH9KG:1:2107:10032:7900 1:N:0:TAAGGCGA+TAGATCGC +ATCTATGTATTTATCTATCTCCTCTCTCTCTCTCTCTCTCTCTCTCTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCAATCCGCTCCCCTTTCTATCTCGATGTCCAATCATTATACACACACCTACACGAAAGATGCTATAGAAGCGATGGGGGACTATAGGTGTATAGCAACTCTATACATCAACCAGTCTCTGCGCTCGTCCCCCTGTCG ++ +CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGEGGGGGGGGGGGGGGGFGGGCFGG@FGGEFGGDGGGGGGGGGGGGGGA9EFFC,@@F@FG8FGG9?EGG,,4:?@FFAGG@EFF+,4,@,9E9,@ECEEEFC+8+4>B88;=E3,,2,@A68,7=@DDGED=A8=A8FGE+@DDD,++0@+=0+<3<>095?** +@M00879:99:000000000-AH9KG:1:2107:10061:6317 1:N:0:TAAGGCGA+TAGATCGC +TATATATATATATGAGAAAACTGGTGTCGGTGAGGACTCATCAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGTACATAGATATCTCACGGGGCGAGCTGCCGCTGCTGCTGCTGCCGCGGAGGGTGCAGAATAAGACGAGGAGGTGGAGAGAGGTGTGGAAAAAGTGCCGTAGAAGAGATCTGTGCCGTCGCACAGCTCTCGTGGTGCACCTATGGGAGAGGCGG ++ +CCCC@FFFFFGGFGGGGGGGG8EFG?FFGGEGGGFGGGGGGGGGGGGGGDFGGGGGGGGGGGGGGGGGGGGFG@CFFGGGGGGGGECGGGGGGGF,9,,C,E,?,,9,5<,,:=+78B+++@,,,+++6+,8,,A55A,,+++@+87:*3*@3D79;,,,451***>B4<**6,=***4=;8*+++*/*21+0**3//CEG86+A9:1<++2**)*)2)**+629*9*715:***00*:*755*0*.00 +@M00879:99:000000000-AH9KG:1:2107:10072:8112 1:N:0:TAAGGCGA+TAGATCGC +GTGTGTGTGTGTGTGTGTGTGTTGGTTGTGAAAGTTATCTCAGCTTGTTATATGGAATGACGACTGTCAAGCTTACCAGTTTCTTACATGGGATGACAAAGGCTAGGCTTACCAACTTGTTACATGTAATGACATCGGTAAAGCTTATCAGTTTGTTACAGGGCATGACAACGGTCAAGCTTACCAGCATGATACATGTAGTGACGAAGGCCAAGCCACAAGAAGATAACAGGCAC ++ +CCCCCFGGGGFFFGGFFGFGGGGGGGGGGDFGGGCGFGGGG,FFGGFFFCDCDEFFCFGGFGCEGDCG7>CD*;F +@M00879:99:000000000-AH9KG:1:2107:14372:5471 1:N:0:TAAGGCGA+TAGATCGC +GAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAG ++ +CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG +@M00879:99:000000000-AH9KG:1:2107:10084:6474 1:N:0:TAAGGCGA+TAGATCGC +CAATTACCTAAGTATCTGAATAGTCATTTCCCTATTAGGGGTGATCATAGTCGAACGGTAGCACTCCCGCTGGTCACGCTCTGGACCAGAGTTCAAATCTTGGCTGGTCGAGGTTTACGTGTTTTATATCTATCTATCTATCTATCTATCTATCTATCTATATATATATATATATACATATATAT ++ +CCCCCGGGGGGGFGGGGGGGGGGFGGGGGGGGGCFGGGGGGFGGGGGGGGGGGGGGG@FGGGGGFGGGGGGDGGGGGGGGGGGGGGGGGGFECCEEEFGGGGGGGG>FGG>C+BE,BF:FGFFGGGGGGGGGGGGGGGGGFGGGGGGGFGGGGFDEEGGGGFGGGCFGGGGGGGGFDFDCFCD;E diff -r 3f8bf1a0403b -r a3af1ff4cad1 test-data/illuminaPE_r1_no_microsats.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_r1_no_microsats.fq Mon May 14 11:10:19 2018 -0400 @@ -0,0 +1,32 @@ +@ILLUMINA-545855:49:FC61RLR:2:1:10979:1695 1:N:0:TCCTGA +TACTGTTTAGAATAGACTGTTCTCCCACTATATTTTGCATTGGTGCATACTCAGCTTTAGTAATAAGTGTGATTCTGGTAGAGAGAGAGAGAGATACCAACCTCTTCTTCCCACTA ++ +IIIIIHIIIGHHIIDGHIIIIIIHIIIIIIIIIIIIHGIIIIHGIIIIHHHIHHGHIIHHEHHIGHGDHDHHHGGHEGAHGDGEHEGEGBGCEDBDDGEEFDEECGDEGDEDEDE8 +@ILLUMINA-545855:49:FC61RLR:2:1:19063:1614 1:N:0:TCCTGA +TATATATATATATACACATATATATATATATTTTTTACATTATTTCACTTCGCCCAAACTAGAGAGTCTAACAAAGTACAACCCAGCATATTAAAGTTCATCTCAGTTTTGTTCTG ++ +IIIIIIIGIEIIIIGIIIHHIHIGIGIIIIIHIIIIHGBEIIIIHIFHHIHFIIIIFHFIHIIHFF>EEEBDGDD;BD8DDBBDDBGHGHHHHEFE=DBCDEEEBEBEGHGAFH@E +@ILLUMINA-545855:49:FC61RLR:2:1:1978:1220 1:N:0:TCCTGA +TCATAAGAATGAGCAGTAAACAAAGGCAAAGGGGAGATAACACACACACACAAAATAAAAAAACATCAATTTCTAATACACGCCTTTATTATAAAGAAATAAATCACTGAAAAACA ++ +DDDDDBDBDDB7?BBB5BB>1?DDD?4(6367;>?2<164=;>8<0DDD>>A6A?>;;6>+6>><>4-8484888&@D@@DD>?<9DD;6DDC;;;=HHHGHHHHHHHHHHHHHHHHGHHHFHHHHFHFDHHHHHHHHHHHHHHHHGHHGGHHHHHHHHHHEHHHHGEHHHEFE>EB +@ILLUMINA-545855:49:FC61RLR:2:1:8157:1636 1:N:0:TCCTGA +TACTAGTCTAATAATTGCAGGCAGCTGAACTAGATAGGTCCTAAAGTACAGTGGGGAGGCTGGTGTGTGTGTGTGCATGGGATTGTCAGCCTTACCATCAGTCCTGATTTGTAGGT ++ +HHHHHHHHHHHHHFHHHHHGHHHHHHGHHHHGHHHHHH>HHGGGGFFHHHHGHGHHFHHHHGGBGDCAC>CBDBDDCFGED@BDDB@BBBBECCD@C=?9==@CECB=BBB8 +@ILLUMINA-545855:49:FC61RLR:2:1:8899:1514 1:N:0:TCCTGA +TCTTTATCTAAACACATCCTGAAATACCATCTGTTACACACACACACAGCAGTGGAAGTATAAAAAAAAATCTGGACATCTCAAAATAATTGCGTTTCTGAAGTGTTACATTTTTC ++ +IIIIIHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGIIIIIGIIIIIIIIIIIIIIHHGIIIIHIHHHHIHHHHHHHGHHHHHGFHEHEHHHHHHHII> diff -r 3f8bf1a0403b -r a3af1ff4cad1 test-data/illuminaPE_r2_bad_ranges.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_r2_bad_ranges.fq Mon May 14 11:10:19 2018 -0400 @@ -0,0 +1,24 @@ +@M00879:99:000000000-AH9KG:1:2107:10006:2535 2:N:0:TAAGGCGA+TAGATCGC +CCTCCCAGATGTGAGGTCAGGAATCTCGTCTGGTGTCTATATACTATTGCTACACAAAAATTTGTCAGTTGCCCTGAAGATGACTTGAAATAAAGTCGAAAGCTCGGAGCTGCTCGTTTTCGTCTCCTTTCCAGTGAGATTATACTACATATATATATATATAT ++ +CCCCCGGGGGFGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG8FFFGGGGGGFGGGGGGGCEFFGGGGGFGGGGGGGGGGGGGGGFGFCEGGFGG8EFEFDFGEFGFFGGCFGGFAFF,9AFDGGGGGGGGGGGGGGEADDFGFEA +@M00879:99:000000000-AH9KG:1:2107:10032:7900 2:N:0:TAAGGCGA+TAGATCGC +CTACAGGGGGCCGAGCACAGAGACTGGTTGATGTATAGAGTTGCTATACTCCTATAGTCCCCCATCGCTTCTATAGCATCTTTCGTGTAGGTGTGTCTATAATGATTGGACATCGAGATAGAAAGGGGACCGGATTGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAGAGAGAGAGAGAGAGAGAGAGCGAGGAGATAGATAAATACATCGAT ++ +CCCCCGGGGDGGGEGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFGGGGGGGGGGGGGGGGGGGCGFGGGGGGGGGGGGGGGGGGGGGGCGGGGGGGGGFFGGGFGDGGGFGFGGGGGGGGGGEGFFGFFCGFGGGCFG9FFGGGGFCFGGGGGGGGG8DAFGFFGFGGDGDFGFCFFD72DAFF>?BCFFF5>9A>DFFFFFF*):494?>BFEE4<*9*@:=?*-(- +@M00879:99:000000000-AH9KG:1:2107:10061:6317 2:N:0:TAAGGCGA+TAGATCGC +GCCTCGGGACCTTGGCACGCCGCCTCTCCCATAGATGCACCATGAGATCCGTGCAACGGCACAGATCTCTTCTACGGAACTTTTGCCACACCTCACTTCACCTTCTTGCCTTATTCTGCACCACCTGCAGCAGCAGCAGCAGCAGCAGCTCGCCCCGAGTTACATCTATGTACTCTCTCCCTCTCTCTCTCTCTCTCTCTCTCCCTCTCTCTCTCCCTCCCTCTCCTCTCACCCCACACTCACACCCGC ++ +CCCCCFGGGGGGGFFGGGGGGGGGGGGGGGGGGFGGGFEGGGGEGFGGGGGGGGGGGEGGGGGGGGGCGFFGFGGFG@FGGFEF9FGGGCGGEFEFGGGDGG@FE?EEFGGGGG,EE,EFGGFGGFDG,@FFFFG8D8=E8>EEEGC=D=D6CEGC61C=8:ECFG8AA9<2CCC*7):(54>3-(0(*(0*-(511(6(/(6)6/(,( +@M00879:99:000000000-AH9KG:1:2107:10072:8112 2:N:0:TAAGGCGA+TAGATCGC +GTTCCTGTTATCTTCTTGTTGCTTGGCCTTCGTCACTACATGTAACATGCTGGTAAGCTTGACCGTTGTCATTCCCTGTAACAAACTGATAAGCTTTACCGATGTCATTACATGTAACCAGTTGGTAAGCCTAGCCTTTGTCATCCCATGTAAGAAACTGGTAAGCTTGACAGTCGTCATTCCATATAACAAGCTGAGATAACTTTCACAACCAACACACACACACACACACACACCTGTCTCCTATAC ++ +CCCCCGGGGGGGGGGGGFFGFGGGG7FFGGGGGGGGGGGGGFGFFFAEGGGCFFG@AGGGC,EFGGGGGGGG@EEGGGGFGGFGGGFGGFGCFGGGGGGGGCEEGGCFEFGDEFGGFG,CFFFEGGGDG9EFFFGGGGFAFGGGGG84E=EFGGG;AGDDFFFGGFGG8=DAFGFG=D88FFG@9D@@FDD+;D56D?FFFFFFD=7*;2:)=855)=DF=>=AAD==@DDA)=@@5)):)3;9A***9 +@M00879:99:000000000-AH9KG:1:2107:14372:5471 2:N:0:TAAGGCGA+TAGATCGC +CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC ++ +ACCCCFGEA@:5>@########################################################################## +@ILLUMINA-545855:49:FC61RLR:2:1:1978:1220 2:N:0:TCCTGA +TCCTCTGACTAGGCAACAACAGCTTTTTTGCTCCTGGGCAGAGGTGTTCCGAGTGTATATTTTTTATAATTACGGCGCGCATTGGAAATTGATGTTATTTTATTTTGCGTGTGTGT ++ +B?/?################################################################################################################ +@ILLUMINA-545855:49:FC61RLR:2:1:5879:1238 2:N:0:TCCTGA +TCCCCACCCTGTCATGGTTCTATGTATATATATATAGCCATGTGTGTGGTACCAGGGATAGGTACCTGGGATTGGGGCAGTGACACTTTAGTGCCCCGTACACTACATGATGTTTT ++ +HHHHHHHHHHHHHHHHHEHHGGGGFDHGHGHHHHHHFHHHHHGHEHEHEBEHEFB8EEEFEDAGEDBCEBBB@>BEC>@B@DCBBBECBBBA>B<;BA@A@###### +@ILLUMINA-545855:49:FC61RLR:2:1:6204:1090 2:N:0:TCCTGA +TGCTTTGGTTCTAAGAGAAAAACAAGTGATGCACAAGCAATTCCTCGCCACCACCCAACTGATGCCCAGCCACCCCCCCAAGCAGTGAAAGAGAGAGAGAGATGAACCCCCTTCAA ++ +HHHDBHHHEFGHHHHEHFHEHDDDDD@HHEHHEEHEFFEEEGDEGGGGEGEB>EBC>@@@@@@BB@FEBFBBB@A>AAA<;>A>;3>=??>>BB>?>@?1?>:9*@########## +@ILLUMINA-545855:49:FC61RLR:2:1:8044:1926 2:N:0:TCCTGA +TCAGGCAAGGTCACTGCCACCACTGGGGAGTGCCTGTTTCTGAAGGGCCCAGCCAACTCTGTCACAAGCTTTAACCATAATAAATCAGGAGGAGAGACAGGAGGATGCTGTGCTTT ++ +IIIIIIIIIIHIIIIIIIIIIIIIIGIIIIFIIIIIGHHIIIIIGGEIHGHGHHHGGCHHCGGGGGGHGHGEGEGEGGGDGBECCBGGEDGB;8?BBBBDABC@:3==;=:<30:6 +@ILLUMINA-545855:49:FC61RLR:2:1:8157:1636 2:N:0:TCCTGA +TAAACAACCAAATGAAACCATCTTTTCTACACAGCTCAAGTAGCCCTACATACAACACAAGCCACCTACAAATCAGGACTGATGGTAAGGCTGACAATCCAATCCACCACAACAAC ++ +HFHHHHHHHHHHHHDHGHHDHHHHHHHHHHHHHHHHHHGGGHHGHHHHHHHHHHHGHHHHH@HHHFHGHFHHEHHHHHHDHBFEBBHFFDHE>EFHBEFD################ +@ILLUMINA-545855:49:FC61RLR:2:1:8899:1514 2:N:0:TCCTGA +TATCATTGAAATTTTTATAAAAACTGTGAAGAGAAAAATGTAACACTTCAGAAACGCAATTATTTTGAGATGTCCAGATTTTTTTTTATACTTCCACTGCTGTGTGTGTGTGTAAC ++ +IG+GGGIIIIIIIIIIDIIIIIIGHHIIHIIIIIEGHIHIHIIHIIIIIIIIIIIIIIHIIDIIIIIEDIIIDIHGHFIIIIIIIIIHIFFBHGIGBGGHBDBFEGEGCGEIEDEB