Mercurial > repos > pjbriggs > pal_finder
changeset 15:a3af1ff4cad1 draft
pal_finder 0.02.04.7 for testing.
author | pjbriggs |
---|---|
date | Mon, 14 May 2018 11:10:19 -0400 |
parents | 3f8bf1a0403b |
children | 856cafcbf422 |
files | README.rst pal_finder_macros.xml pal_finder_wrapper.sh pal_finder_wrapper.xml pal_finder_wrapper_utils.sh test-data/illuminaPE_bad_primer_read_ids.out test-data/illuminaPE_microsats_bad_ranges.out.re_match test-data/illuminaPE_microsats_subset.out.re_match test-data/illuminaPE_r1_bad_ranges.fq test-data/illuminaPE_r1_no_microsats.fq test-data/illuminaPE_r2_bad_ranges.fq test-data/illuminaPE_r2_no_microsats.fq |
diffstat | 12 files changed, 347 insertions(+), 40 deletions(-) [+] |
line wrap: on
line diff
--- a/README.rst Thu Mar 22 07:21:26 2018 -0400 +++ b/README.rst Mon May 14 11:10:19 2018 -0400 @@ -63,7 +63,9 @@ 0.02.04.7 - Trap for errors in ``pal_finder_v0.02.04.pl`` resulting in bad ranges being supplied to ``primer3_core`` for some reads via - ``PRIMER_PRODUCT_RANGE_SIZE``. + ``PRIMER_PRODUCT_SIZE_RANGE`` (and enable 'bad' reads to be output + to a dataset); add new option to use a random subset of reads for + microsatellite detection. 0.02.04.6 - Update to get dependencies using ``conda`` when installed from the toolshed (this removes the explicit dependency on Perl 5.16 introduced in 0.02.04.2, as a result the outputs from the tool are
--- a/pal_finder_macros.xml Thu Mar 22 07:21:26 2018 -0400 +++ b/pal_finder_macros.xml Mon May 14 11:10:19 2018 -0400 @@ -14,6 +14,7 @@ <has_line line="readsWithMicrosat:	13" /> <has_line line="totalBases:	2320" /> <has_line line="totalReads:	20	(2 x 10)" /> + <has_line line="readsWithBadRanges:	0" /> <has_line line="Microsat Type	monomer length	total loci	loci w/ primers	reads with loci	total bases	extended	extended w/ primers	spanning	spanning w/ primers" /> <has_line_matching expression="(AC|TG)\t2\t7\t4\t7\t116\t0?\t0?\t0?\t0?" /> <has_line_matching expression="(AT|CG)\t2\t8\t0\t6\t106\t0?\t0?\t0?\t0?" /> @@ -21,6 +22,44 @@ </assert_contents> </output> </xml> + <xml name="output_illumina_microsat_subset_summary"> + <output name="output_microsat_summary"> + <assert_contents> + <has_line line="allExtended:	0" /> + <has_line line="allSpan:	0" /> + <has_line line="broken:	0" /> + <has_line line="compound:	2" /> + <has_line line="readsWithMicrosat:	7" /> + <has_line line="totalBases:	1160" /> + <has_line line="totalReads:	10	(2 x 5)" /> + <has_line line="Microsat Type	monomer length	total loci	loci w/ primers	reads with loci	total bases	extended	extended w/ primers	spanning	spanning w/ primers" /> + <has_line_matching expression="(AC|TG)\t2\t6\t3\t6\t104\t0?\t0?\t0?\t0?" /> + <has_line_matching expression="(AT|CG)\t2\t3\t0\t3\t38\t0?\t0?\t0?\t0?" /> + <has_line_matching expression="(AG|TC)\t2\t0\t0\t0\t0\t0?\t0?\t0?\t0?" 
/> + </assert_contents> + </output> + </xml> + <xml name="output_illumina_microsat_summary_bad_ranges"> + <output name="output_microsat_summary"> + <assert_contents> + <has_line line="allExtended:	2" /> + <has_line line="allSpan:	0" /> + <has_line line="broken:	0" /> + <has_line line="compound:	4" /> + <has_line line="readsWithMicrosat:	12" /> + <has_line line="totalBases:	2231" /> + <has_line line="totalReads:	12	(2 x 6)" /> + <has_line line="readsWithBadRanges:	2" /> + <has_line line="Microsat Type	monomer length	total loci	loci w/ primers	reads with loci	total bases	extended	extended w/ primers	spanning	spanning w/ primers" /> + <!-- + I'd like to do a basic check on the remainder of the file but + there are too many lines for the 'assert_contents' method (and + the tag doesn't provide the functionality to do a simple line + count + --> + </assert_contents> + </output> + </xml> <xml name="output_454_microsat_summary"> <output name="output_microsat_summary"> <assert_contents>
--- a/pal_finder_wrapper.sh Thu Mar 22 07:21:26 2018 -0400 +++ b/pal_finder_wrapper.sh Mon May 14 11:10:19 2018 -0400 @@ -32,6 +32,7 @@ # -primers: run the 'primers' filter option # -occurrences: run the 'occurrences' filter option # -rankmotifs: run the 'rankmotifs' filter option +# --subset N: use a subset of reads of size N # # pal_finder is available from http://sourceforge.net/projects/palfinder/ # @@ -104,7 +105,8 @@ OUTPUT_ASSEMBLY= FILTERED_MICROSATS= FILTER_OPTIONS= -BAD_PRIMER_RANGES= +SUBSET= +RANDOM_SEED=568765 # # Collect command line arguments if [ $# -lt 2 ] ; then @@ -220,6 +222,10 @@ shift OUTPUT_ASSEMBLY=$1 ;; + --subset) + shift + SUBSET=$1 + ;; *) echo Unknown option: $1 >&2 exit 1 @@ -234,6 +240,25 @@ fatal "primer3_core not found" fi # +# Check the n-mers specification +if [ $MIN_6_MER_REPS -ne 0 ] ; then + if [ $MIN_5_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 5-mers cannot be zero if number of 6-mers is non-zero" + fi +fi +if [ $MIN_5_MER_REPS -ne 0 ] ; then + if [ $MIN_4_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 4-mers cannot be zero if number of 5-mers is non-zero" + fi +fi +if [ $MIN_4_MER_REPS -ne 0 ] ; then + if [ $MIN_3_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 3-mers cannot be zero if number of 4-mers is non-zero" + fi +fi +if [ $MIN_2_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 2-mer repeats cannot be zero" +fi # Set up the working dir if [ "$PLATFORM" == "Illumina" ] ; then # Paired end Illumina data as input @@ -253,6 +278,14 @@ PRIMER_MISPRIMING_LIBRARY=$(basename $PRIMER_MISPRIMING_LIBRARY) mkdir Output # +# Use a subset of reads +if [ ! -z "$SUBSET" ] ; then + echo "### Extracting subset of reads ###" + $(dirname $0)/fastq_subset.py -n $SUBSET -s $RANDOM_SEED $fastq_r1 $fastq_r2 + fastq_r1="subset_r1.fq" + fastq_r2="subset_r2.fq" +fi +# # Copy in the default config.txt file echo "### Creating config.txt file for pal_finder run ###" /bin/cp $PALFINDER_DATA_DIR/config.txt . 
@@ -311,8 +344,13 @@ fi tail -$MAX_LINES pal_finder.log # -# Check that log ends with "Done!!" message -if [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then +# Check for success/failure +if [ ! -z "$(tail -n 1 pal_finder.log | grep 'No microsatellites found in any reads. Ending script.')" ] ; then + # No microsatellites found + fatal ERROR pal_finder failed to locate any microsatellites + exit 1 +elif [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then + # Log doesn't end with "Done!!" (indicates failure) fatal ERROR pal_finder failed to complete successfully fi echo "### pal_finder finished ###" @@ -321,33 +359,38 @@ echo "### Checking for errors ###" if [ ! -z "$(grep 'primer3_core: Illegal element in PRIMER_PRODUCT_SIZE_RANGE' pal_finder.log)" ] ; then echo WARNING primer3 terminated prematurely due to bad product size ranges + $(find_bad_primer_ranges Output/pr3in.txt bad_primer_ranges.txt) + N_BAD_PRIMERS=$(cat bad_primer_ranges.txt | wc -l) if [ -z "$BAD_PRIMER_RANGES" ] ; then # No output file so report to stderr - cat >&2 <<EOF -ERROR primer3 terminated prematurely due to bad product size ranges + cat <<EOF Pal_finder generated bad ranges for the following read IDs: + EOF - echo $(find_bad_primer_ranges Output/pr3in.txt) >&2 - cat >&2 <<EOF + cat bad_primer_ranges.txt + cat <<EOF This error can occur when input data contains short R1 reads and has has not been properly trimmed and filtered. 
EOF else - # Dump bad ranges to file + # Move the bad ranges to the specified file echo "### Writing read IDs with bad primer ranges ###" - echo $(find_bad_primer_ranges Output/pr3in.txt) >"$BAD_PRIMER_RANGES" + /bin/mv bad_primer_ranges.txt "$BAD_PRIMER_RANGES" fi +else + N_BAD_PRIMERS=0 fi # # Sort microsat_summary output echo "### Sorting microsat summary output ###" head -n 7 Output/microsat_summary.txt | sort >microsat_summary.sorted +echo "readsWithBadRanges:"$'\t'"$((N_BAD_PRIMERS * 2))" >>microsat_summary.sorted grep "^$" Output/microsat_summary.txt>>microsat_summary.sorted grep "^Microsat Type" Output/microsat_summary.txt >>microsat_summary.sorted -tail -n +11 Output/microsat_summary.txt >>microsat_summary.sorted +tail -n +11 Output/microsat_summary.txt | sort -r -n -k 5 >>microsat_summary.sorted mv microsat_summary.sorted Output/microsat_summary.txt # # Sort PAL_summary output
--- a/pal_finder_wrapper.xml Thu Mar 22 07:21:26 2018 -0400 +++ b/pal_finder_wrapper.xml Mon May 14 11:10:19 2018 -0400 @@ -9,7 +9,7 @@ <requirement type="package" version="1.65">biopython</requirement> <requirement type="package" version="2.8.1">pandaseq</requirement> </requirements> - <command><![CDATA[ + <command detect_errors="exit_code"><![CDATA[ @CONDA_PAL_FINDER_SCRIPT_DIR@ && @CONDA_PAL_FINDER_DATA_DIR@ && bash $__tool_directory__/pal_finder_wrapper.sh @@ -64,6 +64,10 @@ #if str( $platform.assembly ) == '-assembly' $platform.assembly "$output_assembly" #end if + #set $use_all_reads = $platform.subset_conditional.use_all_reads + #if str( $use_all_reads ) != "yes" + --subset "$platform.subset_conditional.subset" + #end if #end if ]]></command> <inputs> @@ -91,6 +95,13 @@ label="Select FASTQ dataset collection with R1/R2 pair" /> </when> </conditional> + <conditional name="subset_conditional"> + <param name="use_all_reads" type="boolean" label="Use all reads for microsatellite detection?" checked="True" truevalue="yes" falsevalue="no" /> + <when value="no"> + <param name="subset" type="text" value="0.5" label="Number or fraction of reads to use" help="Either an integer number of reads or a decimal fraction (e.g. 
0.5 to select 50% of reads)" /> + </when> + <when value="yes" /> + </conditional> <param name="filters" type="select" display="checkboxes" multiple="True" label="Filters to apply to the pal_finder results" help="Apply none, one or more filters to refine results"> @@ -106,7 +117,7 @@ <param name="input_fasta" type="data" format="fasta" label="454 fasta file with raw reads" /> </when> </conditional> - <param name="min_2mer_repeats" type="integer" value="6" label="Minimum number of 2-mer repeat units to detect" help="Set to zero to ignore repeats of this n-mer unit" /> + <param name="min_2mer_repeats" type="integer" value="6" label="Minimum number of 2-mer repeat units to detect" min="1" help="Must detect at least one repeat of this n-mer unit" /> <param name="min_3mer_repeats" type="integer" value="0" label="Minimum number of 3-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> <param name="min_4mer_repeats" type="integer" value="0" label="Minimum number of 4-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> <param name="min_5mer_repeats" type="integer" value="0" label="Minimum number of 5-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> @@ -158,8 +169,9 @@ label="Maximum acceptable difference between melting temperatures of left and right primers (PRIMER_PAIR_MAX_DIFF_TM)" help="Temperature should be in degrees Celsius" /> </when> + <when value="default" /> </conditional> - <param name="report_bad_primer_ranges" type="boolean" truevalue="True" falsevalue="False" label="Output IDs for input reads which generate bad primer ranges" help="Can be used to screen input Fastqs" /> + <param name="report_bad_primer_ranges" type="boolean" truevalue="True" falsevalue="False" label="Output IDs for input reads which generate bad primer product size ranges" help="Can be used to screen reads in input Fastqs " /> <param name="keep_config_file" type="boolean" truevalue="True" falsevalue="False" 
label="Output the config file to the history" help="Can be used to run pal_finder outside of Galaxy" /> @@ -254,18 +266,76 @@ <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats.out.re_match" /> <output name="output_filtered_microsats" compare="re_match" file="illuminaPE_filtered_microsats_rankmotifs.out.re_match" /> </test> - <!-- Test with Illumina input generating bad primer ranges - --> + <!-- Test with Illumina input using subset of reads --> + <test> + <param name="platform_type" value="illumina" /> + <param name="filters" value="" /> + <param name="assembly" value="false" /> + <param name="use_all_reads" value="no" /> + <param name="subset" value="0.5" /> + <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" /> + <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" /> + <expand macro="output_illumina_microsat_subset_summary" /> + <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats_subset.out.re_match" /> + </test> + <!-- Test with Illumina input filter that doesn't find any + microsatellites --> + <test expect_failure="true"> + <param name="platform_type" value="illumina" /> + <param name="filters" value="" /> + <param name="assembly" value="false" /> + <param name="min_2mer_repeats" value="8" /> + <param name="input_fastq_r1" value="illuminaPE_r1_no_microsats.fq" ftype="fastqsanger" /> + <param name="input_fastq_r2" value="illuminaPE_r2_no_microsats.fq" ftype="fastqsanger" /> + <assert_stderr> + <has_text text="pal_finder failed to locate any microsatellites" /> + </assert_stderr> + </test> + <!-- Test with Illumina input generating bad ranges --> <test> <param name="platform_type" value="illumina" /> <param name="filters" value="" /> <param name="assembly" value="false" /> - <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" /> - <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" /> - <param 
name="output_bad_primer_read_ids" value="true" /> - <expand macro="output_illumina_microsat_summary" /> - <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats.out.re_match" /> - <output name="output_bad_primer_read_ids" file="illuminaPE_bad_primer_ids.out" /> + <param name="min_2mer_repeats" value="8" /> + <param name="input_fastq_r1" value="illuminaPE_r1_bad_ranges.fq" ftype="fastqsanger" /> + <param name="input_fastq_r2" value="illuminaPE_r2_bad_ranges.fq" ftype="fastqsanger" /> + <param name="min_2mer_repeats" value="8" /> + <param name="min_3mer_repeats" value="8" /> + <param name="min_4mer_repeats" value="8" /> + <param name="min_5mer_repeats" value="8" /> + <param name="min_6mer_repeats" value="8" /> + <param name="primer_options" value="custom" /> + <param name="primer_opt_size" value="25" /> + <param name="primer_min_size" value="21" /> + <param name="primer_max_size" value="30" /> + <param name="primer_min_gc" value="40.0" /> + <param name="primer_max_gc" value="60.0" /> + <param name="primer_gc_clamp" value="3" /> + <param name="primer_max_end_gc" value="5" /> + <param name="primer_min_tm" value="60.0" /> + <param name="primer_max_tm" value="80.0" /> + <param name="primer_opt_tm" value="68.0" /> + <param name="primer_pair_max_diff_tm" value="3.0" /> + <param name="report_bad_primer_ranges" value="true" /> + <expand macro="output_illumina_microsat_summary_bad_ranges" /> + <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats_bad_ranges.out.re_match" /> + <output name="output_bad_primer_read_ids" file="illuminaPE_bad_primer_read_ids.out" /> + </test> + <!-- Test with bad n-mers specified --> + <test expect_failure="true"> + <param name="platform_type" value="illumina" /> + <param name="filters" value="" /> + <param name="assembly" value="false" /> + <param name="min_2mer_repeats" value="8" /> + <param name="min_3mer_repeats" value="8" /> + <param name="min_4mer_repeats" value="0" /> + <param 
name="min_5mer_repeats" value="8" /> + <param name="min_6mer_repeats" value="8" /> + <param name="input_fastq_r1" value="illuminaPE_r1_no_microsats.fq" ftype="fastqsanger" /> + <param name="input_fastq_r2" value="illuminaPE_r2_no_microsats.fq" ftype="fastqsanger" /> + <assert_stderr> + <has_text text="Minimum number of 4-mers cannot be zero if number of 5-mers is non-zero" /> + </assert_stderr> </test> <!-- Test with 454 input --> <test> @@ -300,26 +370,49 @@ ------------- +.. class:: infomark + +**Known issues** + .. class:: warning -**Known problems** - -.. class:: infomark - -**Bad primer product size ranges** +**Low number of reads used for microsatellite detection/bad primer product size ranges** For some datasets pal_finder may generate 'bad' product size ranges (where the lower limit exceeds the upper limit) for one or more reads, for input into -primer3_core. +primer3_core. In these cases primer3_core will terminate prematurely, which can +result in a substantially lower number of reads being used for microsatellite +detection and potentially sub-optimal primer design. + +The number of reads generating the bad size ranges is reported in the +*Summary of microsat types* output dataset as 'readsWithBadRanges'. Ideally +the reported value should be zero. + +The conditions which cause this issue within pal_finder are still unclear, +however we believe it to be associated with short or low quality reads. If this +problem affects your data then: + +* Ensure that the input data are sufficiently trimmed and filtered (using + e.g. the Trimmomatic tool) before rerunning pal_finder. -If this occurs then the tool will terminate with an error. A list of the reads -for which the bad ranges were generated can be found in the error message -which can be accessed via the 'bug' icon from a failed dataset. +* A list of read IDs for which pal_finder generates bad product size ranges can + be output by turning on *Output IDs for input reads which generate bad primer + product size ranges*. 
This outputs an additional dataset with a list of read IDs which can + be used to remove read pairs from the input Fastq files (using e.g. the *Filter + sequences by ID* tool) before rerunning pal_finder. + +.. class:: warning -The conditions which cause this error are unclear. However we believe it to be -associated with short or low quality reads. It is recommended that the input -data are sufficiently trimmed and filtered (using e.g. the Trimmomatic tool) -before rerunning pal_finder. +**Pal_finder takes a long time to run for large input datasets** + +pal_finder was originally developed using MiSeq data, and is not optimised for +working with the larger Fastqs that are output from other platforms such as +HiSeq and NextSeq. As a consequence pal_finder may take a very long time to +complete when operating on larger datasets. + +If this is a problem then the tool can be run using a subset of the input reads +by unchecking the *Use all reads...* option and entering either an integer number +of reads to use, or a decimal fraction (e.g. 0.5 will select 50% of the reads). -------------
--- a/pal_finder_wrapper_utils.sh Thu Mar 22 07:21:26 2018 -0400 +++ b/pal_finder_wrapper_utils.sh Mon May 14 11:10:19 2018 -0400 @@ -38,17 +38,21 @@ # sequence ids where the PRIMER_PRODUCT_SIZE_RANGE has # upper limit which is smaller than lower limit local pr3in=$1 + local outfile=$2 local pattern="^(SEQUENCE_ID|PRIMER_PRODUCT_SIZE_RANGE)" for line in $(grep -E "$pattern" $pr3in | sed 's/ /^/' | sed 'N;s/\n/*/') do # Loop over pairs of SEQUENCE_ID and PRIMER_PRODUCT_SIZE_RANGE # keywords in the primer3 input if [ ! -z "$(echo $line | grep ^SEQUENCE_ID)" ] ; then - # Extract the values + # Lines look like: + # SEQUENCE_ID=(AT_1_16)(AT_1_16)M00879:99:000000000-AH9KG:1:2107:10006:2535*PRIMER_PRODUCT_SIZE_RANGE=194-329^59-194 local size_range=$(echo $line | cut -d'*' -f2 | cut -d'=' -f2 | tr '^' ' ') local seq_id=$(echo $line | cut -d'*' -f1 | cut -d'=' -f2) - else - local size_range=$(echo $line | cut -d'*' -f1 | cut -d'=' -f2) + elif [ ! -z "$(echo $line | grep ^PRIMER_PRODUCT_SIZE_RANGE)" ] ; then + # Lines look like: + # PRIMER_PRODUCT_SIZE_RANGE=194-329^59-194*SEQUENCE_ID=(AT_1_16)(AT_1_16)M00879:99:000000000-AH9KG:1:2107:10006:2535 + local size_range=$(echo $line | cut -d'*' -f1 | cut -d'=' -f2 | tr '^' ' ') local seq_id=$(echo $line | cut -d'*' -f2 | cut -d'=' -f2) fi seq_id=$(echo $seq_id | cut -d')' -f3) @@ -58,14 +62,14 @@ for range in $(echo $size_range) ; do local lower=$(echo $range | cut -d'-' -f1) local upper=$(echo $range | cut -d'-' -f2) - if [ $lower -gt $upper ] ; then + if [ "$lower" -gt "$upper" ] ; then bad_range=yes break fi done # Report if the range is wrong if [ ! -z "$bad_range" ] ; then - echo "$seq_id ($size_range)" + echo "${seq_id}"$'\t'"(${size_range})" >>$outfile fi done }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_bad_primer_read_ids.out Mon May 14 11:10:19 2018 -0400 @@ -0,0 +1,1 @@ +M00879:99:000000000-AH9KG:1:2107:14372:5471 (74-71 77-74)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_microsats_bad_ranges.out.re_match Mon May 14 11:10:19 2018 -0400 @@ -0,0 +1,7 @@ +readPairID\ Motifs\(bases\)\ Bases\ in\ all\ Motifs\ Possible\ Extended\ Possible\ Spanning\ Primers\ found\ \(1\=y\,0\=n\)\ F\ Primer\ Name\ Forward\ Primer\ R\ Primer\ Name\ Reverse\ Primer\ Amplicon\ Motifs\ Number\ motif\ bases\ in\ amplicon\ Primers\ on\ sep\ reads\ Extend\ with\ primers\ Spand\ with\ primers\ Occurances\ of\ Forward\ Primer\ in\ Reads\ Occurances\ of\ Reverse\ Primer\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ PALs +M00879\:99\:000000000\-AH9KG\:1\:2107\:10006\:2535\ AT\(16\)\ AT\(16\)\ \ 32\ AT\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ +M00879\:99\:000000000\-AH9KG\:1\:2107\:10032\:7900\ .*\ \ 164\ \ \ 1\ test\_.*\ (CGAAAGATGCTATAGAAGCGATGGGG|TATCTATCTATCAATCCGCTCCCC)\ test\_.*\ (GGACATCGAGATAGAAAGGGGACCG|TGATTGGACATCGAGATAGAAAGGG)\ .*\ \ 80\ 1\ \ \ .*\ .*\ 1\ 1 +M00879\:99\:000000000\-AH9KG\:1\:2107\:10061\:6317\ .*\ \ 76\ \ \ 1\ test\_.*\ GAGAGAGTACATAGATATCTCACGGGGCG\ test\_.*\ GCAACGGCACAGATCTCTTCTACGG\ .*\ \ 22\ 1\ \ \ 1\ 1\ 1\ 1 +M00879\:99\:000000000\-AH9KG\:1\:2107\:10072\:8112\ .*\ \ 44\ \ \ 1\ test\_.*\ AGTTTGTTACAGGGCATGACAACGG\ test\_.*\ TCCTGTTATCTTCTTGTTGCTTGGC\ .*\ \ 22\ 1\ \ \ 1\ 1\ 1\ 1 +M00879\:99\:000000000\-AH9KG\:1\:2107\:10084\:6474\ .*\ \ 100\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ +M00879\:99\:000000000\-AH9KG\:1\:2107\:14372\:5471\ .*\ \ 68\ .*\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_microsats_subset.out.re_match Mon May 14 11:10:19 2018 -0400 @@ -0,0 +1,6 @@ +readPairID\ Motifs\(bases\)\ Bases\ in\ all\ Motifs\ Possible\ Extended\ Possible\ Spanning\ Primers\ found\ \(1\=y\,0\=n\)\ F\ Primer\ Name\ Forward\ Primer\ R\ Primer\ Name\ Reverse\ Primer\ Amplicon\ Motifs\ Number\ motif\ bases\ in\ amplicon\ Primers\ on\ sep\ reads\ Extend\ with\ primers\ Spand\ with\ primers\ Occurances\ of\ Forward\ Primer\ in\ Reads\ Occurances\ of\ Reverse\ Primer\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ PALs +ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:17449\:1584\ (AC|TG)\(36\)\ \ 36\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ +ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:5626\:1554\ AT\(14\)\ (AC|TG)\(16\)\ (AC|TG)\(16\)\ AT\(12\)\ \ 58\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ +ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:5879\:1238\ AT\(12\)\ \ 12\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ +ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ (AC|TG)\(12\)\ \ 12\ \ \ 1\ test\_.*\ AAGTACAGTGGGGAGGCTGG\ test\_.*\ TTTTCTACACAGCTCAAGTAGCCC\ (AC|TG)\(12\)\ \ 12\ 1\ \ \ 1\ 1\ 1\ 1 +ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8899\:1514\ (AC|TG)\(12\)\ (AC|TG)\(12\)\ \ 24\ \ \ 1\ test\_.*\ TCTTTATCTAAACACATCCTGAAATACC\ test\_.*\ AAACGCAATTATTTTGAGATGTCC\ (AC|TG)\(12\)\ (AC|TG)\(12\)\ \ 24\ 1\ \ \ 1\ 2\ 1\ 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_r1_bad_ranges.fq Mon May 14 11:10:19 2018 -0400 @@ -0,0 +1,24 @@ +@M00879:99:000000000-AH9KG:1:2107:10006:2535 1:N:0:TAAGGCGA+TAGATCGC +ATATATATATATATATGTAGTATAATCTCACTGGAAAGGAGACGAAAACGAGCAGCTCCGAGCTTTCGACTTTATTTCAAGTCATCTTCAGGGCAACTGACAAATTTTTGTGTAGCAATAGTATATAGACACCAGACGAGATTCCTGACCTCACATCTGGGAGG ++ +CCCCCFGGGGGGGGGGGGGGGFCGGGGGGFGCG@FFGCDGGGGGGGGEGGGDGGFDCFGGGCDGGGGGGGGGGGGGGGGDGFGFGFGFFGGG@FEGGDEFGGGGGEFGGGGGGGFGFGGGGGGAFE?9FGGGGGGGG@F7F7AFDGGFFGFFGGG9EE9D8?>8 +@M00879:99:000000000-AH9KG:1:2107:10032:7900 1:N:0:TAAGGCGA+TAGATCGC +ATCTATGTATTTATCTATCTCCTCTCTCTCTCTCTCTCTCTCTCTCTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCAATCCGCTCCCCTTTCTATCTCGATGTCCAATCATTATACACACACCTACACGAAAGATGCTATAGAAGCGATGGGGGACTATAGGTGTATAGCAACTCTATACATCAACCAGTCTCTGCGCTCGTCCCCCTGTCG ++ +CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGEGGGGGGGGGGGGGGGFGGGCFGG@FGGEFGGDGGGGGGGGGGGGGGA9EFFC,@@F@FG8FGG9?EGG,,4:?@FFAGG@EFF<B,CFBEGGD:EFGD7F+>+,4,@,9E9,@ECEEEFC+8+4>B88;=E3,,2,@A68,7=@DDGED=A8=A8FGE+@DDD,++0@+=0+<3<>095?** +@M00879:99:000000000-AH9KG:1:2107:10061:6317 1:N:0:TAAGGCGA+TAGATCGC +TATATATATATATGAGAAAACTGGTGTCGGTGAGGACTCATCAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGTACATAGATATCTCACGGGGCGAGCTGCCGCTGCTGCTGCTGCCGCGGAGGGTGCAGAATAAGACGAGGAGGTGGAGAGAGGTGTGGAAAAAGTGCCGTAGAAGAGATCTGTGCCGTCGCACAGCTCTCGTGGTGCACCTATGGGAGAGGCGG ++ +CCCC@FFFFFGGFGGGGGGGG8EFG?FFGGEGGGFGGGGGGGGGGGGGGDFGGGGGGGGGGGGGGGGGGGGFG@CFFGGGGGGGGECGGGGGGGF,9,,C,E,?,,9,5<,,:=+78B+++@,,,+++6+,8,,A55A,,+++@+87:*3*@3D79;,,,451***>B4<**6,=***4=;8*+++*/*21+0**3//CEG86+A9:1<++2**)*)2)**+629*9*715:***00*:*755*0*.00 +@M00879:99:000000000-AH9KG:1:2107:10072:8112 1:N:0:TAAGGCGA+TAGATCGC +GTGTGTGTGTGTGTGTGTGTGTTGGTTGTGAAAGTTATCTCAGCTTGTTATATGGAATGACGACTGTCAAGCTTACCAGTTTCTTACATGGGATGACAAAGGCTAGGCTTACCAACTTGTTACATGTAATGACATCGGTAAAGCTTATCAGTTTGTTACAGGGCATGACAACGGTCAAGCTTACCAGCATGATACATGTAGTGACGAAGGCCAAGCCACAAGAAGATAACAGGCAC ++ 
+CCCCCFGGGGFFFGGFFGFGGGGGGGGGGDFGGGCGFGGGG,FFGGFFFCDCDEFFCFGGFGCEGDCG<DGGFA@FCFCFDGFGCEFFGG,CEECFGFGFG,BAFFGGGGGGGAFEC,EFDGGGFGGGGGFGFGGGGEEFGBAFDFCGCAFGGGGEEFFGCFG+9EFGGGD88BECEGGG?EGGGG8,@EF,,@DDFA@3@:DA9CEEDE6AFGGF7D+5CEF591C:>7>CD*;F +@M00879:99:000000000-AH9KG:1:2107:14372:5471 1:N:0:TAAGGCGA+TAGATCGC +GAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAG ++ +CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG +@M00879:99:000000000-AH9KG:1:2107:10084:6474 1:N:0:TAAGGCGA+TAGATCGC +CAATTACCTAAGTATCTGAATAGTCATTTCCCTATTAGGGGTGATCATAGTCGAACGGTAGCACTCCCGCTGGTCACGCTCTGGACCAGAGTTCAAATCTTGGCTGGTCGAGGTTTACGTGTTTTATATCTATCTATCTATCTATCTATCTATCTATCTATATATATATATATATACATATATAT ++ +CCCCCGGGGGGGFGGGGGGGGGGFGGGGGGGGGCFGGGGGGFGGGGGGGGGGGGGGG@FGGGGGFGGGGGGDGGGGGGGGGGGGGGGGGGFECCEEEFGGGGGGGG>FGG>C+BE,BF:FGFFGGGGGGGGGGGGGGGGGFGGGGGGGFGGGGFDEEGGGGFGGGCFGGGGGGGGFDFDCFCD;E
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_r1_no_microsats.fq Mon May 14 11:10:19 2018 -0400 @@ -0,0 +1,32 @@ +@ILLUMINA-545855:49:FC61RLR:2:1:10979:1695 1:N:0:TCCTGA +TACTGTTTAGAATAGACTGTTCTCCCACTATATTTTGCATTGGTGCATACTCAGCTTTAGTAATAAGTGTGATTCTGGTAGAGAGAGAGAGAGATACCAACCTCTTCTTCCCACTA ++ +IIIIIHIIIGHHIIDGHIIIIIIHIIIIIIIIIIIIHGIIIIHGIIIIHHHIHHGHIIHHEHHIGHGDHDHHHGGHEGAHGDGEHEGEGBGCEDBDDGEEFDEECGDEGDEDEDE8 +@ILLUMINA-545855:49:FC61RLR:2:1:19063:1614 1:N:0:TCCTGA +TATATATATATATACACATATATATATATATTTTTTACATTATTTCACTTCGCCCAAACTAGAGAGTCTAACAAAGTACAACCCAGCATATTAAAGTTCATCTCAGTTTTGTTCTG ++ +IIIIIIIGIEIIIIGIIIHHIHIGIGIIIIIHIIIIHGBEIIIIHIFHHIHFIIIIFHFIHIIHFF>EEEBDGDD;BD8DDBBDDBGHGHHHHEFE=DBCDEEEBEBEGHGAFH@E +@ILLUMINA-545855:49:FC61RLR:2:1:1978:1220 1:N:0:TCCTGA +TCATAAGAATGAGCAGTAAACAAAGGCAAAGGGGAGATAACACACACACACAAAATAAAAAAACATCAATTTCTAATACACGCCTTTATTATAAAGAAATAAATCACTGAAAAACA ++ +DDDDDBDBDDB7?BBB5BB>1?<?88>DDD?4(6367;>?2<164=;>8<0DDD>>A6A?>;;6>+6>><>4-8484888&@D@@DD<D;D>>?<9DD;6DDC<DD;<88@##### +@ILLUMINA-545855:49:FC61RLR:2:1:5879:1238 1:N:0:TCCTGA +TCCCCACCCTGTCATGGTTCTATGTTTTTGTTTTTGTTTTTGTTTTTATGGTTTCCGTATTCCACATTAAAACCTTATGTAACGTACGGGCCAATAAATAGTTACTCGCCATATCC ++ +#################################################################################################################### +@ILLUMINA-545855:49:FC61RLR:2:1:6204:1090 1:N:0:TCCTGA +TGCTTTGGTTCTAAGAGAAAAACAATTATTATAAATGTTTATAATTGATGATAAGCATTTTTGTACAAAGCCAAGACCATTCTGAATGAAGCACCCAAAAAGCCCGGAGGCAACAA ++ +#################################################################################################################### +@ILLUMINA-545855:49:FC61RLR:2:1:8044:1926 1:N:0:TCCTGA +TAGATTTTTTTTTTTATATATATATAAATATAGATGTACATATATTTATATAAATATAAAAGCACAGCATCCTCCTGTCTCTCCTCCTGATTTATTATGGTTAAAGCTTGTGACAG ++ +HHHHHHHHHHHHHHHFHFGFHEFFDF92=@=>;;;=HHHGHHHHHHHHHHHHHHHHGHHHFHHHHFHFDHHHHHHHHHHHHHHHHGHHGGHHHHHHHHHHEHHHHGEHHHEFE>EB +@ILLUMINA-545855:49:FC61RLR:2:1:8157:1636 1:N:0:TCCTGA 
+TACTAGTCTAATAATTGCAGGCAGCTGAACTAGATAGGTCCTAAAGTACAGTGGGGAGGCTGGTGTGTGTGTGTGCATGGGATTGTCAGCCTTACCATCAGTCCTGATTTGTAGGT ++ +HHHHHHHHHHHHHFHHHHHGHHHHHHGHHHHGHHHHHH>HHGGGGFFHHHHGHGHHFHHHHGGBGDCAC>CBDBDDCFGED@BDDB@BBBBECCD@<CC>C=?9==@CECB=BBB8 +@ILLUMINA-545855:49:FC61RLR:2:1:8899:1514 1:N:0:TCCTGA +TCTTTATCTAAACACATCCTGAAATACCATCTGTTACACACACACACAGCAGTGGAAGTATAAAAAAAAATCTGGACATCTCAAAATAATTGCGTTTCTGAAGTGTTACATTTTTC ++ +IIIIIHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGIIIIIGIIIIIIIIIIIIIIHHGIIIIHIHHHHIHHHHHHHGHHHHHGFHEHEHHHHHHHII>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_r2_bad_ranges.fq Mon May 14 11:10:19 2018 -0400 @@ -0,0 +1,24 @@ +@M00879:99:000000000-AH9KG:1:2107:10006:2535 2:N:0:TAAGGCGA+TAGATCGC +CCTCCCAGATGTGAGGTCAGGAATCTCGTCTGGTGTCTATATACTATTGCTACACAAAAATTTGTCAGTTGCCCTGAAGATGACTTGAAATAAAGTCGAAAGCTCGGAGCTGCTCGTTTTCGTCTCCTTTCCAGTGAGATTATACTACATATATATATATATAT ++ +CCCCCGGGGGFGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG8FFFGGGGGGFGGGGGGGCEFFGGGGGFGGGGGGGGGGGGGGGFGFCEGGFGG8EFEFDFGEFGFFGGCFGGFAFF,9AFDGGGGGGGGGGGGGGEADDFGFEA +@M00879:99:000000000-AH9KG:1:2107:10032:7900 2:N:0:TAAGGCGA+TAGATCGC +CTACAGGGGGCCGAGCACAGAGACTGGTTGATGTATAGAGTTGCTATACTCCTATAGTCCCCCATCGCTTCTATAGCATCTTTCGTGTAGGTGTGTCTATAATGATTGGACATCGAGATAGAAAGGGGACCGGATTGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAGAGAGAGAGAGAGAGAGAGAGCGAGGAGATAGATAAATACATCGAT ++ +CCCCCGGGGDGGGEGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFGGGGGGGGGGGGGGGGGGGCGFGGGGGGGGGGGGGGGGGGGGGGCGGGGGGGGGFFGGGFGDGGGFGFGGGGGGGGGGEGFFGFFCGFGGGCFG9FFGGGGFCFGGGGGGGGG8DAFGFFGFGGDGDFGFCFFD72DAFF>?BCFFF5>9A>DFFFFFF*):494?>BFEE4<*9*@:=?*-(- +@M00879:99:000000000-AH9KG:1:2107:10061:6317 2:N:0:TAAGGCGA+TAGATCGC +GCCTCGGGACCTTGGCACGCCGCCTCTCCCATAGATGCACCATGAGATCCGTGCAACGGCACAGATCTCTTCTACGGAACTTTTGCCACACCTCACTTCACCTTCTTGCCTTATTCTGCACCACCTGCAGCAGCAGCAGCAGCAGCAGCTCGCCCCGAGTTACATCTATGTACTCTCTCCCTCTCTCTCTCTCTCTCTCTCTCCCTCTCTCTCTCCCTCCCTCTCCTCTCACCCCACACTCACACCCGC ++ +CCCCCFGGGGGGGFFGGGGGGGGGGGGGGGGGGFGGGFEGGGGEGFGGGGGGGGGGGEGGGGGGGGGCGFFGFGGFG@FGGFEF9FGGGCGGEFEFGGGDGG@FE?EEFGGGGG,EE,EFGGFGGFDG,@FFFFG8D8=E8>EEEGC=D=D6CEGC61C=8:ECFG8AA9<2CCC<C?C+=0==DAF9C7;;@0;0@09*@*9*)3;):/;.7.>*7):(54>3-(0(*(0*-(511(6(/(6)6/(,( +@M00879:99:000000000-AH9KG:1:2107:10072:8112 2:N:0:TAAGGCGA+TAGATCGC +GTTCCTGTTATCTTCTTGTTGCTTGGCCTTCGTCACTACATGTAACATGCTGGTAAGCTTGACCGTTGTCATTCCCTGTAACAAACTGATAAGCTTTACCGATGTCATTACATGTAACCAGTTGGTAAGCCTAGCCTTTGTCATCCCATGTAAGAAACTGGTAAGCTTGACAGTCGTCATTCCATATAACAAGCTGAGATAACTTTCACAACCAACACACACACACACACACACACCTGTCTCCTATAC ++ 
+CCCCCGGGGGGGGGGGGFFGFGGGG7FFGGGGGGGGGGGGGFGFFFAEGGGCFFG@AGGGC,EFGGGGGGGG@EEGGGGFGGFGGGFGGFGCFGGGGGGGGCEEGGCFEFGDEFGGFG,CFFFEGGGDG9EFFFGGGGFAFGGGGG84E=EFGGG;AGDDFFFGGFGG8=DAFGFG=D88FFG@9D@@FDD+;D56D?FFFFFFD=7*;2:)=855)=DF=>=AAD==@DDA)=@@5)):)3;9A***9 +@M00879:99:000000000-AH9KG:1:2107:14372:5471 2:N:0:TAAGGCGA+TAGATCGC +CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC ++ +ACCCCFGE<FFFGGGGCFFFEF@EGGGGGGGGGGG +@M00879:99:000000000-AH9KG:1:2107:10084:6474 2:N:0:TAAGGCGA+TAGATCGC +ATATATATGTATATATATATATATATAGATAGATAGATAGATAGATAGATAGATAGATATAAAACACGTAAACCTCGACCAGCCAAGATTTGAACTCTGGTCCAGAGCGTGACCAGCGGGAGTGCTACCGTTCGACTATGATCACCCCTAATAGGGAACTGACTATTCAGATACTTAGGTAATTG ++ +CCCCCGGGGGGGGGGGGGGGGGGGCFGFGGGCGGGFGFGFGCFGGFGGGGGGGGGGGGGGGGGGGGFCFFFGGGG<FEFGGFCEFGGGGGGFFAGFGGCCFGGEDGFDGGGGGGGGGG7@FG=AEFGGGGDGC8EGGGGFFEFEGGGGGGGDCF8@FE+==AF9=FFGGFGDGFDA=?DDG+3?9
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_r2_no_microsats.fq Mon May 14 11:10:19 2018 -0400 @@ -0,0 +1,32 @@ +@ILLUMINA-545855:49:FC61RLR:2:1:10979:1695 2:N:0:TCCTGA +TACTGTTTAGAAAGCCTGTTCCAGAACTTGATCACTGTCACAGAAAATCTTTCTTACTATCCAGACTGAAGCTACCCTGGTGCAGCTTTGTGCTGTTACCTTGAGTCATGTCATCA ++ +IIIIIHIIIIIHIIIIHIHIIIIIIIHIIIHIIIHIHGIIIIIIHEHHHHHHHHIIHIIHFHHEHHIGHHGHGIHGHHFGAGIEHHGEGHBFIFDBHHHGDFHBDBHGGFGD8EB@ +@ILLUMINA-545855:49:FC61RLR:2:1:19063:1614 2:N:0:TCCTGA +TATATATATATATAAACATATATATATATATTTTTTTCTCATTTCAGAACAAAAGTGAGATGAACTTTAATATGGTGGGGTGTATTTTGAGAGACTCTCTAGTTTGGGAGGAGTGA ++ +DDDDDDDDDDDD:D@D+DDDDDDDDDDDD6DDDDD>A@:5>@########################################################################## +@ILLUMINA-545855:49:FC61RLR:2:1:1978:1220 2:N:0:TCCTGA +TCCTCTGACTAGGCAACAACAGCTTTTTTGCTCCTGGGCAGAGGTGTTCCGAGTGTATATTTTTTATAATTACGGCGCGCATTGGAAATTGATGTTATTTTATTTTGCGTGTGTGT ++ +B?/?################################################################################################################ +@ILLUMINA-545855:49:FC61RLR:2:1:5879:1238 2:N:0:TCCTGA +TCCCCACCCTGTCATGGTTCTATGTATATATATATAGCCATGTGTGTGGTACCAGGGATAGGTACCTGGGATTGGGGCAGTGACACTTTAGTGCCCCGTACACTACATGATGTTTT ++ +HHHHHHHHHHHHHHHHHEHHGGGGFDHGHGHHHHHHFHHHHHGHEHEHEBEHEFB8EEEFEDAGEDBCEBBB@>BEC>@B@DCBBBECBB<AECBBBC>BA>B<;BA@A@###### +@ILLUMINA-545855:49:FC61RLR:2:1:6204:1090 2:N:0:TCCTGA +TGCTTTGGTTCTAAGAGAAAAACAAGTGATGCACAAGCAATTCCTCGCCACCACCCAACTGATGCCCAGCCACCCCCCCAAGCAGTGAAAGAGAGAGAGAGATGAACCCCCTTCAA ++ +HHHDBHHHEFGHHHHEHFHEHDDDDD@HHEHHEEHEFFEEEGDEGGGGEGEB>EBC>@@@@@@BB@FEBFBBB@A>AAA<;>A>;3>=??>>BB>?>@?1?>:9*@########## +@ILLUMINA-545855:49:FC61RLR:2:1:8044:1926 2:N:0:TCCTGA +TCAGGCAAGGTCACTGCCACCACTGGGGAGTGCCTGTTTCTGAAGGGCCCAGCCAACTCTGTCACAAGCTTTAACCATAATAAATCAGGAGGAGAGACAGGAGGATGCTGTGCTTT ++ +IIIIIIIIIIHIIIIIIIIIIIIIIGIIIIFIIIIIGHHIIIIIGGEIHGHGHHHGGCHHCGGGGGGHGHGEGEGEGGGDGBECCBGGEDGB;8?BBBBDABC@:3==;=:<30:6 +@ILLUMINA-545855:49:FC61RLR:2:1:8157:1636 2:N:0:TCCTGA 
+TAAACAACCAAATGAAACCATCTTTTCTACACAGCTCAAGTAGCCCTACATACAACACAAGCCACCTACAAATCAGGACTGATGGTAAGGCTGACAATCCAATCCACCACAACAAC ++ +HFHHHHHHHHHHHHDHGHHDHHHHHHHHHHHHHHHHHHGGGHHGHHHHHHHHHHHGHHHHH@HHHFHGHFHHEHHHHHHDHBFEBBHFFDHE>EFHBEFD################ +@ILLUMINA-545855:49:FC61RLR:2:1:8899:1514 2:N:0:TCCTGA +TATCATTGAAATTTTTATAAAAACTGTGAAGAGAAAAATGTAACACTTCAGAAACGCAATTATTTTGAGATGTCCAGATTTTTTTTTATACTTCCACTGCTGTGTGTGTGTGTAAC ++ +IG+GGGIIIIIIIIIIDIIIIIIGHHIIHIIIIIEGHIHIHIIHIIIIIIIIIIIIIIHIIDIIIIIEDIIIDIHGHFIIIIIIIIIHIFFBHGIGBGGHBDBFEGEGCGEIEDEB