changeset 14:3f8bf1a0403b draft

Uploaded version with bad primer ranger detection (WIP).
author pjbriggs
date Thu, 22 Mar 2018 07:21:26 -0400
parents 88c972081f15
children a3af1ff4cad1
files README.rst pal_finder_wrapper.sh pal_finder_wrapper.xml pal_finder_wrapper_utils.sh
diffstat 4 files changed, 166 insertions(+), 43 deletions(-) [+]
line wrap: on
line diff
--- a/README.rst	Thu Mar 15 11:52:19 2018 -0400
+++ b/README.rst	Thu Mar 22 07:21:26 2018 -0400
@@ -61,6 +61,9 @@
 Version    Changes
 ---------- ----------------------------------------------------------------------
 
+0.02.04.7  - Trap for errors in ``pal_finder_v0.02.04.pl`` resulting in bad
+             ranges being supplied to ``primer3_core`` for some reads via
+             ``PRIMER_PRODUCT_RANGE_SIZE``.
 0.02.04.6  - Update to get dependencies using ``conda`` when installed from the
              toolshed (this removes the explicit dependency on Perl 5.16
              introduced in 0.02.04.2, as a result the outputs from the tool are
--- a/pal_finder_wrapper.sh	Thu Mar 15 11:52:19 2018 -0400
+++ b/pal_finder_wrapper.sh	Thu Mar 22 07:21:26 2018 -0400
@@ -26,7 +26,8 @@
 # --primer-opt-tm VALUE: optimum melting temperature (Celsius)
 # --primer-pair-max-diff-tm VALUE: max difference between melting temps of left & right primers
 # --output_config_file FNAME: write a copy of the config.txt file to FNAME
-# --filter_microsats FNAME: write output of filter options FNAME
+# --bad_primer_ranges FNAME: write a list of the read IDs generating bad primer ranges to FNAME
+# --filter_microsats FNAME: write output of filter options to FNAME
 # -assembly FNAME: run the 'assembly' filter option and write to FNAME
 # -primers: run the 'primers' filter option
 # -occurrences: run the 'occurrences' filter option
@@ -53,6 +54,9 @@
 # Maximum size reporting log file contents
 MAX_LINES=500
 #
+# Get helper functions
+. $(dirname $0)/pal_finder_wrapper_utils.sh
+#
 # Initialise locations of scripts, data and executables
 #
 # Set these in the environment to overide at execution time
@@ -63,31 +67,18 @@
 # Filter script is in the same directory as this script
 PALFINDER_FILTER=$(dirname $0)/pal_filter.py
 if [ ! -f $PALFINDER_FILTER ] ; then
-    echo No $PALFINDER_FILTER script >&2
-    exit 1
+    fatal No $PALFINDER_FILTER script
 fi
 #
 # Check that we have all the components
-function have_program() {
-    local program=$1
-    local got_program=$(which $program 2>&1 | grep "no $(basename $program) in")
-    if [ -z "$got_program" ] ; then
-	echo yes
-    else
-	echo no
-    fi	
-}
 if [ "$(have_program $PRIMER3_CORE_EXE)" == "no" ] ; then
-    echo "ERROR primer3_core missing: ${PRIMER3_CORE_EXE} not found" >&2
-    exit 1
+    fatal "primer3_core missing: ${PRIMER3_CORE_EXE} not found"
 fi
 if [ ! -f "${PALFINDER_DATA_DIR}/config.txt" ] ; then
-    echo "ERROR pal_finder config.txt not found in ${PALFINDER_DATA_DIR}" >&2
-    exit 1
+    fatal "pal_finder config.txt not found in ${PALFINDER_DATA_DIR}"
 fi
 if [ ! -f "${PALFINDER_SCRIPT_DIR}/pal_finder_v0.02.04.pl" ] ; then
-    echo "ERROR pal_finder_v0.02.04.pl not found in ${PALFINDER_SCRIPT_DIR}" >&2
-    exit 1
+    fatal "pal_finder_v0.02.04.pl not found in ${PALFINDER_SCRIPT_DIR}"
 fi
 #
 # Initialise parameters used in the config.txt file
@@ -113,12 +104,13 @@
 OUTPUT_ASSEMBLY=
 FILTERED_MICROSATS=
 FILTER_OPTIONS=
+BAD_PRIMER_RANGES=
 #
 # Collect command line arguments
 if [ $# -lt 2 ] ; then
   echo "Usage: $0 FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]"
   echo "       $0 --454    FASTA    MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]"
-  exits
+  fatal "Bad command line"
 fi
 if [ "$1" == "--454" ] ; then
     PLATFORM="454"
@@ -212,6 +204,10 @@
 	    shift
 	    OUTPUT_CONFIG_FILE=$1
 	    ;;
+	--bad_primer_ranges)
+	    shift
+	    BAD_PRIMER_RANGES=$1
+	    ;;
 	--filter_microsats)
 	    shift
 	    FILTERED_MICROSATS=$1
@@ -235,16 +231,14 @@
 # Check that primer3_core is available
 got_primer3=`which $PRIMER3_CORE_EXE 2>&1 | grep -v "no primer3_core in"`
 if [ -z "$got_primer3" ] ; then
-  echo ERROR primer3_core not found >&2
-  exit 1
+  fatal "primer3_core not found"
 fi
 #
 # Set up the working dir
 if [ "$PLATFORM" == "Illumina" ] ; then
     # Paired end Illumina data as input
     if [ $FASTQ_R1 == $FASTQ_R2 ] ; then
-	echo ERROR R1 and R2 fastqs are the same file >&2
-	exit 1
+	fatal ERROR R1 and R2 fastqs are the same file
     fi
     ln -s $FASTQ_R1
     ln -s $FASTQ_R2
@@ -264,17 +258,6 @@
 /bin/cp $PALFINDER_DATA_DIR/config.txt .
 #
 # Update the config.txt file with new values
-function set_config_value() {
-    local key=$1
-    local value=$2
-    local config_txt=$3
-    if [ -z "$value" ] ; then
-	echo "No value for $key, left as default"
-    else
-	echo Setting "$key" to "$value"
-	sed -i 's,^'"$key"' .*,'"$key"'  '"$value"',' $config_txt
-    fi
-}
 # Input files
 set_config_value platform $PLATFORM config.txt
 if [ "$PLATFORM" == "Illumina" ] ; then
@@ -299,6 +282,7 @@
 # Primer3 settings
 set_config_value primer3input Output/pr3in.txt config.txt
 set_config_value primer3output Output/pr3out.txt config.txt
+set_config_value keepPrimer3files 1 config.txt
 set_config_value primer3executable $PRIMER3_CORE_EXE config.txt
 set_config_value prNamePrefix ${PRIMER_PREFIX}_ config.txt
 set_config_value PRIMER_MISPRIMING_LIBRARY "$PRIMER_MISPRIMING_LIBRARY" config.txt
@@ -329,8 +313,33 @@
 #
 # Check that log ends with "Done!!" message
 if [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then
-    echo ERROR pal_finder failed to complete successfully >&2
-    exit 1
+    fatal ERROR pal_finder failed to complete successfully
+fi
+echo "### pal_finder finished ###"
+#
+# Check for errors in pal_finder output
+echo "### Checking for errors ###"
+if [ ! -z "$(grep 'primer3_core: Illegal element in PRIMER_PRODUCT_SIZE_RANGE' pal_finder.log)" ] ; then
+    echo WARNING primer3 terminated prematurely due to bad product size ranges
+    if [ -z "$BAD_PRIMER_RANGES" ] ; then
+	# No output file so report to stderr
+	cat >&2 <<EOF
+ERROR primer3 terminated prematurely due to bad product size ranges
+
+Pal_finder generated bad ranges for the following read IDs:
+EOF
+	echo $(find_bad_primer_ranges Output/pr3in.txt) >&2
+	cat >&2 <<EOF
+
+This error can occur when input data contains short R1 reads and has
+has not been properly trimmed and filtered.
+
+EOF
+    else
+	# Dump bad ranges to file
+	echo "### Writing read IDs with bad primer ranges ###"
+	echo $(find_bad_primer_ranges Output/pr3in.txt) >"$BAD_PRIMER_RANGES"
+    fi
 fi
 #
 # Sort microsat_summary output
@@ -362,11 +371,9 @@
     fi
     tail -$MAX_LINES pal_filter.log
     if [ $? -ne 0 ] ; then
-	echo ERROR $PALFINDER_FILTER exited with non-zero status >&2
-	exit 1
+	fatal $PALFINDER_FILTER exited with non-zero status
     elif [ ! -f PAL_summary.filtered ] ; then
-	echo ERROR no output from $PALFINDER_FILTER >&2
-	exit 1
+	fatal no output from $PALFINDER_FILTER
     fi
 fi
 #
@@ -386,8 +393,7 @@
     if [ -f "$assembly" ] ; then
 	/bin/mv $assembly "$OUTPUT_ASSEMBLY"
     else
-	echo ERROR no assembly output found >&2
-	exit 1
+	fatal no assembly output found
     fi
 fi
 if [ ! -z "$OUTPUT_CONFIG_FILE" ] && [ -f config.txt ] ; then
--- a/pal_finder_wrapper.xml	Thu Mar 15 11:52:19 2018 -0400
+++ b/pal_finder_wrapper.xml	Thu Mar 22 07:21:26 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="microsat_pal_finder" name="pal_finder" version="0.02.04.6">
+<tool id="microsat_pal_finder" name="pal_finder" version="0.02.04.7">
   <description>Find microsatellite repeat elements from sequencing reads and design PCR primers to amplify them</description>
   <macros>
     <import>pal_finder_macros.xml</import>
@@ -26,6 +26,9 @@
     --454 "$platform.input_fasta"
   #end if
   $output_microsat_summary $output_pal_summary
+  #if $report_bad_primer_ranges
+    --bad_primer_ranges "$output_bad_primer_read_ids"
+  #end if
   #if $keep_config_file
     --output_config_file "$output_config_file"
   #end if
@@ -156,6 +159,7 @@
 	       help="Temperature should be in degrees Celsius" />
       </when>
     </conditional>
+    <param name="report_bad_primer_ranges" type="boolean" truevalue="True" falsevalue="False" label="Output IDs for input reads which generate bad primer ranges" help="Can be used to screen input Fastqs" />
     <param name="keep_config_file" type="boolean" truevalue="True" falsevalue="False"
 	   label="Output the config file to the history"
 	   help="Can be used to run pal_finder outside of Galaxy" />
@@ -169,6 +173,9 @@
     <data name="output_assembly" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix}: assembly">
       <filter>platform['assembly'] is True</filter>
     </data>
+    <data name="output_bad_primer_read_ids" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix}: read IDs generating bad primer ranges">
+      <filter>report_bad_primer_ranges is True</filter>
+    </data>
     <data name="output_config_file" format="txt" label="${tool.name} on ${on_string} for ${primer_prefix}: config file">
       <filter>keep_config_file is True</filter>
     </data>
@@ -247,6 +254,19 @@
       <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats.out.re_match" />
       <output name="output_filtered_microsats" compare="re_match" file="illuminaPE_filtered_microsats_rankmotifs.out.re_match" />
     </test>
+    <!-- Test with Illumina input generating bad primer ranges
+    -->
+    <test>
+      <param name="platform_type" value="illumina" />
+      <param name="filters" value="" />
+      <param name="assembly" value="false" />
+      <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" />
+      <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" />
+      <param name="output_bad_primer_read_ids" value="true" />
+      <expand macro="output_illumina_microsat_summary" />
+      <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats.out.re_match" />
+      <output name="output_bad_primer_read_ids" file="illuminaPE_bad_primer_ids.out" />
+    </test>
     <!-- Test with 454 input -->
     <test>
       <param name="platform_type" value="454" />
@@ -280,6 +300,29 @@
 
 -------------
 
+.. class:: warning
+
+**Known problems**
+
+.. class:: infomark
+
+**Bad primer product size ranges**
+
+For some datasets pal_finder may generate 'bad' product size ranges (where the
+lower limit exceeds the upper limit) for one or more reads, for input into
+primer3_core.
+
+If this occurs then the tool will terminate with an error. A list of the reads
+for which the bad ranges were generated can be found in the error message
+which can be accessed via the 'bug' icon from a failed dataset.
+
+The conditions which cause this error are unclear. However we believe it to be
+associated with short or low quality reads. It is recommended that the input
+data are sufficiently trimmed and filtered (using e.g. the Trimmomatic tool)
+before rerunning pal_finder.
+
+-------------
+
 .. class:: infomark
 
 **Credits**
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pal_finder_wrapper_utils.sh	Thu Mar 22 07:21:26 2018 -0400
@@ -0,0 +1,71 @@
+#!/bin/bash
+#
+# Helper functions for the pal_finder_wrapper.sh script
+#
+# Utility function for terminating on fatal error
+function fatal() {
+    echo "FATAL $@" >&2
+    exit 1
+}
+#
+# Check that specified program is available
+function have_program() {
+    local program=$1
+    local got_program=$(which $program 2>&1 | grep "no $(basename $program) in")
+    if [ -z "$got_program" ] ; then
+	echo yes
+    else
+	echo no
+    fi
+}
+#
+# Set the value for a parameter in the pal_finder config file
+function set_config_value() {
+    local key=$1
+    local value=$2
+    local config_txt=$3
+    if [ -z "$value" ] ; then
+       echo "No value for $key, left as default"
+    else
+       echo Setting "$key" to "$value"
+       sed -i 's,^'"$key"' .*,'"$key"'  '"$value"',' $config_txt
+    fi
+}
+#
+# Identify 'bad' PRIMER_PRODUCT_SIZE_RANGE from pr3in.txt file
+function find_bad_primer_ranges() {
+    # Parses a pr3in.txt file from pal_finder and reports
+    # sequence ids where the PRIMER_PRODUCT_SIZE_RANGE has
+    # upper limit which is smaller than lower limit
+    local pr3in=$1
+    local pattern="^(SEQUENCE_ID|PRIMER_PRODUCT_SIZE_RANGE)"
+    for line in $(grep -E "$pattern" $pr3in | sed 's/ /^/' | sed 'N;s/\n/*/')
+    do
+	# Loop over pairs of SEQUENCE_ID and PRIMER_PRODUCT_SIZE_RANGE
+	# keywords in the primer3 input
+	if [ ! -z "$(echo $line | grep ^SEQUENCE_ID)" ] ; then
+	    # Extract the values
+	    local size_range=$(echo $line | cut -d'*' -f2 | cut -d'=' -f2 | tr '^' ' ')
+	    local seq_id=$(echo $line | cut -d'*' -f1 | cut -d'=' -f2)
+	else
+	    local size_range=$(echo $line | cut -d'*' -f1 | cut -d'=' -f2)
+	    local seq_id=$(echo $line | cut -d'*' -f2 | cut -d'=' -f2)
+	fi
+	seq_id=$(echo $seq_id | cut -d')' -f3)
+	# Check the upper and lower limits in each range
+	# to see if it's okay
+	local bad_range=
+	for range in $(echo $size_range) ; do
+	    local lower=$(echo $range | cut -d'-' -f1)
+	    local upper=$(echo $range | cut -d'-' -f2)
+	    if [ $lower -gt $upper ] ; then
+		bad_range=yes
+		break
+	    fi
+	done
+	# Report if the range is wrong
+	if [ ! -z "$bad_range" ] ; then
+	    echo "$seq_id ($size_range)"
+	fi
+    done
+}