Mercurial > repos > pjbriggs > pal_finder
changeset 14:3f8bf1a0403b draft
Uploaded version with bad primer ranger detection (WIP).
author | pjbriggs |
---|---|
date | Thu, 22 Mar 2018 07:21:26 -0400 |
parents | 88c972081f15 |
children | a3af1ff4cad1 |
files | README.rst pal_finder_wrapper.sh pal_finder_wrapper.xml pal_finder_wrapper_utils.sh |
diffstat | 4 files changed, 166 insertions(+), 43 deletions(-) [+] |
line wrap: on
line diff
--- a/README.rst Thu Mar 15 11:52:19 2018 -0400 +++ b/README.rst Thu Mar 22 07:21:26 2018 -0400 @@ -61,6 +61,9 @@ Version Changes ---------- ---------------------------------------------------------------------- +0.02.04.7 - Trap for errors in ``pal_finder_v0.02.04.pl`` resulting in bad + ranges being supplied to ``primer3_core`` for some reads via + ``PRIMER_PRODUCT_RANGE_SIZE``. 0.02.04.6 - Update to get dependencies using ``conda`` when installed from the toolshed (this removes the explicit dependency on Perl 5.16 introduced in 0.02.04.2, as a result the outputs from the tool are
--- a/pal_finder_wrapper.sh Thu Mar 15 11:52:19 2018 -0400 +++ b/pal_finder_wrapper.sh Thu Mar 22 07:21:26 2018 -0400 @@ -26,7 +26,8 @@ # --primer-opt-tm VALUE: optimum melting temperature (Celsius) # --primer-pair-max-diff-tm VALUE: max difference between melting temps of left & right primers # --output_config_file FNAME: write a copy of the config.txt file to FNAME -# --filter_microsats FNAME: write output of filter options FNAME +# --bad_primer_ranges FNAME: write a list of the read IDs generating bad primer ranges to FNAME +# --filter_microsats FNAME: write output of filter options to FNAME # -assembly FNAME: run the 'assembly' filter option and write to FNAME # -primers: run the 'primers' filter option # -occurrences: run the 'occurrences' filter option @@ -53,6 +54,9 @@ # Maximum size reporting log file contents MAX_LINES=500 # +# Get helper functions +. $(dirname $0)/pal_finder_wrapper_utils.sh +# # Initialise locations of scripts, data and executables # # Set these in the environment to overide at execution time @@ -63,31 +67,18 @@ # Filter script is in the same directory as this script PALFINDER_FILTER=$(dirname $0)/pal_filter.py if [ ! -f $PALFINDER_FILTER ] ; then - echo No $PALFINDER_FILTER script >&2 - exit 1 + fatal No $PALFINDER_FILTER script fi # # Check that we have all the components -function have_program() { - local program=$1 - local got_program=$(which $program 2>&1 | grep "no $(basename $program) in") - if [ -z "$got_program" ] ; then - echo yes - else - echo no - fi -} if [ "$(have_program $PRIMER3_CORE_EXE)" == "no" ] ; then - echo "ERROR primer3_core missing: ${PRIMER3_CORE_EXE} not found" >&2 - exit 1 + fatal "primer3_core missing: ${PRIMER3_CORE_EXE} not found" fi if [ ! -f "${PALFINDER_DATA_DIR}/config.txt" ] ; then - echo "ERROR pal_finder config.txt not found in ${PALFINDER_DATA_DIR}" >&2 - exit 1 + fatal "pal_finder config.txt not found in ${PALFINDER_DATA_DIR}" fi if [ ! -f "${PALFINDER_SCRIPT_DIR}/pal_finder_v0.02.04.pl" ] ; then - echo "ERROR pal_finder_v0.02.04.pl not found in ${PALFINDER_SCRIPT_DIR}" >&2 - exit 1 + fatal "pal_finder_v0.02.04.pl not found in ${PALFINDER_SCRIPT_DIR}" fi # # Initialise parameters used in the config.txt file @@ -113,12 +104,13 @@ OUTPUT_ASSEMBLY= FILTERED_MICROSATS= FILTER_OPTIONS= +BAD_PRIMER_RANGES= # # Collect command line arguments if [ $# -lt 2 ] ; then echo "Usage: $0 FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" echo " $0 --454 FASTA MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" - exits + fatal "Bad command line" fi if [ "$1" == "--454" ] ; then PLATFORM="454" @@ -212,6 +204,10 @@ shift OUTPUT_CONFIG_FILE=$1 ;; + --bad_primer_ranges) + shift + BAD_PRIMER_RANGES=$1 + ;; --filter_microsats) shift FILTERED_MICROSATS=$1 @@ -235,16 +231,14 @@ # Check that primer3_core is available got_primer3=`which $PRIMER3_CORE_EXE 2>&1 | grep -v "no primer3_core in"` if [ -z "$got_primer3" ] ; then - echo ERROR primer3_core not found >&2 - exit 1 + fatal "primer3_core not found" fi # # Set up the working dir if [ "$PLATFORM" == "Illumina" ] ; then # Paired end Illumina data as input if [ $FASTQ_R1 == $FASTQ_R2 ] ; then - echo ERROR R1 and R2 fastqs are the same file >&2 - exit 1 + fatal ERROR R1 and R2 fastqs are the same file fi ln -s $FASTQ_R1 ln -s $FASTQ_R2 @@ -264,17 +258,6 @@ /bin/cp $PALFINDER_DATA_DIR/config.txt . # # Update the config.txt file with new values -function set_config_value() { - local key=$1 - local value=$2 - local config_txt=$3 - if [ -z "$value" ] ; then - echo "No value for $key, left as default" - else - echo Setting "$key" to "$value" - sed -i 's,^'"$key"' .*,'"$key"' '"$value"',' $config_txt - fi -} # Input files set_config_value platform $PLATFORM config.txt if [ "$PLATFORM" == "Illumina" ] ; then @@ -299,6 +282,7 @@ # Primer3 settings set_config_value primer3input Output/pr3in.txt config.txt set_config_value primer3output Output/pr3out.txt config.txt +set_config_value keepPrimer3files 1 config.txt set_config_value primer3executable $PRIMER3_CORE_EXE config.txt set_config_value prNamePrefix ${PRIMER_PREFIX}_ config.txt set_config_value PRIMER_MISPRIMING_LIBRARY "$PRIMER_MISPRIMING_LIBRARY" config.txt @@ -329,8 +313,33 @@ # # Check that log ends with "Done!!" message if [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then - echo ERROR pal_finder failed to complete successfully >&2 - exit 1 + fatal ERROR pal_finder failed to complete successfully +fi +echo "### pal_finder finished ###" +# +# Check for errors in pal_finder output +echo "### Checking for errors ###" +if [ ! -z "$(grep 'primer3_core: Illegal element in PRIMER_PRODUCT_SIZE_RANGE' pal_finder.log)" ] ; then + echo WARNING primer3 terminated prematurely due to bad product size ranges + if [ -z "$BAD_PRIMER_RANGES" ] ; then + # No output file so report to stderr + cat >&2 <<EOF +ERROR primer3 terminated prematurely due to bad product size ranges + +Pal_finder generated bad ranges for the following read IDs: +EOF + echo $(find_bad_primer_ranges Output/pr3in.txt) >&2 + cat >&2 <<EOF + +This error can occur when input data contains short R1 reads and has +has not been properly trimmed and filtered. + +EOF + else + # Dump bad ranges to file + echo "### Writing read IDs with bad primer ranges ###" + echo $(find_bad_primer_ranges Output/pr3in.txt) >"$BAD_PRIMER_RANGES" + fi fi # # Sort microsat_summary output @@ -362,11 +371,9 @@ fi tail -$MAX_LINES pal_filter.log if [ $? -ne 0 ] ; then - echo ERROR $PALFINDER_FILTER exited with non-zero status >&2 - exit 1 + fatal $PALFINDER_FILTER exited with non-zero status elif [ ! -f PAL_summary.filtered ] ; then - echo ERROR no output from $PALFINDER_FILTER >&2 - exit 1 + fatal no output from $PALFINDER_FILTER fi fi # @@ -386,8 +393,7 @@ if [ -f "$assembly" ] ; then /bin/mv $assembly "$OUTPUT_ASSEMBLY" else - echo ERROR no assembly output found >&2 - exit 1 + fatal no assembly output found fi fi if [ ! -z "$OUTPUT_CONFIG_FILE" ] && [ -f config.txt ] ; then
--- a/pal_finder_wrapper.xml Thu Mar 15 11:52:19 2018 -0400 +++ b/pal_finder_wrapper.xml Thu Mar 22 07:21:26 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="microsat_pal_finder" name="pal_finder" version="0.02.04.6"> +<tool id="microsat_pal_finder" name="pal_finder" version="0.02.04.7"> <description>Find microsatellite repeat elements from sequencing reads and design PCR primers to amplify them</description> <macros> <import>pal_finder_macros.xml</import> @@ -26,6 +26,9 @@ --454 "$platform.input_fasta" #end if $output_microsat_summary $output_pal_summary + #if $report_bad_primer_ranges + --bad_primer_ranges "$output_bad_primer_read_ids" + #end if #if $keep_config_file --output_config_file "$output_config_file" #end if @@ -156,6 +159,7 @@ help="Temperature should be in degrees Celsius" /> </when> </conditional> + <param name="report_bad_primer_ranges" type="boolean" truevalue="True" falsevalue="False" label="Output IDs for input reads which generate bad primer ranges" help="Can be used to screen input Fastqs" /> <param name="keep_config_file" type="boolean" truevalue="True" falsevalue="False" label="Output the config file to the history" help="Can be used to run pal_finder outside of Galaxy" /> @@ -169,6 +173,9 @@ <data name="output_assembly" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix}: assembly"> <filter>platform['assembly'] is True</filter> </data> + <data name="output_bad_primer_read_ids" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix}: read IDs generating bad primer ranges"> + <filter>report_bad_primer_ranges is True</filter> + </data> <data name="output_config_file" format="txt" label="${tool.name} on ${on_string} for ${primer_prefix}: config file"> <filter>keep_config_file is True</filter> </data> @@ -247,6 +254,19 @@ <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats.out.re_match" /> <output name="output_filtered_microsats" compare="re_match" file="illuminaPE_filtered_microsats_rankmotifs.out.re_match" /> </test> + <!-- Test with Illumina input generating bad primer ranges + --> + <test> + <param name="platform_type" value="illumina" /> + <param name="filters" value="" /> + <param name="assembly" value="false" /> + <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" /> + <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" /> + <param name="output_bad_primer_read_ids" value="true" /> + <expand macro="output_illumina_microsat_summary" /> + <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats.out.re_match" /> + <output name="output_bad_primer_read_ids" file="illuminaPE_bad_primer_ids.out" /> + </test> <!-- Test with 454 input --> <test> <param name="platform_type" value="454" /> @@ -280,6 +300,29 @@ ------------- +.. class:: warning + +**Known problems** + +.. class:: infomark + +**Bad primer product size ranges** + +For some datasets pal_finder may generate 'bad' product size ranges (where the +lower limit exceeds the upper limit) for one or more reads, for input into +primer3_core. + +If this occurs then the tool will terminate with an error. A list of the reads +for which the bad ranges were generated can be found in the error message +which can be accessed via the 'bug' icon from a failed dataset. + +The conditions which cause this error are unclear. However we believe it to be +associated with short or low quality reads. It is recommended that the input +data are sufficiently trimmed and filtered (using e.g. the Trimmomatic tool) +before rerunning pal_finder. + +------------- + .. class:: infomark **Credits**
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pal_finder_wrapper_utils.sh Thu Mar 22 07:21:26 2018 -0400 @@ -0,0 +1,71 @@ +#!/bin/bash +# +# Helper functions for the pal_finder_wrapper.sh script +# +# Utility function for terminating on fatal error +function fatal() { + echo "FATAL $@" >&2 + exit 1 +} +# +# Check that specified program is available +function have_program() { + local program=$1 + local got_program=$(which $program 2>&1 | grep "no $(basename $program) in") + if [ -z "$got_program" ] ; then + echo yes + else + echo no + fi +} +# +# Set the value for a parameter in the pal_finder config file +function set_config_value() { + local key=$1 + local value=$2 + local config_txt=$3 + if [ -z "$value" ] ; then + echo "No value for $key, left as default" + else + echo Setting "$key" to "$value" + sed -i 's,^'"$key"' .*,'"$key"' '"$value"',' $config_txt + fi +} +# +# Identify 'bad' PRIMER_PRODUCT_SIZE_RANGE from pr3in.txt file +function find_bad_primer_ranges() { + # Parses a pr3in.txt file from pal_finder and reports + # sequence ids where the PRIMER_PRODUCT_SIZE_RANGE has + # upper limit which is smaller than lower limit + local pr3in=$1 + local pattern="^(SEQUENCE_ID|PRIMER_PRODUCT_SIZE_RANGE)" + for line in $(grep -E "$pattern" $pr3in | sed 's/ /^/' | sed 'N;s/\n/*/') + do + # Loop over pairs of SEQUENCE_ID and PRIMER_PRODUCT_SIZE_RANGE + # keywords in the primer3 input + if [ ! -z "$(echo $line | grep ^SEQUENCE_ID)" ] ; then + # Extract the values + local size_range=$(echo $line | cut -d'*' -f2 | cut -d'=' -f2 | tr '^' ' ') + local seq_id=$(echo $line | cut -d'*' -f1 | cut -d'=' -f2) + else + local size_range=$(echo $line | cut -d'*' -f1 | cut -d'=' -f2) + local seq_id=$(echo $line | cut -d'*' -f2 | cut -d'=' -f2) + fi + seq_id=$(echo $seq_id | cut -d')' -f3) + # Check the upper and lower limits in each range + # to see if it's okay + local bad_range= + for range in $(echo $size_range) ; do + local lower=$(echo $range | cut -d'-' -f1) + local upper=$(echo $range | cut -d'-' -f2) + if [ $lower -gt $upper ] ; then + bad_range=yes + break + fi + done + # Report if the range is wrong + if [ ! -z "$bad_range" ] ; then + echo "$seq_id ($size_range)" + fi + done +}