diff beast.sh @ 0:d67268158946 draft

planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author bcclaywell
date Mon, 12 Oct 2015 17:43:33 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/beast.sh	Mon Oct 12 17:43:33 2015 -0400
@@ -0,0 +1,159 @@
+#!/bin/bash
+
+source $(dirname $0)/util.sh
+source $1
+
+
+# COMPUTE METADATA IF NEEDED!
+# ===========================
+
+if [[ $METADATA_SPECIFICATION == "regex" ]]
+then
+  # First make sure we actually have an alignment to operate on
+  if [[ ${ALIGNMENT} == "" ]]
+  then
+    echo "Must have alignment in order to specify metadata via regular expression" > /dev/stderr
+    exit 1
+  fi
+  DEME_COLUMN="deme"
+  # If we have a regex, will need to create the specification file to pass along
+  if [[ $DATE_REGEX != "" ]]
+  then
+    DATE_REGEX_FLAG="-t $DATE_REGEX"
+    # Need to let things know downstream that there is a date column in the constructed metadata
+    DATE_COLUMN="date"
+  else
+    DATE_REGEX_FLAG=""
+    DATE_COLUMN=""
+  fi
+  metadata_from_seqnames.py -d "$DEME_REGEX" ${DATE_REGEX_FLAG} \
+    ${ALIGNMENT} tmp_deme_spec.csv
+  METADATA_FILE=tmp_deme_spec.csv
+fi
+
+
+
+# HANDLE DOWNSAMPLING
+# ===================
+
+if [[ $DOWNSAMPLING_METHOD != "none" ]]
+then
+  if [[ $METADATA_FILE == "" || $ALIGNMENT == "" ]]
+  then
+    echo "Must specify deme and alignment data in order to downsample" > /dev/stdout
+    exit 1
+  fi
+  if [[ $DOWNSAMPLING_RANDOM_SEED == "" ]]
+  then
+    DOWNSAMPLING_RANDOM_SEED=$RANDOM
+    echo "You did not specify a random seed for this run, so this one is being used, in case you'd like to
+      reproduce your results: $DOWNSAMPLING_RANDOM_SEED"
+  else
+    DS_RANDOM_SEED_FLAG="-s $DOWNSAMPLING_RANDOM_SEED"
+  fi
+  # This script does the downsampling
+  deme_downsample.py $DS_RANDOM_SEED_FLAG -m $DOWNSAMPLING_METHOD -k $DOWNSAMPLING_K -c $DEME_COLUMN \
+    $ALIGNMENT $METADATA_FILE \
+    $DOWNSAMPLED_ALIGNMENT downsampled_metadata.csv
+  # Assign these to the unsampled variable names so the code below follows the same flow regardless
+  ALIGNMENT=$DOWNSAMPLED_ALIGNMENT
+  METADATA_FILE=downsampled_metadata.csv
+  # If we are downsampling and specified a metadata file, make sure to return a downsampled_metadata file
+  if [[ $METADATA_SPECIFICATION == "file" ]]
+  then
+    cp downsampled_metadata.csv $DOWNSAMPLED_METADATA
+  fi
+fi
+
+
+
+# CONSTRUCT BEASTFILE AND RUN BEAST
+# =================================
+
+# Make a metadata flag we can pass into the format command
+if [[ $METADATA_FILE != "" ]]
+then
+  # Then use either the file given to us, or the one we constructed from regexprs
+  META_FLAG="-m $METADATA_FILE"
+else
+  META_FLAG=""
+fi
+
+# Make a metadata flag we can pass into the format command
+if [[ $ALIGNMENT != "" ]]
+then
+  # Then use either the file given to us, or the one we constructed from regexprs
+  ALIGNMENT_FLAG="-a $ALIGNMENT"
+else
+  ALIGNMENT_FLAG=""
+fi
+
+# Set the default BEASTfile
+if [ $BEASTFILE_SPECIFICATION == "default" ]
+then
+  # ARGO_TOOL_DIR gets defined in utils; magick sauce...
+  BEASTFILE_TEMPLATE=$ARGO_TOOL_DIR/default_beastfile_template.xml
+fi
+
+# Fork on a bunch of things based on whether this is a resume run or not
+if [ $RESUME_SELECTOR == "true" ]
+then
+  # There should be a specified beastfile if we're resuming, otherwise raise
+  if [ $BEASTFILE_SPECIFICATION == "default" ]
+  then
+    echo "Must specify the beastfile output by last run if doing a resume run" > /dev/stdout
+    exit 1
+  fi
+  # Take care of getting things set up for proper formatting and resuming
+  FORMAT_ARGS="$SAMPLES_FLAG" # We don't want to accept any beastfile modifications except samples
+  RESUME_FLAG="-resume"
+  cp $RESUME_LOGFILE posterior.log
+  cp $RESUME_TREEFILE posterior.trait.trees
+  # We assure this state location works by renaming the beastfile to beastfile.xml later
+  cp $RESUME_STATEFILE beastfile.xml.state
+else
+  # Otherwise, set the full flag collection
+  RESUME_FLAG=""
+  FORMAT_ARGS="$ALIGNMENT_FLAG $META_FLAG $SAMPLES_FLAG $SAMPLING_INTERVAL_FLAG"
+  # Add deme information only if we know what column it is (which will only be if we have metadata or regexs)
+  if [[ $DATE_COLUMN != "" ]]
+  then
+    FORMAT_ARGS="$FORMAT_ARGS -d $DEME_COLUMN"
+  fi
+  # Add the date information only if a date column is specified
+  if [[ $DATE_COLUMN != "" ]]
+  then
+    FORMAT_ARGS="$FORMAT_ARGS -D $DATE_COLUMN"
+  fi
+fi
+
+# Format our beastfile
+format_beastfile.py $BEASTFILE_TEMPLATE $FORMAT_ARGS $FORMATTED_BEASTFILE
+
+# Actually run BEAST and set the output vars to their locations
+cp $FORMATTED_BEASTFILE beastfile.xml
+
+# BEAST should be in our path from the argo_env requirement
+beast $RANDOM_SEED_FLAG $RESUME_FLAG beastfile.xml
+
+# Copy files over to the locations Galaxy has specified for them
+cp posterior.log $LOGFILE
+cp posterior.trait.trees $TREEFILE
+cp beastfile.xml.state $STATEFILE
+
+
+# LOGFILE TRIMMING FOR RESUME RUNS
+# ================================
+
+if [ $RESUME_SELECTOR == "true" ]
+then
+  posterior-subset -t logfile -c $RESUME_SAMPLES $LOGFILE $TRIMMED_LOGFILE
+  posterior-subset -t treefile -c $RESUME_SAMPLES $TREEFILE $TRIMMED_TREEFILE
+  ess.py --html-out $TRIMMED_LOGFILE $ESS
+else
+  ess.py --html-out $LOGFILE $ESS
+fi
+
+
+
+