view baralign.sh @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents
children
line wrap: on
line source

#!/usr/bin/env bash
if [ x$BASH = x ] || [ ! $BASH_VERSINFO ] || [ $BASH_VERSINFO -lt 4 ]; then
  echo "Error: Must use bash version 4+." >&2
  exit 1
fi
set -ue

Usage="Usage: \$ $(basename $0) [-R] families_file ref_dir out_file
families_file: The families.tsv produced by make-barcodes.awk and sorted.
ref_dir:  The directory to put the reference file (\"barcodes.fa\") and its index
          files in.
out_file: The path to put the output alignment BAM file at.
-R: Don't include reversed barcodes (alpha+beta -> beta+alpha) in the alignment target."

function main {

  # Read in arguments and check them.

  reverse=true
  while getopts ":rh" opt; do
  case "$opt" in
      r) reverse='';;
      h) echo "$USAGE"
         exit;;
    esac
  done
  # Get positional arguments.
  families=${@:$OPTIND:1}
  refdir=${@:$OPTIND+1:1}
  outfile=${@:$OPTIND+2:1}

  if ! [[ -f $families ]]; then
    fail "Error: families_file \"$families\" not found."
  fi
  if ! [[ -d $refdir ]]; then
    echo "Info: ref_dir \"$refdir\" not found. Creating.." >&2
    mkdir $refdir
  fi
  outbase=$(echo $outfile | sed -E 's/\.bam$//')
  if [[ $outbase == $outfile ]]; then
    fail "Error: out_file \"$outfile\" does not end in .bam."
  fi
  if [[ -e $outfile ]] || [[ -e $outbase.sam ]] || [[ -e $outbase.tmp.sam ]]; then
    fail "Error: out_file \"$outfile\" conflicts with existing filename(s)."
  fi

  for cmd in bowtie2 bowtie2-build samtools awk; do
    if ! which $cmd >/dev/null 2>/dev/null; then
      fail "Error: command \"$cmd\" not found."
    fi
  done

  echo "
families: $families
refdir:   $refdir
outfile:  $outfile
outbase:  $outbase"

  # Create FASTA with barcodes as "reads" for alignment.
  awk '$1 != last {
    count++
    print ">" count
    print $1
  }
  {
    last = $1
  }' $families > $refdir/barcodes.fa

  # Create "reference" to align the barcodes to.
  if [[ $reverse ]]; then
    # If we're including reversed barcodes, create a new FASTA which includes reversed barcodes
    # as well as their forward versions.
    awk '
      $1 != last {
        count++
        bar = $1
        print ">" count
        print bar
        print ">" count ":rev"
        print swap_halves(bar)
      }
      {
        last = $1
      }
      function swap_halves(str) {
        half = length(str)/2
        alpha = substr(str, 1, half)
        beta = substr(str, half+1)
        return beta alpha
      }' $families > $refdir/barcodes-ref.fa
  else
    # If we're not including reversed barcodes, the original FASTA is all we need. Just link to it.
    ln -s $refdir/barcodes.fa $refdir/barcodes-ref.fa
  fi

  # Perform alignment.
  bowtie2-build --packed $refdir/barcodes-ref.fa $refdir/barcodes-ref >/dev/null
  bowtie2 -a -x $refdir/barcodes-ref -f -U $refdir/barcodes.fa -S $outbase.sam
  samtools view -Sb $outbase.sam > $outbase.tmp.bam
  samtools sort $outbase.tmp.bam $outbase
  if [[ -s $outfile ]]; then
    samtools index $outbase.bam
    rm $outbase.sam $outbase.tmp.bam
    echo "Success. Output located in \"$outfile\"." >&2
  else
    echo "Warning: No output file \"$outfile\" found." >&2
  fi
}

function fail {
  echo "$@" >&2
  exit 1
}

main "$@"