Mercurial > repos > nick > duplex
diff baralign.sh @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
| author | nick |
|---|---|
| date | Thu, 02 Feb 2017 18:44:31 -0500 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/baralign.sh Thu Feb 02 18:44:31 2017 -0500 @@ -0,0 +1,115 @@ +#!/usr/bin/env bash +if [ x$BASH = x ] || [ ! $BASH_VERSINFO ] || [ $BASH_VERSINFO -lt 4 ]; then + echo "Error: Must use bash version 4+." >&2 + exit 1 +fi +set -ue + +Usage="Usage: \$ $(basename $0) [-R] families_file ref_dir out_file +families_file: The families.tsv produced by make-barcodes.awk and sorted. +ref_dir: The directory to put the reference file (\"barcodes.fa\") and its index + files in. +out_file: The path to put the output alignment BAM file at. +-R: Don't include reversed barcodes (alpha+beta -> beta+alpha) in the alignment target." + +function main { + + # Read in arguments and check them. + + reverse=true + while getopts ":rh" opt; do + case "$opt" in + r) reverse='';; + h) echo "$USAGE" + exit;; + esac + done + # Get positional arguments. + families=${@:$OPTIND:1} + refdir=${@:$OPTIND+1:1} + outfile=${@:$OPTIND+2:1} + + if ! [[ -f $families ]]; then + fail "Error: families_file \"$families\" not found." + fi + if ! [[ -d $refdir ]]; then + echo "Info: ref_dir \"$refdir\" not found. Creating.." >&2 + mkdir $refdir + fi + outbase=$(echo $outfile | sed -E 's/\.bam$//') + if [[ $outbase == $outfile ]]; then + fail "Error: out_file \"$outfile\" does not end in .bam." + fi + if [[ -e $outfile ]] || [[ -e $outbase.sam ]] || [[ -e $outbase.tmp.sam ]]; then + fail "Error: out_file \"$outfile\" conflicts with existing filename(s)." + fi + + for cmd in bowtie2 bowtie2-build samtools awk; do + if ! which $cmd >/dev/null 2>/dev/null; then + fail "Error: command \"$cmd\" not found." + fi + done + + echo " +families: $families +refdir: $refdir +outfile: $outfile +outbase: $outbase" + + # Create FASTA with barcodes as "reads" for alignment. + awk '$1 != last { + count++ + print ">" count + print $1 + } + { + last = $1 + }' $families > $refdir/barcodes.fa + + # Create "reference" to align the barcodes to. + if [[ $reverse ]]; then + # If we're including reversed barcodes, create a new FASTA which includes reversed barcodes + # as well as their forward versions. + awk ' + $1 != last { + count++ + bar = $1 + print ">" count + print bar + print ">" count ":rev" + print swap_halves(bar) + } + { + last = $1 + } + function swap_halves(str) { + half = length(str)/2 + alpha = substr(str, 1, half) + beta = substr(str, half+1) + return beta alpha + }' $families > $refdir/barcodes-ref.fa + else + # If we're not including reversed barcodes, the original FASTA is all we need. Just link to it. + ln -s $refdir/barcodes.fa $refdir/barcodes-ref.fa + fi + + # Perform alignment. + bowtie2-build --packed $refdir/barcodes-ref.fa $refdir/barcodes-ref >/dev/null + bowtie2 -a -x $refdir/barcodes-ref -f -U $refdir/barcodes.fa -S $outbase.sam + samtools view -Sb $outbase.sam > $outbase.tmp.bam + samtools sort $outbase.tmp.bam $outbase + if [[ -s $outfile ]]; then + samtools index $outbase.bam + rm $outbase.sam $outbase.tmp.bam + echo "Success. Output located in \"$outfile\"." >&2 + else + echo "Warning: No output file \"$outfile\" found." >&2 + fi +} + +function fail { + echo "$@" >&2 + exit 1 +} + +main "$@"
