Mercurial > repos > nick > duplex
comparison baralign.sh @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
| author | nick |
|---|---|
| date | Thu, 02 Feb 2017 18:44:31 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 17:836fa4fe9494 | 18:e4d75f9efb90 |
|---|---|
| 1 #!/usr/bin/env bash | |
| 2 if [ x$BASH = x ] || [ ! $BASH_VERSINFO ] || [ $BASH_VERSINFO -lt 4 ]; then | |
| 3 echo "Error: Must use bash version 4+." >&2 | |
| 4 exit 1 | |
| 5 fi | |
| 6 set -ue | |
| 7 | |
| 8 Usage="Usage: \$ $(basename $0) [-R] families_file ref_dir out_file | |
| 9 families_file: The families.tsv produced by make-barcodes.awk and sorted. | |
| 10 ref_dir: The directory to put the reference file (\"barcodes.fa\") and its index | |
| 11 files in. | |
| 12 out_file: The path to put the output alignment BAM file at. | |
| 13 -R: Don't include reversed barcodes (alpha+beta -> beta+alpha) in the alignment target." | |
| 14 | |
| 15 function main { | |
| 16 | |
| 17 # Read in arguments and check them. | |
| 18 | |
| 19 reverse=true | |
| 20 while getopts ":rh" opt; do | |
| 21 case "$opt" in | |
| 22 r) reverse='';; | |
| 23 h) echo "$USAGE" | |
| 24 exit;; | |
| 25 esac | |
| 26 done | |
| 27 # Get positional arguments. | |
| 28 families=${@:$OPTIND:1} | |
| 29 refdir=${@:$OPTIND+1:1} | |
| 30 outfile=${@:$OPTIND+2:1} | |
| 31 | |
| 32 if ! [[ -f $families ]]; then | |
| 33 fail "Error: families_file \"$families\" not found." | |
| 34 fi | |
| 35 if ! [[ -d $refdir ]]; then | |
| 36 echo "Info: ref_dir \"$refdir\" not found. Creating.." >&2 | |
| 37 mkdir $refdir | |
| 38 fi | |
| 39 outbase=$(echo $outfile | sed -E 's/\.bam$//') | |
| 40 if [[ $outbase == $outfile ]]; then | |
| 41 fail "Error: out_file \"$outfile\" does not end in .bam." | |
| 42 fi | |
| 43 if [[ -e $outfile ]] || [[ -e $outbase.sam ]] || [[ -e $outbase.tmp.sam ]]; then | |
| 44 fail "Error: out_file \"$outfile\" conflicts with existing filename(s)." | |
| 45 fi | |
| 46 | |
| 47 for cmd in bowtie2 bowtie2-build samtools awk; do | |
| 48 if ! which $cmd >/dev/null 2>/dev/null; then | |
| 49 fail "Error: command \"$cmd\" not found." | |
| 50 fi | |
| 51 done | |
| 52 | |
| 53 echo " | |
| 54 families: $families | |
| 55 refdir: $refdir | |
| 56 outfile: $outfile | |
| 57 outbase: $outbase" | |
| 58 | |
| 59 # Create FASTA with barcodes as "reads" for alignment. | |
| 60 awk '$1 != last { | |
| 61 count++ | |
| 62 print ">" count | |
| 63 print $1 | |
| 64 } | |
| 65 { | |
| 66 last = $1 | |
| 67 }' $families > $refdir/barcodes.fa | |
| 68 | |
| 69 # Create "reference" to align the barcodes to. | |
| 70 if [[ $reverse ]]; then | |
| 71 # If we're including reversed barcodes, create a new FASTA which includes reversed barcodes | |
| 72 # as well as their forward versions. | |
| 73 awk ' | |
| 74 $1 != last { | |
| 75 count++ | |
| 76 bar = $1 | |
| 77 print ">" count | |
| 78 print bar | |
| 79 print ">" count ":rev" | |
| 80 print swap_halves(bar) | |
| 81 } | |
| 82 { | |
| 83 last = $1 | |
| 84 } | |
| 85 function swap_halves(str) { | |
| 86 half = length(str)/2 | |
| 87 alpha = substr(str, 1, half) | |
| 88 beta = substr(str, half+1) | |
| 89 return beta alpha | |
| 90 }' $families > $refdir/barcodes-ref.fa | |
| 91 else | |
| 92 # If we're not including reversed barcodes, the original FASTA is all we need. Just link to it. | |
| 93 ln -s $refdir/barcodes.fa $refdir/barcodes-ref.fa | |
| 94 fi | |
| 95 | |
| 96 # Perform alignment. | |
| 97 bowtie2-build --packed $refdir/barcodes-ref.fa $refdir/barcodes-ref >/dev/null | |
| 98 bowtie2 -a -x $refdir/barcodes-ref -f -U $refdir/barcodes.fa -S $outbase.sam | |
| 99 samtools view -Sb $outbase.sam > $outbase.tmp.bam | |
| 100 samtools sort $outbase.tmp.bam $outbase | |
| 101 if [[ -s $outfile ]]; then | |
| 102 samtools index $outbase.bam | |
| 103 rm $outbase.sam $outbase.tmp.bam | |
| 104 echo "Success. Output located in \"$outfile\"." >&2 | |
| 105 else | |
| 106 echo "Warning: No output file \"$outfile\" found." >&2 | |
| 107 fi | |
| 108 } | |
| 109 | |
| 110 function fail { | |
| 111 echo "$@" >&2 | |
| 112 exit 1 | |
| 113 } | |
| 114 | |
| 115 main "$@" |
