comparison baralign.sh @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents
children
comparison
equal deleted inserted replaced
17:836fa4fe9494 18:e4d75f9efb90
1 #!/usr/bin/env bash
2 if [ x$BASH = x ] || [ ! $BASH_VERSINFO ] || [ $BASH_VERSINFO -lt 4 ]; then
3 echo "Error: Must use bash version 4+." >&2
4 exit 1
5 fi
6 set -ue
7
8 Usage="Usage: \$ $(basename $0) [-R] families_file ref_dir out_file
9 families_file: The families.tsv produced by make-barcodes.awk and sorted.
10 ref_dir: The directory to put the reference file (\"barcodes.fa\") and its index
11 files in.
12 out_file: The path to put the output alignment BAM file at.
13 -R: Don't include reversed barcodes (alpha+beta -> beta+alpha) in the alignment target."
14
15 function main {
16
17 # Read in arguments and check them.
18
19 reverse=true
20 while getopts ":rh" opt; do
21 case "$opt" in
22 r) reverse='';;
23 h) echo "$USAGE"
24 exit;;
25 esac
26 done
27 # Get positional arguments.
28 families=${@:$OPTIND:1}
29 refdir=${@:$OPTIND+1:1}
30 outfile=${@:$OPTIND+2:1}
31
32 if ! [[ -f $families ]]; then
33 fail "Error: families_file \"$families\" not found."
34 fi
35 if ! [[ -d $refdir ]]; then
36 echo "Info: ref_dir \"$refdir\" not found. Creating.." >&2
37 mkdir $refdir
38 fi
39 outbase=$(echo $outfile | sed -E 's/\.bam$//')
40 if [[ $outbase == $outfile ]]; then
41 fail "Error: out_file \"$outfile\" does not end in .bam."
42 fi
43 if [[ -e $outfile ]] || [[ -e $outbase.sam ]] || [[ -e $outbase.tmp.sam ]]; then
44 fail "Error: out_file \"$outfile\" conflicts with existing filename(s)."
45 fi
46
47 for cmd in bowtie2 bowtie2-build samtools awk; do
48 if ! which $cmd >/dev/null 2>/dev/null; then
49 fail "Error: command \"$cmd\" not found."
50 fi
51 done
52
53 echo "
54 families: $families
55 refdir: $refdir
56 outfile: $outfile
57 outbase: $outbase"
58
59 # Create FASTA with barcodes as "reads" for alignment.
60 awk '$1 != last {
61 count++
62 print ">" count
63 print $1
64 }
65 {
66 last = $1
67 }' $families > $refdir/barcodes.fa
68
69 # Create "reference" to align the barcodes to.
70 if [[ $reverse ]]; then
71 # If we're including reversed barcodes, create a new FASTA which includes reversed barcodes
72 # as well as their forward versions.
73 awk '
74 $1 != last {
75 count++
76 bar = $1
77 print ">" count
78 print bar
79 print ">" count ":rev"
80 print swap_halves(bar)
81 }
82 {
83 last = $1
84 }
85 function swap_halves(str) {
86 half = length(str)/2
87 alpha = substr(str, 1, half)
88 beta = substr(str, half+1)
89 return beta alpha
90 }' $families > $refdir/barcodes-ref.fa
91 else
92 # If we're not including reversed barcodes, the original FASTA is all we need. Just link to it.
93 ln -s $refdir/barcodes.fa $refdir/barcodes-ref.fa
94 fi
95
96 # Perform alignment.
97 bowtie2-build --packed $refdir/barcodes-ref.fa $refdir/barcodes-ref >/dev/null
98 bowtie2 -a -x $refdir/barcodes-ref -f -U $refdir/barcodes.fa -S $outbase.sam
99 samtools view -Sb $outbase.sam > $outbase.tmp.bam
100 samtools sort $outbase.tmp.bam $outbase
101 if [[ -s $outfile ]]; then
102 samtools index $outbase.bam
103 rm $outbase.sam $outbase.tmp.bam
104 echo "Success. Output located in \"$outfile\"." >&2
105 else
106 echo "Warning: No output file \"$outfile\" found." >&2
107 fi
108 }
109
110 function fail {
111 echo "$@" >&2
112 exit 1
113 }
114
115 main "$@"