comparison JAMM.sh @ 1:243f75d0ed6e draft default tip

Uploaded. Includes new release 1.0.7 with fixed optional controls.
author messersc
date Thu, 19 Feb 2015 05:39:45 -0500
parents d42f4d78c85e
children
comparison
equal deleted inserted replaced
0:d42f4d78c85e 1:243f75d0ed6e
1 #!/bin/bash 1 ########################################################################
2 ######################################## 2 # JAMMv1.0.7rev1 is a peak finder for joint analysis of NGS replicates.
3 #### Peak finding Pipeline for NGS Data 3 # Copyright (C) 2014-2015 Mahmoud Ibrahim
4 #### Bash script 4 #
5 ######################################## 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU General Public License as published by
7 export LC_ALL=en_US.UTF-8 7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 #
18 # Contact: mahmoud.ibrahim@mdc-berlin.de
19 ########################################################################
20
8 21
9 ##Finding out the path 22 ##Finding out the path
10 sPath="`dirname \"$0\"`" 23 sPath="`dirname \"$0\"`"
11 sPath="`( cd \"$sPath\" && pwd )`" 24 sPath="`( cd \"$sPath\" && pwd )`"
12 25
13 26
14 27
15 usage() 28 usage()
16 { 29 {
17 cat << EOF 30 cat << EOF
18 Welcome to JAMM v1.0.6rev2 (GNU GPLv3). Copyright (C) 2014 Mahmoud Ibrahim. 31 Welcome to JAMM v1.0.7rev1 (GNU GPLv3). Copyright (C) 2014-2015 Mahmoud Ibrahim.
19 32
20 This program comes with ABSOLUTELY NO WARRANTY; for details visit http://www.gnu.org/licenses/gpl.html. This is free software, and you are welcome to redistribute it under certain conditions; visit http://www.gnu.org/licenses/gpl.html for details. 33 This program comes with ABSOLUTELY NO WARRANTY; for details visit http://www.gnu.org/licenses/gpl.html. This is free software, and you are welcome to redistribute it under certain conditions; visit http://www.gnu.org/licenses/gpl.html for details.
21 34
22 OPTIONS: 35 OPTIONS:
23 -s directory containing Sample files (required) 36 -s directory containing Sample files (required)
24 -g Genome size file (required) 37 -g Genome size file (required)
25 -o Output directory (required) 38 -o Output directory (required)
26 -c directory containing input or Control files 39 -c directory containing input or Control files
40 -f Fragment length(s) (default: estimated)
41 -r Resolution, peak or region or window (default: peak)
27 -m Mode, normal or narrow (default: normal) 42 -m Mode, normal or narrow (default: normal)
28 -r Resolution, peak or region (default: peak) 43 -i clustering Initialization window selection, deterministic or stochastic (default: deterministic)
29 -f Fragment length(s) (default: estimated) 44 -b Bin Size (default: estimated)
30 -w minimum Window size (default: 2 --- Note: this means minimum_window_size = bin_size x the_value_of_-w) 45 -w minimum Window size (default: 2 --- Note: this means minimum_window_size = bin_size x the_value_of_-w)
46 -e window Enrichment cutoff, auto or any numeric value (default: 1 --- Set this to "auto" to estimate the window enrichment cutoff)
47 -d keep PCR Dupicates in single-end mode, y or n (default: n --- if -t is "paired", this option has no effect)
48 -t Type, single or paired (default: single, requires BED files. paired requires BEDPE files)
31 -p Number of processors used by R scripts (default: 1) 49 -p Number of processors used by R scripts (default: 1)
32 -b Bin Size (default: estimated) 50
33 -t Type, single or paired (default: single, requires BED files. paired requires BEDPE files)
34 -e window Enrichment cutoff, auto or any numeric value (default: 1 --- Set this to auto to estimate the appropriate window enrichment cutoff)
35 EOF 51 EOF
36 } 52 }
37 53
38 54
39 # ========================= 55 # =========================
46 resol="peak" 62 resol="peak"
47 cores="1" 63 cores="1"
48 window="2" 64 window="2"
49 type="single" 65 type="single"
50 windowe="1" 66 windowe="1"
51 67 initModel="deterministic"
68 uniq="n"
52 69
53 #Defaults -- Do not change 70 #Defaults -- Do not change
54 sdir="" 71 sdir=""
55 gsize="" 72 gsize=""
56 out="" 73 out=""
57 binsize="ns" 74 binsize="ns"
58 fraglen="ns" 75 fraglen="ns"
59 ran=$RANDOM 76 ran=$RANDOM
60 wdir=$(mktemp -d) 77 wdir=$(mktemp -d)
61 78 export LANG=C #locale defaults
62 while getopts "s:g:o:c:m:r:f:p:w:b:t:e:" OPTION 79 export LC_ALL=C #locale defaults
80
81 while getopts "s:g:o:c:m:r:f:p:w:b:t:e:i:d:" OPTION
63 do 82 do
64 case $OPTION in 83 case $OPTION in
65 s) sdir=$OPTARG 84 s) sdir=$OPTARG
66 ;; 85 ;;
67 g) gsize=$OPTARG 86 g) gsize=$OPTARG
83 b) binsize=$OPTARG 102 b) binsize=$OPTARG
84 ;; 103 ;;
85 t) type=$OPTARG 104 t) type=$OPTARG
86 ;; 105 ;;
87 e) windowe=$OPTARG 106 e) windowe=$OPTARG
107 ;;
108 i) initModel=$OPTARG
109 ;;
110 d) uniq=$OPTARG
88 ;; 111 ;;
89 ?) 112 ?)
90 usage 113 usage
91 exit 114 exit
92 ;; 115 ;;
102 if [[ -z $sdir ]] || [[ -z $gsize ]] || [[ -z $out ]] 125 if [[ -z $sdir ]] || [[ -z $gsize ]] || [[ -z $out ]]
103 then 126 then
104 usage 127 usage
105 exit 1 128 exit 1
106 fi 129 fi
130 if [[ -d "$out/peaks" ]]; then
131 printf "\n\nOutput directory $out/peaks already exists. I can't override existing results!\n\n"
132 exit 0
133 fi
134 if [ $fraglen == "ns" ]; then
135 if [[ -d "$out/xcorr" ]]; then
136 printf "\n\nOutput directory $out/xcorr already exists. I can't override existing results!\n\n"
137 exit 0
138 fi
139 fi
107 #=======================> DONE! 140 #=======================> DONE!
141
108 142
109 143
110 144
111 # ============================= 145 # =============================
112 # Step One: Initial Processing 146 # Step One: Initial Processing
113 # ============================= 147 # =============================
114 printf "\n\n========================================\nStarted JAMM Pipeline v1.0.6rev2...Hang on!\n========================================\n\n" 148 printf "\n\n============================================\nStarted JAMM Pipeline v1.0.7rev1...Hang on!\n============================================\n\n"
115 149
116 if [ ! -d "$wdir" ]; then 150 if [ ! -d "$wdir" ]; then
117 mkdir $wdir #make working directory 151 mkdir $wdir #make working directory
118 fi 152 fi
119 if [ ! -d "$out" ]; then 153 if [ ! -d "$out" ]; then
159 193
160 printf "Done!\n" 194 printf "Done!\n"
161 fi 195 fi
162 196
163 #determine average read lengths 197 #determine average read lengths
164 printf "Getting average read lengths..." 198 printf "Getting average read lengths...\n"
165 if [ ! -z $bdir ]; then 199 readL=""
166 readC=$(awk '{a=$3-$2;print a;}' "$wdir/bkgd.$ran/ctrl.bed" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{a=$1+0.5;print a;}' | cut -d"." -f1) 200 if [ ! -z $bdir ]; then
167 fi 201 readC=$(awk '{a=$3-$2;print a;}' "$wdir/bkgd.$ran/ctrl.bed" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{a=$1+0.5;print a;}' | cut -d"." -f1)
168 readL="" 202 printf "Control: $readC\n"
169 for s in $sdir/*.bed; do #and for each sample file 203 fi
170 read=$(awk '{a=$3-$2;print a;}' "$s" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{a=$1+0.5;print a;}' | cut -d"." -f1) 204 readL=""
171 readL="$readL,$read" 205 for s in $sdir/*.bed; do #and for each sample file
172 done 206 file=$(basename $s)
173 readL=${readL#","} 207 samplefile=$(echo $file | awk -F"." '{print $1}');
174 printf "Done!\n" 208 read=$(awk '{a=$3-$2;print a;}' "$s" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{a=$1+0.5;print a;}' | cut -d"." -f1)
209 printf "$samplefile: $read\n"
210 readL="$readL,$read"
211 done
212 readL=${readL#","}
175 #=======================> DONE! 213 #=======================> DONE!
176 214
177 215
178 216
179 # ============================= 217 # =============================
297 fi 335 fi
298 done 336 done
299 #remove leading comma 337 #remove leading comma
300 samplelist=${samplelist#","} 338 samplelist=${samplelist#","}
301 frag=${frag#","} 339 frag=${frag#","}
302
303 Rscript "$sPath/bincalculator.r" -ibed="$samplelist" -s="$gsize" -rl="$readL" -d="$wdir" -p="$cores" -f="$frag" -type="$type" 340 Rscript "$sPath/bincalculator.r" -ibed="$samplelist" -s="$gsize" -rl="$readL" -d="$wdir" -p="$cores" -f="$frag" -type="$type"
304 fi 341 fi
305 if [ $binsize != "ns" ]; then 342 if [ $binsize != "ns" ]; then
306 printf "You set a Bin Size: $binsize \n" 343 printf "You set a Bin Size: $binsize \n"
307 fi 344 fi
358 if [ $fraglen != "ns" ]; then 395 if [ $fraglen != "ns" ]; then
359 shift=$(echo $frag | cut -f "$k" -d ",") 396 shift=$(echo $frag | cut -f "$k" -d ",")
360 read=$(echo $readL | cut -f "$k" -d ",") 397 read=$(echo $readL | cut -f "$k" -d ",")
361 k=$(($k+1)) 398 k=$(($k+1))
362 fi 399 fi
363 perl "$sPath/readshifter.pl" "$wdir/samples.$ran/$samplefile" $shift $read > "$wdir/samples.$ran/ext.$samplefile" 400 if [ $uniq == "y" ]; then
401 perl "$sPath/readshifter.pl" "$wdir/samples.$ran/$samplefile" $shift $read > "$wdir/samples.$ran/ext.$samplefile"
402 fi
403 if [ $uniq == "n" ]; then
404 perl "$sPath/readshifter.pl" "$wdir/samples.$ran/$samplefile" $shift $read | uniq > "$wdir/samples.$ran/ext.$samplefile"
405 fi
364 fi 406 fi
365 done 407 done
366 408
367 #control file 409 #control file
368 bkgdfile="None" 410 bkgdfile="None"
373 fi 415 fi
374 if [ $fraglen != "ns" ]; then 416 if [ $fraglen != "ns" ]; then
375 l=$(($dupnum+1)) 417 l=$(($dupnum+1))
376 bshift=$(echo $frag | cut -f "$l" -d ",") 418 bshift=$(echo $frag | cut -f "$l" -d ",")
377 fi 419 fi
378 perl "$sPath/readshifter.pl" "$wdir/bkgd.$ran/bkgd.$chr.ctrl.bed" $bshift $readC > "$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed" 420
421 if [ $uniq == "y" ]; then
422 perl "$sPath/readshifter.pl" "$wdir/bkgd.$ran/bkgd.$chr.ctrl.bed" $bshift $readC > "$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed"
423 fi
424 if [ $uniq == "n" ]; then
425 perl "$sPath/readshifter.pl" "$wdir/bkgd.$ran/bkgd.$chr.ctrl.bed" $bshift $readC | uniq > "$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed"
426 fi
379 bkgdfile="$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed" 427 bkgdfile="$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed"
380 fi 428 fi
381 429
382 #remove leading comma 430 #remove leading comma
383 samplelist=${samplelist#","} 431 samplelist=${samplelist#","}
384 frag=${frag#","} 432 frag=${frag#","}
385 433
386 #call the peak calling R script 434 #call the peak calling R script
387 Rscript "$sPath/peakfinder.r" -sfile="$f" -bednames="$samplelist" -frag="$frag" -bkgd=$bkgdfile -out="$wdir/peaks.$ran/" -clustnummer="$clustno" -resolution="$resol" -window="$window" -p="$cores" -bin="$binsize" -type="$type" -chrcount="$counting" -windowe="$windowe" 435 Rscript "$sPath/peakfinder.r" -sfile="$f" -bednames="$samplelist" -frag="$frag" -bkgd=$bkgdfile -out="$wdir/peaks.$ran/" -clustnummer="$clustno" -resolution="$resol" -window="$window" -p="$cores" -bin="$binsize" -type="$type" -chrcount="$counting" -initModel="$initModel" -windowe="$windowe"
388 counting=$(($counting+1)); 436 counting=$(($counting+1));
389 cp "$wdir/peaks.$ran/$chr.peaks.bed" "$out/peaks/$chr.peaks.bed" 437 cp "$wdir/peaks.$ran/$chr.peaks.bed" "$out/peaks/$chr.peaks.bed"
390 rm "$wdir/peaks.$ran/$chr.peaks.bed" 438 rm "$wdir/peaks.$ran/$chr.peaks.bed"
391 done 439 done
392 counting=1; 440 counting=1;
432 #remove leading comma 480 #remove leading comma
433 samplelist=${samplelist#","} 481 samplelist=${samplelist#","}
434 frag=${frag#","} 482 frag=${frag#","}
435 483
436 #call the peak calling R script 484 #call the peak calling R script
437 Rscript "$sPath/peakfinder.r" -sfile=$f -bednames=$samplelist -frag="NA" -bkgd=$bkgdfile -out="$wdir/peaks.$ran/" -clustnummer="$clustno" -resolution="$resol" -window="$window" -p="$cores" -bin="$binsize" -type="$type" -chrcount="$counting" -windowe="$windowe" 485 Rscript "$sPath/peakfinder.r" -sfile=$f -bednames=$samplelist -frag="NA" -bkgd=$bkgdfile -out="$wdir/peaks.$ran/" -clustnummer="$clustno" -resolution="$resol" -window="$window" -p="$cores" -bin="$binsize" -type="$type" -chrcount="$counting" -initModel="$initModel" -windowe="$windowe"
438 counting=$(($counting+1)); 486 counting=$(($counting+1));
439 cp "$wdir/peaks.$ran/$chr.peaks.bed" "$out/peaks/$chr.peaks.bed" 487 cp "$wdir/peaks.$ran/$chr.peaks.bed" "$out/peaks/$chr.peaks.bed"
440 rm "$wdir/peaks.$ran/$chr.peaks.bed" 488 rm "$wdir/peaks.$ran/$chr.peaks.bed"
441 done 489 done
442 counting=1; 490 counting=1;
445 493
446 cp $wdir/peaks.$ran/min.peaksize $out/peaks/min.peaksize 494 cp $wdir/peaks.$ran/min.peaksize $out/peaks/min.peaksize
447 495
448 496
449 #concatenate, sort and filter 497 #concatenate, sort and filter
450 cat $out/peaks/*.bed > $out/peaks/all.narrowPeak 498 cat $out/peaks/*.bed > $out/peaks/all.narrowPeak
451 Rscript "$sPath/peakhelper.r" -filelist="$out/peaks/all.narrowPeak" 499 if [[ -s $out/peaks/all.narrowPeak ]]; then
452 perl "$sPath/peakfilter.pl" $out/peaks/all.narrowPeak | sort -nr -k7 > $out/peaks/filtered.peaks.narrowPeak 500 Rscript "$sPath/peakhelper.r" -filelist="$out/peaks/all.narrowPeak"
453 cut -f1-10 $out/peaks/all.narrowPeak | awk -F"\t" -v j=0 '$7 > j' | sort -nr -k7 > $out/peaks/all.peaks.narrowPeak 501 perl "$sPath/peakfilter.pl" $out/peaks/all.narrowPeak | sort -nr -k7 > $out/peaks/filtered.peaks.narrowPeak
502 cut -f1-10 $out/peaks/all.narrowPeak | awk -F"\t" -v j=0 '$7 > j' | sort -nr -k7 > $out/peaks/all.peaks.narrowPeak
503 fi
454 rm $out/peaks/all.narrowPeak 504 rm $out/peaks/all.narrowPeak
455 rm $out/peaks/*.bed 505 rm $out/peaks/*.bed
456 rm $out/peaks/min.peaksize 506 rm $out/peaks/min.peaksize
457 #=======================> DONE! 507 #=======================> DONE!
458 508