Mercurial > repos > messersc > jamm
comparison JAMM.sh @ 1:243f75d0ed6e draft default tip
Uploaded.
Includes new release 1.0.7 with fixed optional controls.
author | messersc |
---|---|
date | Thu, 19 Feb 2015 05:39:45 -0500 |
parents | d42f4d78c85e |
children |
comparison
equal
deleted
inserted
replaced
0:d42f4d78c85e | 1:243f75d0ed6e |
---|---|
1 #!/bin/bash | 1 ######################################################################## |
2 ######################################## | 2 # JAMMv1.0.7rev1 is a peak finder for joint analysis of NGS replicates. |
3 #### Peak finding Pipeline for NGS Data | 3 # Copyright (C) 2014-2015 Mahmoud Ibrahim |
4 #### Bash script | 4 # |
5 ######################################## | 5 # This program is free software: you can redistribute it and/or modify |
6 | 6 # it under the terms of the GNU General Public License as published by |
7 export LC_ALL=en_US.UTF-8 | 7 # the Free Software Foundation, either version 3 of the License, or |
8 # (at your option) any later version. | |
9 # | |
10 # This program is distributed in the hope that it will be useful, | |
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 # GNU General Public License for more details. | |
14 # | |
15 # You should have received a copy of the GNU General Public License | |
16 # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
17 # | |
18 # Contact: mahmoud.ibrahim@mdc-berlin.de | |
19 ######################################################################## | |
20 | |
8 | 21 |
9 ##Finding out the path | 22 ##Finding out the path |
10 sPath="`dirname \"$0\"`" | 23 sPath="`dirname \"$0\"`" |
11 sPath="`( cd \"$sPath\" && pwd )`" | 24 sPath="`( cd \"$sPath\" && pwd )`" |
12 | 25 |
13 | 26 |
14 | 27 |
15 usage() | 28 usage() |
16 { | 29 { |
17 cat << EOF | 30 cat << EOF |
18 Welcome to JAMM v1.0.6rev2 (GNU GPLv3). Copyright (C) 2014 Mahmoud Ibrahim. | 31 Welcome to JAMM v1.0.7rev1 (GNU GPLv3). Copyright (C) 2014-2015 Mahmoud Ibrahim. |
19 | 32 |
20 This program comes with ABSOLUTELY NO WARRANTY; for details visit http://www.gnu.org/licenses/gpl.html. This is free software, and you are welcome to redistribute it under certain conditions; visit http://www.gnu.org/licenses/gpl.html for details. | 33 This program comes with ABSOLUTELY NO WARRANTY; for details visit http://www.gnu.org/licenses/gpl.html. This is free software, and you are welcome to redistribute it under certain conditions; visit http://www.gnu.org/licenses/gpl.html for details. |
21 | 34 |
22 OPTIONS: | 35 OPTIONS: |
23 -s directory containing Sample files (required) | 36 -s directory containing Sample files (required) |
24 -g Genome size file (required) | 37 -g Genome size file (required) |
25 -o Output directory (required) | 38 -o Output directory (required) |
26 -c directory containing input or Control files | 39 -c directory containing input or Control files |
40 -f Fragment length(s) (default: estimated) | |
41 -r Resolution, peak or region or window (default: peak) | |
27 -m Mode, normal or narrow (default: normal) | 42 -m Mode, normal or narrow (default: normal) |
28 -r Resolution, peak or region (default: peak) | 43 -i clustering Initialization window selection, deterministic or stochastic (default: deterministic) |
29 -f Fragment length(s) (default: estimated) | 44 -b Bin Size (default: estimated) |
30 -w minimum Window size (default: 2 --- Note: this means minimum_window_size = bin_size x the_value_of_-w) | 45 -w minimum Window size (default: 2 --- Note: this means minimum_window_size = bin_size x the_value_of_-w) |
46 -e window Enrichment cutoff, auto or any numeric value (default: 1 --- Set this to "auto" to estimate the window enrichment cutoff) | |
47 -d keep PCR Dupicates in single-end mode, y or n (default: n --- if -t is "paired", this option has no effect) | |
48 -t Type, single or paired (default: single, requires BED files. paired requires BEDPE files) | |
31 -p Number of processors used by R scripts (default: 1) | 49 -p Number of processors used by R scripts (default: 1) |
32 -b Bin Size (default: estimated) | 50 |
33 -t Type, single or paired (default: single, requires BED files. paired requires BEDPE files) | |
34 -e window Enrichment cutoff, auto or any numeric value (default: 1 --- Set this to auto to estimate the appropriate window enrichment cutoff) | |
35 EOF | 51 EOF |
36 } | 52 } |
37 | 53 |
38 | 54 |
39 # ========================= | 55 # ========================= |
46 resol="peak" | 62 resol="peak" |
47 cores="1" | 63 cores="1" |
48 window="2" | 64 window="2" |
49 type="single" | 65 type="single" |
50 windowe="1" | 66 windowe="1" |
51 | 67 initModel="deterministic" |
68 uniq="n" | |
52 | 69 |
53 #Defaults -- Do not change | 70 #Defaults -- Do not change |
54 sdir="" | 71 sdir="" |
55 gsize="" | 72 gsize="" |
56 out="" | 73 out="" |
57 binsize="ns" | 74 binsize="ns" |
58 fraglen="ns" | 75 fraglen="ns" |
59 ran=$RANDOM | 76 ran=$RANDOM |
60 wdir=$(mktemp -d) | 77 wdir=$(mktemp -d) |
61 | 78 export LANG=C #locale defaults |
62 while getopts "s:g:o:c:m:r:f:p:w:b:t:e:" OPTION | 79 export LC_ALL=C #locale defaults |
80 | |
81 while getopts "s:g:o:c:m:r:f:p:w:b:t:e:i:d:" OPTION | |
63 do | 82 do |
64 case $OPTION in | 83 case $OPTION in |
65 s) sdir=$OPTARG | 84 s) sdir=$OPTARG |
66 ;; | 85 ;; |
67 g) gsize=$OPTARG | 86 g) gsize=$OPTARG |
83 b) binsize=$OPTARG | 102 b) binsize=$OPTARG |
84 ;; | 103 ;; |
85 t) type=$OPTARG | 104 t) type=$OPTARG |
86 ;; | 105 ;; |
87 e) windowe=$OPTARG | 106 e) windowe=$OPTARG |
107 ;; | |
108 i) initModel=$OPTARG | |
109 ;; | |
110 d) uniq=$OPTARG | |
88 ;; | 111 ;; |
89 ?) | 112 ?) |
90 usage | 113 usage |
91 exit | 114 exit |
92 ;; | 115 ;; |
102 if [[ -z $sdir ]] || [[ -z $gsize ]] || [[ -z $out ]] | 125 if [[ -z $sdir ]] || [[ -z $gsize ]] || [[ -z $out ]] |
103 then | 126 then |
104 usage | 127 usage |
105 exit 1 | 128 exit 1 |
106 fi | 129 fi |
130 if [[ -d "$out/peaks" ]]; then | |
131 printf "\n\nOutput directory $out/peaks already exists. I can't override existing results!\n\n" | |
132 exit 0 | |
133 fi | |
134 if [ $fraglen == "ns" ]; then | |
135 if [[ -d "$out/xcorr" ]]; then | |
136 printf "\n\nOutput directory $out/xcorr already exists. I can't override existing results!\n\n" | |
137 exit 0 | |
138 fi | |
139 fi | |
107 #=======================> DONE! | 140 #=======================> DONE! |
141 | |
108 | 142 |
109 | 143 |
110 | 144 |
111 # ============================= | 145 # ============================= |
112 # Step One: Initial Processing | 146 # Step One: Initial Processing |
113 # ============================= | 147 # ============================= |
114 printf "\n\n========================================\nStarted JAMM Pipeline v1.0.6rev2...Hang on!\n========================================\n\n" | 148 printf "\n\n============================================\nStarted JAMM Pipeline v1.0.7rev1...Hang on!\n============================================\n\n" |
115 | 149 |
116 if [ ! -d "$wdir" ]; then | 150 if [ ! -d "$wdir" ]; then |
117 mkdir $wdir #make working directory | 151 mkdir $wdir #make working directory |
118 fi | 152 fi |
119 if [ ! -d "$out" ]; then | 153 if [ ! -d "$out" ]; then |
159 | 193 |
160 printf "Done!\n" | 194 printf "Done!\n" |
161 fi | 195 fi |
162 | 196 |
163 #determine average read lengths | 197 #determine average read lengths |
164 printf "Getting average read lengths..." | 198 printf "Getting average read lengths...\n" |
165 if [ ! -z $bdir ]; then | 199 readL="" |
166 readC=$(awk '{a=$3-$2;print a;}' "$wdir/bkgd.$ran/ctrl.bed" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{a=$1+0.5;print a;}' | cut -d"." -f1) | 200 if [ ! -z $bdir ]; then |
167 fi | 201 readC=$(awk '{a=$3-$2;print a;}' "$wdir/bkgd.$ran/ctrl.bed" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{a=$1+0.5;print a;}' | cut -d"." -f1) |
168 readL="" | 202 printf "Control: $readC\n" |
169 for s in $sdir/*.bed; do #and for each sample file | 203 fi |
170 read=$(awk '{a=$3-$2;print a;}' "$s" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{a=$1+0.5;print a;}' | cut -d"." -f1) | 204 readL="" |
171 readL="$readL,$read" | 205 for s in $sdir/*.bed; do #and for each sample file |
172 done | 206 file=$(basename $s) |
173 readL=${readL#","} | 207 samplefile=$(echo $file | awk -F"." '{print $1}'); |
174 printf "Done!\n" | 208 read=$(awk '{a=$3-$2;print a;}' "$s" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{a=$1+0.5;print a;}' | cut -d"." -f1) |
209 printf "$samplefile: $read\n" | |
210 readL="$readL,$read" | |
211 done | |
212 readL=${readL#","} | |
175 #=======================> DONE! | 213 #=======================> DONE! |
176 | 214 |
177 | 215 |
178 | 216 |
179 # ============================= | 217 # ============================= |
297 fi | 335 fi |
298 done | 336 done |
299 #remove leading comma | 337 #remove leading comma |
300 samplelist=${samplelist#","} | 338 samplelist=${samplelist#","} |
301 frag=${frag#","} | 339 frag=${frag#","} |
302 | |
303 Rscript "$sPath/bincalculator.r" -ibed="$samplelist" -s="$gsize" -rl="$readL" -d="$wdir" -p="$cores" -f="$frag" -type="$type" | 340 Rscript "$sPath/bincalculator.r" -ibed="$samplelist" -s="$gsize" -rl="$readL" -d="$wdir" -p="$cores" -f="$frag" -type="$type" |
304 fi | 341 fi |
305 if [ $binsize != "ns" ]; then | 342 if [ $binsize != "ns" ]; then |
306 printf "You set a Bin Size: $binsize \n" | 343 printf "You set a Bin Size: $binsize \n" |
307 fi | 344 fi |
358 if [ $fraglen != "ns" ]; then | 395 if [ $fraglen != "ns" ]; then |
359 shift=$(echo $frag | cut -f "$k" -d ",") | 396 shift=$(echo $frag | cut -f "$k" -d ",") |
360 read=$(echo $readL | cut -f "$k" -d ",") | 397 read=$(echo $readL | cut -f "$k" -d ",") |
361 k=$(($k+1)) | 398 k=$(($k+1)) |
362 fi | 399 fi |
363 perl "$sPath/readshifter.pl" "$wdir/samples.$ran/$samplefile" $shift $read > "$wdir/samples.$ran/ext.$samplefile" | 400 if [ $uniq == "y" ]; then |
401 perl "$sPath/readshifter.pl" "$wdir/samples.$ran/$samplefile" $shift $read > "$wdir/samples.$ran/ext.$samplefile" | |
402 fi | |
403 if [ $uniq == "n" ]; then | |
404 perl "$sPath/readshifter.pl" "$wdir/samples.$ran/$samplefile" $shift $read | uniq > "$wdir/samples.$ran/ext.$samplefile" | |
405 fi | |
364 fi | 406 fi |
365 done | 407 done |
366 | 408 |
367 #control file | 409 #control file |
368 bkgdfile="None" | 410 bkgdfile="None" |
373 fi | 415 fi |
374 if [ $fraglen != "ns" ]; then | 416 if [ $fraglen != "ns" ]; then |
375 l=$(($dupnum+1)) | 417 l=$(($dupnum+1)) |
376 bshift=$(echo $frag | cut -f "$l" -d ",") | 418 bshift=$(echo $frag | cut -f "$l" -d ",") |
377 fi | 419 fi |
378 perl "$sPath/readshifter.pl" "$wdir/bkgd.$ran/bkgd.$chr.ctrl.bed" $bshift $readC > "$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed" | 420 |
421 if [ $uniq == "y" ]; then | |
422 perl "$sPath/readshifter.pl" "$wdir/bkgd.$ran/bkgd.$chr.ctrl.bed" $bshift $readC > "$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed" | |
423 fi | |
424 if [ $uniq == "n" ]; then | |
425 perl "$sPath/readshifter.pl" "$wdir/bkgd.$ran/bkgd.$chr.ctrl.bed" $bshift $readC | uniq > "$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed" | |
426 fi | |
379 bkgdfile="$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed" | 427 bkgdfile="$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed" |
380 fi | 428 fi |
381 | 429 |
382 #remove leading comma | 430 #remove leading comma |
383 samplelist=${samplelist#","} | 431 samplelist=${samplelist#","} |
384 frag=${frag#","} | 432 frag=${frag#","} |
385 | 433 |
386 #call the peak calling R script | 434 #call the peak calling R script |
387 Rscript "$sPath/peakfinder.r" -sfile="$f" -bednames="$samplelist" -frag="$frag" -bkgd=$bkgdfile -out="$wdir/peaks.$ran/" -clustnummer="$clustno" -resolution="$resol" -window="$window" -p="$cores" -bin="$binsize" -type="$type" -chrcount="$counting" -windowe="$windowe" | 435 Rscript "$sPath/peakfinder.r" -sfile="$f" -bednames="$samplelist" -frag="$frag" -bkgd=$bkgdfile -out="$wdir/peaks.$ran/" -clustnummer="$clustno" -resolution="$resol" -window="$window" -p="$cores" -bin="$binsize" -type="$type" -chrcount="$counting" -initModel="$initModel" -windowe="$windowe" |
388 counting=$(($counting+1)); | 436 counting=$(($counting+1)); |
389 cp "$wdir/peaks.$ran/$chr.peaks.bed" "$out/peaks/$chr.peaks.bed" | 437 cp "$wdir/peaks.$ran/$chr.peaks.bed" "$out/peaks/$chr.peaks.bed" |
390 rm "$wdir/peaks.$ran/$chr.peaks.bed" | 438 rm "$wdir/peaks.$ran/$chr.peaks.bed" |
391 done | 439 done |
392 counting=1; | 440 counting=1; |
432 #remove leading comma | 480 #remove leading comma |
433 samplelist=${samplelist#","} | 481 samplelist=${samplelist#","} |
434 frag=${frag#","} | 482 frag=${frag#","} |
435 | 483 |
436 #call the peak calling R script | 484 #call the peak calling R script |
437 Rscript "$sPath/peakfinder.r" -sfile=$f -bednames=$samplelist -frag="NA" -bkgd=$bkgdfile -out="$wdir/peaks.$ran/" -clustnummer="$clustno" -resolution="$resol" -window="$window" -p="$cores" -bin="$binsize" -type="$type" -chrcount="$counting" -windowe="$windowe" | 485 Rscript "$sPath/peakfinder.r" -sfile=$f -bednames=$samplelist -frag="NA" -bkgd=$bkgdfile -out="$wdir/peaks.$ran/" -clustnummer="$clustno" -resolution="$resol" -window="$window" -p="$cores" -bin="$binsize" -type="$type" -chrcount="$counting" -initModel="$initModel" -windowe="$windowe" |
438 counting=$(($counting+1)); | 486 counting=$(($counting+1)); |
439 cp "$wdir/peaks.$ran/$chr.peaks.bed" "$out/peaks/$chr.peaks.bed" | 487 cp "$wdir/peaks.$ran/$chr.peaks.bed" "$out/peaks/$chr.peaks.bed" |
440 rm "$wdir/peaks.$ran/$chr.peaks.bed" | 488 rm "$wdir/peaks.$ran/$chr.peaks.bed" |
441 done | 489 done |
442 counting=1; | 490 counting=1; |
445 | 493 |
446 cp $wdir/peaks.$ran/min.peaksize $out/peaks/min.peaksize | 494 cp $wdir/peaks.$ran/min.peaksize $out/peaks/min.peaksize |
447 | 495 |
448 | 496 |
449 #concatenate, sort and filter | 497 #concatenate, sort and filter |
450 cat $out/peaks/*.bed > $out/peaks/all.narrowPeak | 498 cat $out/peaks/*.bed > $out/peaks/all.narrowPeak |
451 Rscript "$sPath/peakhelper.r" -filelist="$out/peaks/all.narrowPeak" | 499 if [[ -s $out/peaks/all.narrowPeak ]]; then |
452 perl "$sPath/peakfilter.pl" $out/peaks/all.narrowPeak | sort -nr -k7 > $out/peaks/filtered.peaks.narrowPeak | 500 Rscript "$sPath/peakhelper.r" -filelist="$out/peaks/all.narrowPeak" |
453 cut -f1-10 $out/peaks/all.narrowPeak | awk -F"\t" -v j=0 '$7 > j' | sort -nr -k7 > $out/peaks/all.peaks.narrowPeak | 501 perl "$sPath/peakfilter.pl" $out/peaks/all.narrowPeak | sort -nr -k7 > $out/peaks/filtered.peaks.narrowPeak |
502 cut -f1-10 $out/peaks/all.narrowPeak | awk -F"\t" -v j=0 '$7 > j' | sort -nr -k7 > $out/peaks/all.peaks.narrowPeak | |
503 fi | |
454 rm $out/peaks/all.narrowPeak | 504 rm $out/peaks/all.narrowPeak |
455 rm $out/peaks/*.bed | 505 rm $out/peaks/*.bed |
456 rm $out/peaks/min.peaksize | 506 rm $out/peaks/min.peaksize |
457 #=======================> DONE! | 507 #=======================> DONE! |
458 | 508 |