comparison SignalGenerator.sh @ 1:243f75d0ed6e draft default tip

Uploaded. Includes new release 1.0.7 with fixed optional controls.
author messersc
date Thu, 19 Feb 2015 05:39:45 -0500
parents d42f4d78c85e
children
comparison
equal deleted inserted replaced
0:d42f4d78c85e 1:243f75d0ed6e
1 #!/usr/bin/bash 1 ########################################################################
2 2 # JAMMv1.0.7rev1 is a peak finder for joint analysis of NGS replicates.
3 3 # Copyright (C) 2014-2015 Mahmoud Ibrahim
4 4 #
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 #
18 # Contact: mahmoud.ibrahim@mdc-berlin.de
19 ########################################################################
5 20
6 ##Finding out the path 21 ##Finding out the path
7 sPath="`dirname \"$0\"`" 22 sPath="`dirname \"$0\"`"
8 sPath="`( cd \"$sPath\" && pwd )`" 23 sPath="`( cd \"$sPath\" && pwd )`"
9 24
10 25
11 26
12 usage() 27 usage()
13 { 28 {
14 cat << EOF 29 cat << EOF
15 Welcome to JAMM v1.0.6rev2 Signal Generator Script (GNU GPLv3). Copyright (C) 2014 Mahmoud Ibrahim. 30 Welcome to JAMM v1.0.7rev1 Signal Generator Script (GNU GPLv3). Copyright (C) 2014-2015 Mahmoud Ibrahim.
16 31
17 This program comes with ABSOLUTELY NO WARRANTY; for details visit http://www.gnu.org/licenses/gpl.html. This is free software, and you are welcome to redistribute it under certain conditions; visit http://www.gnu.org/licenses/gpl.html for details. 32 This program comes with ABSOLUTELY NO WARRANTY; for details visit http://www.gnu.org/licenses/gpl.html. This is free software, and you are welcome to redistribute it under certain conditions; visit http://www.gnu.org/licenses/gpl.html for details.
18 33
19 OPTIONS: 34 OPTIONS:
20 -s Directory containing sample files (required) 35 -s Directory containing sample files (required)
21 -g Genome size file (required) 36 -g Genome size file (required)
22 -o Output Directory (required) 37 -o Output Directory (required)
23 -c directory containing input or Control files 38 -c directory containing input or Control files
24 -r file with Regions to get signal for (required) 39 -r file with Regions to get signal for (required)
25 -b Bin size for signal generation (default: 10) 40 -b Bin size for signal generation (default: 10)
26 -f Fragment lengths (required) 41 -f Fragment lengths (required if -t is "single")
27 -p Number of processors used by R scripts (default: 1) 42 -p Number of processors used by R scripts (default: 1)
43 -t Alignment type, paired or single (default: single)
28 44
29 EOF 45 EOF
30 } 46 }
31 47
32 48
42 regs="" 58 regs=""
43 fraglen="" 59 fraglen=""
44 wdir=$(mktemp -d) 60 wdir=$(mktemp -d)
45 ran=$RANDOM 61 ran=$RANDOM
46 cores="1" 62 cores="1"
47 63 type="single"
48 64
49 while getopts "s:o:c:r:b:f:g:p:" OPTION 65 while getopts "s:o:c:r:b:f:g:p:t:" OPTION
50 do 66 do
51 case $OPTION in 67 case $OPTION in
52 s) sdir=$OPTARG 68 s) sdir=$OPTARG
53 ;; 69 ;;
54 g) gsize=$OPTARG 70 g) gsize=$OPTARG
63 ;; 79 ;;
64 f) fraglen=$OPTARG 80 f) fraglen=$OPTARG
65 ;; 81 ;;
66 p) cores=$OPTARG 82 p) cores=$OPTARG
67 ;; 83 ;;
84 t) type=$OPTARG
85 ;;
68 ?) 86 ?)
69 usage 87 usage
70 exit 88 exit
71 ;; 89 ;;
72 esac 90 esac
73 done 91 done
74 92
75 93
76 if [[ -z $fraglen ]] || [[ -z $sdir ]] || [[ -z $regs ]] || [[ -z $out ]] 94 if [[ -z $sdir ]] || [[ -z $regs ]] || [[ -z $out ]]
77 then 95 then
78 usage 96 usage
79 exit 1 97 exit 1
80 fi 98 fi
99
100 if [[ -z $fraglen ]]
101 then
102 if [ $type == "single" ]
103 then
104 usage
105 exit 1
106 fi
107 fi
108 if [[ -d "$out/signal" ]]; then
109 printf "\n\nOutput directory $out/signal already exists. I can't override existing results!\n\n"
110 exit 0
111 fi
81 #=======================> DONE! 112 #=======================> DONE!
82 113
83 114
84 115
85 # ============================= 116 # =============================
86 # Step One: Initial Processing 117 # Step One: Initial Processing
87 # ============================= 118 # =============================
88 printf "\n\n============================================\nStarted JAMM Signal Generator Pipeline...Hang on!\n============================================\n\n" 119 printf "\n\n=======================================================\nStarted JAMM1.0.7rev1 Signal Generator Pipeline...Hang on!\n=======================================================\n\n"
89 120
90 if [ ! -d "$wdir" ]; then 121 if [ ! -d "$wdir" ]; then
91 mkdir $wdir #make working directory 122 mkdir $wdir #make working directory
92 fi 123 fi
93 if [ ! -d "$out" ]; then 124 if [ ! -d "$out" ]; then
129 sizefile=$(basename $f) 160 sizefile=$(basename $f)
130 chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}'); 161 chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}');
131 awk -v chr="$chr" -v ext="$wdir/bkgd.$ran/" -F"\t" '$1 == chr { print $2"\t"$6 >> ext"bkgd."chr".ctrl.bed" }' "$wdir/bkgd.$ran/ctrl.bed" 162 awk -v chr="$chr" -v ext="$wdir/bkgd.$ran/" -F"\t" '$1 == chr { print $2"\t"$6 >> ext"bkgd."chr".ctrl.bed" }' "$wdir/bkgd.$ran/ctrl.bed"
132 done 163 done
133 164
134 rm $wdir/bkgd.$ran/ctrl.bed
135 165
136 printf "Done!\n" 166 printf "Done!\n"
137 fi 167 fi
138 168
139 169
140 #determine average read lengths 170 #determine average read lengths
171 if [ $type == "single" ]; then
141 printf "Getting average read lengths..." 172 printf "Getting average read lengths..."
142 if [ ! -z $bdir ]; then 173 if [ ! -z $bdir ]; then
143 readC=$(awk '{a=$3-$2;print a;}' "$wdir/bkgd.$ran/ctrl.bed" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}') 174 readC=$(awk '{a=$3-$2;print a;}' "$wdir/bkgd.$ran/ctrl.bed" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}')
144 fi 175 fi
145 readL="" 176 readL=""
147 read=$(awk '{a=$3-$2;print a;}' "$s" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}') 178 read=$(awk '{a=$3-$2;print a;}' "$s" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}')
148 readL="$readL,$read" 179 readL="$readL,$read"
149 done 180 done
150 readL=${readL#","} 181 readL=${readL#","}
151 printf "Done!\n" 182 printf "Done!\n"
183 fi
152 #=======================> DONE! 184 #=======================> DONE!
153 185
154 186
155 187
156 188
163 mkdir $out/signal #store signal 195 mkdir $out/signal #store signal
164 196
165 197
166 printf "Generating Signal...(bin size: $signal)\n" 198 printf "Generating Signal...(bin size: $signal)\n"
167 199
200 if [ $type == "single" ]; then
168 counting=1; 201 counting=1;
169 for f in $wdir/sizes.$ran/*; do #for each chromosome 202 for f in $wdir/sizes.$ran/*; do #for each chromosome
170 samplelist="" 203 samplelist=""
171 frag="" 204 frag=""
172 frag=$fraglen 205 frag=$fraglen
205 #remove leading comma 238 #remove leading comma
206 samplelist=${samplelist#","} 239 samplelist=${samplelist#","}
207 frag=${frag#","} 240 frag=${frag#","}
208 241
209 #call the peak calling R script 242 #call the peak calling R script
210 Rscript "$sPath/signalmaker.r" -chromoS="$chrSize" -chromo="$chr" -bednames=$samplelist -frag=$frag -bkgd=$bkgdfile -out="$wdir/signal.$ran/" -sig="$signal" -regions="$regs" -p="$cores" -chrcount="$counting" 243 Rscript "$sPath/signalmaker.r" -chromoS="$chrSize" -chromo="$chr" -bednames=$samplelist -frag=$frag -bkgd=$bkgdfile -out="$wdir/signal.$ran/" -sig="$signal" -regions="$regs" -p="$cores" -chrcount="$counting" -t="$type"
211 cat $wdir/signal.$ran/*.bedSignal | awk -F"\t" -v j=0 '$4 > j' > "$out/signal/$chr.bedGraph" 244 cat $wdir/signal.$ran/*.bedSignal | awk -F"\t" -v j=0 '$4 > j' > "$out/signal/$chr.bedGraph"
212 rm $wdir/signal.$ran/*.bedSignal 245 rm $wdir/signal.$ran/*.bedSignal
213 counting=$(($counting+1)); 246 counting=$(($counting+1));
214 done 247 done
215 counting=1; 248 counting=1;
249 fi
250
251
252
253 if [ $type == "paired" ]; then
254 counting=1;
255 for f in $wdir/sizes.$ran/*; do #for each chromosome
256 samplelist=""
257
258 sizefile=$(basename $f)
259 chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}');
260 chrSize=$(cat $f | cut -f2);
261 printf "Chromosome $chr: "
262
263 #list of sample bed files and fragment lengths
264 for s in $wdir/samples.$ran/*.bed; do #and for each sample file
265 samplefile=$(basename $s)
266 chr2=$(echo $samplefile | awk -F"." '{print $2}');
267 if [ $chr == $chr2 ] #belonging to this chromosome
268 then
269 samplelist="$samplelist,$wdir/samples.$ran/$samplefile"
270 samplename=$(echo $samplefile | awk -F"." '{ print $3 }')
271 samplefilename=$(echo $samplefile | cut -d'.' -f 3-)
272 x="$sdir/$samplefilename"
273 fi
274 done
275
276 #control file
277 bkgdfile="None"
278 if [ ! -z $bdir ]; then
279 bkgdfile="$wdir/bkgd.$ran/bkgd.$chr.ctrl.bed"
280 fi
281
282 #remove leading comma
283 samplelist=${samplelist#","}
284 frag=${frag#","}
285
286 #call the peak calling R script
287 Rscript "$sPath/signalmaker.r" -chromoS="$chrSize" -chromo="$chr" -bednames=$samplelist -frag=$frag -bkgd=$bkgdfile -out="$wdir/signal.$ran/" -sig="$signal" -regions="$regs" -p="$cores" -chrcount="$counting" -t="$type"
288 cat $wdir/signal.$ran/*.bedSignal | awk -F"\t" -v j=0 '$4 > j' > "$out/signal/$chr.bedGraph"
289 rm $wdir/signal.$ran/*.bedSignal
290 counting=$(($counting+1));
291 done
292 counting=1;
293 fi
216 #=======================> DONE! 294 #=======================> DONE!
217 295
218 rm -rf $wdir 296 rm -rf $wdir
219 297
220 printf "\n\n========================================\nWe're done...Congratulations!\n========================================\n\n" 298 printf "\n\n========================================\nWe're done...Congratulations!\n========================================\n\n"