Mercurial > repos > messersc > jamm
comparison SignalGenerator.sh @ 1:243f75d0ed6e draft default tip
Uploaded.
Includes new release 1.0.7 with fixed optional controls.
author | messersc |
---|---|
date | Thu, 19 Feb 2015 05:39:45 -0500 |
parents | d42f4d78c85e |
children |
comparison
equal
deleted
inserted
replaced
0:d42f4d78c85e | 1:243f75d0ed6e |
---|---|
1 #!/usr/bin/bash | 1 ######################################################################## |
2 | 2 # JAMMv1.0.7rev1 is a peak finder for joint analysis of NGS replicates. |
3 | 3 # Copyright (C) 2014-2015 Mahmoud Ibrahim |
4 | 4 # |
5 # This program is free software: you can redistribute it and/or modify | |
6 # it under the terms of the GNU General Public License as published by | |
7 # the Free Software Foundation, either version 3 of the License, or | |
8 # (at your option) any later version. | |
9 # | |
10 # This program is distributed in the hope that it will be useful, | |
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 # GNU General Public License for more details. | |
14 # | |
15 # You should have received a copy of the GNU General Public License | |
16 # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
17 # | |
18 # Contact: mahmoud.ibrahim@mdc-berlin.de | |
19 ######################################################################## | |
5 | 20 |
6 ##Finding out the path | 21 ##Finding out the path |
7 sPath="`dirname \"$0\"`" | 22 sPath="`dirname \"$0\"`" |
8 sPath="`( cd \"$sPath\" && pwd )`" | 23 sPath="`( cd \"$sPath\" && pwd )`" |
9 | 24 |
10 | 25 |
11 | 26 |
12 usage() | 27 usage() |
13 { | 28 { |
14 cat << EOF | 29 cat << EOF |
15 Welcome to JAMM v1.0.6rev2 Signal Generator Script (GNU GPLv3). Copyright (C) 2014 Mahmoud Ibrahim. | 30 Welcome to JAMM v1.0.7rev1 Signal Generator Script (GNU GPLv3). Copyright (C) 2014-2015 Mahmoud Ibrahim. |
16 | 31 |
17 This program comes with ABSOLUTELY NO WARRANTY; for details visit http://www.gnu.org/licenses/gpl.html. This is free software, and you are welcome to redistribute it under certain conditions; visit http://www.gnu.org/licenses/gpl.html for details. | 32 This program comes with ABSOLUTELY NO WARRANTY; for details visit http://www.gnu.org/licenses/gpl.html. This is free software, and you are welcome to redistribute it under certain conditions; visit http://www.gnu.org/licenses/gpl.html for details. |
18 | 33 |
19 OPTIONS: | 34 OPTIONS: |
20 -s Directory containing sample files (required) | 35 -s Directory containing sample files (required) |
21 -g Genome size file (required) | 36 -g Genome size file (required) |
22 -o Output Directory (required) | 37 -o Output Directory (required) |
23 -c directory containing input or Control files | 38 -c directory containing input or Control files |
24 -r file with Regions to get signal for (required) | 39 -r file with Regions to get signal for (required) |
25 -b Bin size for signal generation (default: 10) | 40 -b Bin size for signal generation (default: 10) |
26 -f Fragment lengths (required) | 41 -f Fragment lengths (required if -t is "single") |
27 -p Number of processors used by R scripts (default: 1) | 42 -p Number of processors used by R scripts (default: 1) |
43 -t Alignment type, paired or single (default: single) | |
28 | 44 |
29 EOF | 45 EOF |
30 } | 46 } |
31 | 47 |
32 | 48 |
42 regs="" | 58 regs="" |
43 fraglen="" | 59 fraglen="" |
44 wdir=$(mktemp -d) | 60 wdir=$(mktemp -d) |
45 ran=$RANDOM | 61 ran=$RANDOM |
46 cores="1" | 62 cores="1" |
47 | 63 type="single" |
48 | 64 |
49 while getopts "s:o:c:r:b:f:g:p:" OPTION | 65 while getopts "s:o:c:r:b:f:g:p:t:" OPTION |
50 do | 66 do |
51 case $OPTION in | 67 case $OPTION in |
52 s) sdir=$OPTARG | 68 s) sdir=$OPTARG |
53 ;; | 69 ;; |
54 g) gsize=$OPTARG | 70 g) gsize=$OPTARG |
63 ;; | 79 ;; |
64 f) fraglen=$OPTARG | 80 f) fraglen=$OPTARG |
65 ;; | 81 ;; |
66 p) cores=$OPTARG | 82 p) cores=$OPTARG |
67 ;; | 83 ;; |
84 t) type=$OPTARG | |
85 ;; | |
68 ?) | 86 ?) |
69 usage | 87 usage |
70 exit | 88 exit |
71 ;; | 89 ;; |
72 esac | 90 esac |
73 done | 91 done |
74 | 92 |
75 | 93 |
76 if [[ -z $fraglen ]] || [[ -z $sdir ]] || [[ -z $regs ]] || [[ -z $out ]] | 94 if [[ -z $sdir ]] || [[ -z $regs ]] || [[ -z $out ]] |
77 then | 95 then |
78 usage | 96 usage |
79 exit 1 | 97 exit 1 |
80 fi | 98 fi |
99 | |
100 if [[ -z $fraglen ]] | |
101 then | |
102 if [ $type == "single" ] | |
103 then | |
104 usage | |
105 exit 1 | |
106 fi | |
107 fi | |
108 if [[ -d "$out/signal" ]]; then | |
109 printf "\n\nOutput directory $out/signal already exists. I can't override existing results!\n\n" | |
110 exit 0 | |
111 fi | |
81 #=======================> DONE! | 112 #=======================> DONE! |
82 | 113 |
83 | 114 |
84 | 115 |
85 # ============================= | 116 # ============================= |
86 # Step One: Initial Processing | 117 # Step One: Initial Processing |
87 # ============================= | 118 # ============================= |
88 printf "\n\n============================================\nStarted JAMM Signal Generator Pipeline...Hang on!\n============================================\n\n" | 119 printf "\n\n=======================================================\nStarted JAMM1.0.7rev1 Signal Generator Pipeline...Hang on!\n=======================================================\n\n" |
89 | 120 |
90 if [ ! -d "$wdir" ]; then | 121 if [ ! -d "$wdir" ]; then |
91 mkdir $wdir #make working directory | 122 mkdir $wdir #make working directory |
92 fi | 123 fi |
93 if [ ! -d "$out" ]; then | 124 if [ ! -d "$out" ]; then |
129 sizefile=$(basename $f) | 160 sizefile=$(basename $f) |
130 chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}'); | 161 chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}'); |
131 awk -v chr="$chr" -v ext="$wdir/bkgd.$ran/" -F"\t" '$1 == chr { print $2"\t"$6 >> ext"bkgd."chr".ctrl.bed" }' "$wdir/bkgd.$ran/ctrl.bed" | 162 awk -v chr="$chr" -v ext="$wdir/bkgd.$ran/" -F"\t" '$1 == chr { print $2"\t"$6 >> ext"bkgd."chr".ctrl.bed" }' "$wdir/bkgd.$ran/ctrl.bed" |
132 done | 163 done |
133 | 164 |
134 rm $wdir/bkgd.$ran/ctrl.bed | |
135 | 165 |
136 printf "Done!\n" | 166 printf "Done!\n" |
137 fi | 167 fi |
138 | 168 |
139 | 169 |
140 #determine average read lengths | 170 #determine average read lengths |
171 if [ $type == "single" ]; then | |
141 printf "Getting average read lengths..." | 172 printf "Getting average read lengths..." |
142 if [ ! -z $bdir ]; then | 173 if [ ! -z $bdir ]; then |
143 readC=$(awk '{a=$3-$2;print a;}' "$wdir/bkgd.$ran/ctrl.bed" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}') | 174 readC=$(awk '{a=$3-$2;print a;}' "$wdir/bkgd.$ran/ctrl.bed" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}') |
144 fi | 175 fi |
145 readL="" | 176 readL="" |
147 read=$(awk '{a=$3-$2;print a;}' "$s" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}') | 178 read=$(awk '{a=$3-$2;print a;}' "$s" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}') |
148 readL="$readL,$read" | 179 readL="$readL,$read" |
149 done | 180 done |
150 readL=${readL#","} | 181 readL=${readL#","} |
151 printf "Done!\n" | 182 printf "Done!\n" |
183 fi | |
152 #=======================> DONE! | 184 #=======================> DONE! |
153 | 185 |
154 | 186 |
155 | 187 |
156 | 188 |
163 mkdir $out/signal #store signal | 195 mkdir $out/signal #store signal |
164 | 196 |
165 | 197 |
166 printf "Generating Signal...(bin size: $signal)\n" | 198 printf "Generating Signal...(bin size: $signal)\n" |
167 | 199 |
200 if [ $type == "single" ]; then | |
168 counting=1; | 201 counting=1; |
169 for f in $wdir/sizes.$ran/*; do #for each chromosome | 202 for f in $wdir/sizes.$ran/*; do #for each chromosome |
170 samplelist="" | 203 samplelist="" |
171 frag="" | 204 frag="" |
172 frag=$fraglen | 205 frag=$fraglen |
205 #remove leading comma | 238 #remove leading comma |
206 samplelist=${samplelist#","} | 239 samplelist=${samplelist#","} |
207 frag=${frag#","} | 240 frag=${frag#","} |
208 | 241 |
209 #call the peak calling R script | 242 #call the peak calling R script |
210 Rscript "$sPath/signalmaker.r" -chromoS="$chrSize" -chromo="$chr" -bednames=$samplelist -frag=$frag -bkgd=$bkgdfile -out="$wdir/signal.$ran/" -sig="$signal" -regions="$regs" -p="$cores" -chrcount="$counting" | 243 Rscript "$sPath/signalmaker.r" -chromoS="$chrSize" -chromo="$chr" -bednames=$samplelist -frag=$frag -bkgd=$bkgdfile -out="$wdir/signal.$ran/" -sig="$signal" -regions="$regs" -p="$cores" -chrcount="$counting" -t="$type" |
211 cat $wdir/signal.$ran/*.bedSignal | awk -F"\t" -v j=0 '$4 > j' > "$out/signal/$chr.bedGraph" | 244 cat $wdir/signal.$ran/*.bedSignal | awk -F"\t" -v j=0 '$4 > j' > "$out/signal/$chr.bedGraph" |
212 rm $wdir/signal.$ran/*.bedSignal | 245 rm $wdir/signal.$ran/*.bedSignal |
213 counting=$(($counting+1)); | 246 counting=$(($counting+1)); |
214 done | 247 done |
215 counting=1; | 248 counting=1; |
249 fi | |
250 | |
251 | |
252 | |
253 if [ $type == "paired" ]; then | |
254 counting=1; | |
255 for f in $wdir/sizes.$ran/*; do #for each chromosome | |
256 samplelist="" | |
257 | |
258 sizefile=$(basename $f) | |
259 chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}'); | |
260 chrSize=$(cat $f | cut -f2); | |
261 printf "Chromosome $chr: " | |
262 | |
263 #list of sample bed files and fragment lengths | |
264 for s in $wdir/samples.$ran/*.bed; do #and for each sample file | |
265 samplefile=$(basename $s) | |
266 chr2=$(echo $samplefile | awk -F"." '{print $2}'); | |
267 if [ $chr == $chr2 ] #belonging to this chromosome | |
268 then | |
269 samplelist="$samplelist,$wdir/samples.$ran/$samplefile" | |
270 samplename=$(echo $samplefile | awk -F"." '{ print $3 }') | |
271 samplefilename=$(echo $samplefile | cut -d'.' -f 3-) | |
272 x="$sdir/$samplefilename" | |
273 fi | |
274 done | |
275 | |
276 #control file | |
277 bkgdfile="None" | |
278 if [ ! -z $bdir ]; then | |
279 bkgdfile="$wdir/bkgd.$ran/bkgd.$chr.ctrl.bed" | |
280 fi | |
281 | |
282 #remove leading comma | |
283 samplelist=${samplelist#","} | |
284 frag=${frag#","} | |
285 | |
286 #call the peak calling R script | |
287 Rscript "$sPath/signalmaker.r" -chromoS="$chrSize" -chromo="$chr" -bednames=$samplelist -frag=$frag -bkgd=$bkgdfile -out="$wdir/signal.$ran/" -sig="$signal" -regions="$regs" -p="$cores" -chrcount="$counting" -t="$type" | |
288 cat $wdir/signal.$ran/*.bedSignal | awk -F"\t" -v j=0 '$4 > j' > "$out/signal/$chr.bedGraph" | |
289 rm $wdir/signal.$ran/*.bedSignal | |
290 counting=$(($counting+1)); | |
291 done | |
292 counting=1; | |
293 fi | |
216 #=======================> DONE! | 294 #=======================> DONE! |
217 | 295 |
218 rm -rf $wdir | 296 rm -rf $wdir |
219 | 297 |
220 printf "\n\n========================================\nWe're done...Congratulations!\n========================================\n\n" | 298 printf "\n\n========================================\nWe're done...Congratulations!\n========================================\n\n" |