Mercurial > repos > messersc > jamm
comparison SignalGenerator.sh @ 0:d42f4d78c85e draft
Uploaded
author | messersc |
---|---|
date | Wed, 17 Dec 2014 10:40:23 -0500 |
parents | |
children | 243f75d0ed6e |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d42f4d78c85e |
---|---|
1 #!/usr/bin/bash | |
2 | |
3 | |
4 | |
5 | |
6 ##Finding out the path | |
7 sPath="`dirname \"$0\"`" | |
8 sPath="`( cd \"$sPath\" && pwd )`" | |
9 | |
10 | |
11 | |
12 usage() | |
13 { | |
14 cat << EOF | |
15 Welcome to JAMM v1.0.6rev2 Signal Generator Script (GNU GPLv3). Copyright (C) 2014 Mahmoud Ibrahim. | |
16 | |
17 This program comes with ABSOLUTELY NO WARRANTY; for details visit http://www.gnu.org/licenses/gpl.html. This is free software, and you are welcome to redistribute it under certain conditions; visit http://www.gnu.org/licenses/gpl.html for details. | |
18 | |
19 OPTIONS: | |
20 -s Directory containing sample files (required) | |
21 -g Genome size file (required) | |
22 -o Output Directory (required) | |
23 -c directory containing input or Control files | |
24 -r file with Regions to get signal for (required) | |
25 -b Bin size for signal generation (default: 10) | |
26 -f Fragment lengths (required) | |
27 -p Number of processors used by R scripts (default: 1) | |
28 | |
29 EOF | |
30 } | |
31 | |
32 | |
33 # ========================= | |
34 # Process Input parameters | |
35 # ========================= | |
36 | |
37 | |
38 sdir="" | |
39 gsize="" | |
40 out="" | |
41 signal="10" | |
42 regs="" | |
43 fraglen="" | |
44 wdir=$(mktemp -d) | |
45 ran=$RANDOM | |
46 cores="1" | |
47 | |
48 | |
49 while getopts "s:o:c:r:b:f:g:p:" OPTION | |
50 do | |
51 case $OPTION in | |
52 s) sdir=$OPTARG | |
53 ;; | |
54 g) gsize=$OPTARG | |
55 ;; | |
56 o) out=$OPTARG | |
57 ;; | |
58 c) bdir=$OPTARG | |
59 ;; | |
60 r) regs=$OPTARG | |
61 ;; | |
62 b) signal=$OPTARG | |
63 ;; | |
64 f) fraglen=$OPTARG | |
65 ;; | |
66 p) cores=$OPTARG | |
67 ;; | |
68 ?) | |
69 usage | |
70 exit | |
71 ;; | |
72 esac | |
73 done | |
74 | |
75 | |
76 if [[ -z $fraglen ]] || [[ -z $sdir ]] || [[ -z $regs ]] || [[ -z $out ]] | |
77 then | |
78 usage | |
79 exit 1 | |
80 fi | |
81 #=======================> DONE! | |
82 | |
83 | |
84 | |
85 # ============================= | |
86 # Step One: Initial Processing | |
87 # ============================= | |
88 printf "\n\n============================================\nStarted JAMM Signal Generator Pipeline...Hang on!\n============================================\n\n" | |
89 | |
90 if [ ! -d "$wdir" ]; then | |
91 mkdir $wdir #make working directory | |
92 fi | |
93 if [ ! -d "$out" ]; then | |
94 mkdir $out #make working directory | |
95 fi | |
96 mkdir $wdir/bkgd.$ran/ #directory to store background files | |
97 mkdir $wdir/sizes.$ran/ #chromosomes and sizes | |
98 mkdir $wdir/samples.$ran/ #store sample files | |
99 | |
100 dupnum=$(ls -1 $sdir | wc -l) #count how many sample files | |
101 | |
102 | |
103 #separate chromosome sizes | |
104 printf "Loading genome size file..." | |
105 ext="$wdir/sizes.$ran/" | |
106 awk -v ext="$ext" '{ print >> ext"/size." $1 ".bed" }' $gsize | |
107 printf "Done!\n" | |
108 | |
109 | |
110 printf "Processing sample files..." | |
111 #load each chromosome from each sample file | |
112 for i in $sdir/*.bed; do | |
113 samplefile=$(basename $i) | |
114 for f in $wdir/sizes.$ran/*; do | |
115 sizefile=$(basename $f) | |
116 chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}'); | |
117 awk -v chr="$chr" -v ext="$wdir/samples.$ran/" -v samplefile="$samplefile" -F"\t" '$1 == chr { print $2"\t"$6 >> ext"sample."chr"."samplefile }' "$i" | |
118 done | |
119 done | |
120 printf "Done!\n" | |
121 | |
122 | |
123 if [ ! -z $bdir ]; then | |
124 #concatenate all background files into one file | |
125 printf "Processing control files..." | |
126 cat $bdir/*.bed > $wdir/bkgd.$ran/ctrl.bed | |
127 | |
128 for f in $wdir/sizes.$ran/*; do | |
129 sizefile=$(basename $f) | |
130 chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}'); | |
131 awk -v chr="$chr" -v ext="$wdir/bkgd.$ran/" -F"\t" '$1 == chr { print $2"\t"$6 >> ext"bkgd."chr".ctrl.bed" }' "$wdir/bkgd.$ran/ctrl.bed" | |
132 done | |
133 | |
134 rm $wdir/bkgd.$ran/ctrl.bed | |
135 | |
136 printf "Done!\n" | |
137 fi | |
138 | |
139 | |
140 #determine average read lengths | |
141 printf "Getting average read lengths..." | |
142 if [ ! -z $bdir ]; then | |
143 readC=$(awk '{a=$3-$2;print a;}' "$wdir/bkgd.$ran/ctrl.bed" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}') | |
144 fi | |
145 readL="" | |
146 for s in $sdir/*.bed; do #and for each sample file | |
147 read=$(awk '{a=$3-$2;print a;}' "$s" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}') | |
148 readL="$readL,$read" | |
149 done | |
150 readL=${readL#","} | |
151 printf "Done!\n" | |
152 #=======================> DONE! | |
153 | |
154 | |
155 | |
156 | |
157 | |
158 | |
159 # =========================== | |
160 # Step Three: Getting Signal | |
161 # =========================== | |
162 mkdir $wdir/signal.$ran/ | |
163 mkdir $out/signal #store signal | |
164 | |
165 | |
166 printf "Generating Signal...(bin size: $signal)\n" | |
167 | |
168 counting=1; | |
169 for f in $wdir/sizes.$ran/*; do #for each chromosome | |
170 samplelist="" | |
171 frag="" | |
172 frag=$fraglen | |
173 k=1 | |
174 | |
175 sizefile=$(basename $f) | |
176 chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}'); | |
177 chrSize=$(cat $f | cut -f2); | |
178 printf "Chromosome $chr: " | |
179 | |
180 #list of sample bed files and fragment lengths | |
181 for s in $wdir/samples.$ran/*.bed; do #and for each sample file | |
182 samplefile=$(basename $s) | |
183 chr2=$(echo $samplefile | awk -F"." '{print $2}'); | |
184 if [ $chr == $chr2 ] #belonging to this chromosome | |
185 then | |
186 samplelist="$samplelist,$wdir/samples.$ran/ext.$samplefile" | |
187 samplename=$(echo $samplefile | awk -F"." '{ print $3 }') | |
188 samplefilename=$(echo $samplefile | cut -d'.' -f 3-) | |
189 shift=$(echo "$frag" | cut -f "$k" -d ",") | |
190 read=$(echo "$readL" | cut -f "$k" -d ",") | |
191 k=$(($k+1)) | |
192 perl "$sPath/readshifter.pl" "$wdir/samples.$ran/$samplefile" $shift $read > "$wdir/samples.$ran/ext.$samplefile" | |
193 fi | |
194 done | |
195 | |
196 #control file | |
197 bkgdfile="None" | |
198 if [ ! -z $bdir ]; then | |
199 l=$(($dupnum+1)) | |
200 bshift=$(echo $frag | cut -f "$l" -d ",") | |
201 perl "$sPath/readshifter.pl" "$wdir/bkgd.$ran/bkgd.$chr.ctrl.bed" $bshift $readC > "$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed" | |
202 bkgdfile="$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed" | |
203 fi | |
204 | |
205 #remove leading comma | |
206 samplelist=${samplelist#","} | |
207 frag=${frag#","} | |
208 | |
209 #call the peak calling R script | |
210 Rscript "$sPath/signalmaker.r" -chromoS="$chrSize" -chromo="$chr" -bednames=$samplelist -frag=$frag -bkgd=$bkgdfile -out="$wdir/signal.$ran/" -sig="$signal" -regions="$regs" -p="$cores" -chrcount="$counting" | |
211 cat $wdir/signal.$ran/*.bedSignal | awk -F"\t" -v j=0 '$4 > j' > "$out/signal/$chr.bedGraph" | |
212 rm $wdir/signal.$ran/*.bedSignal | |
213 counting=$(($counting+1)); | |
214 done | |
215 counting=1; | |
216 #=======================> DONE! | |
217 | |
218 rm -rf $wdir | |
219 | |
220 printf "\n\n========================================\nWe're done...Congratulations!\n========================================\n\n" |