annotate SignalGenerator.sh @ 0:d42f4d78c85e draft

Uploaded
author messersc
date Wed, 17 Dec 2014 10:40:23 -0500
parents
children 243f75d0ed6e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
1 #!/usr/bin/bash
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
2
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
3
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
4
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
5
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
6 ##Finding out the path
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
7 sPath="`dirname \"$0\"`"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
8 sPath="`( cd \"$sPath\" && pwd )`"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
9
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
10
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
11
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
12 usage()
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
13 {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
14 cat << EOF
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
15 Welcome to JAMM v1.0.6rev2 Signal Generator Script (GNU GPLv3). Copyright (C) 2014 Mahmoud Ibrahim.
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
16
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
17 This program comes with ABSOLUTELY NO WARRANTY; for details visit http://www.gnu.org/licenses/gpl.html. This is free software, and you are welcome to redistribute it under certain conditions; visit http://www.gnu.org/licenses/gpl.html for details.
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
18
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
19 OPTIONS:
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
20 -s Directory containing sample files (required)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
21 -g Genome size file (required)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
22 -o Output Directory (required)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
23 -c directory containing input or Control files
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
24 -r file with Regions to get signal for (required)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
25 -b Bin size for signal generation (default: 10)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
26 -f Fragment lengths (required)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
27 -p Number of processors used by R scripts (default: 1)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
28
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
29 EOF
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
30 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
31
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
32
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
33 # =========================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
34 # Process Input parameters
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
35 # =========================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
36
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
37
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
38 sdir=""
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
39 gsize=""
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
40 out=""
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
41 signal="10"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
42 regs=""
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
43 fraglen=""
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
44 wdir=$(mktemp -d)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
45 ran=$RANDOM
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
46 cores="1"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
47
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
48
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
49 while getopts "s:o:c:r:b:f:g:p:" OPTION
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
50 do
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
51 case $OPTION in
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
52 s) sdir=$OPTARG
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
53 ;;
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
54 g) gsize=$OPTARG
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
55 ;;
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
56 o) out=$OPTARG
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
57 ;;
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
58 c) bdir=$OPTARG
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
59 ;;
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
60 r) regs=$OPTARG
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
61 ;;
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
62 b) signal=$OPTARG
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
63 ;;
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
64 f) fraglen=$OPTARG
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
65 ;;
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
66 p) cores=$OPTARG
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
67 ;;
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
68 ?)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
69 usage
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
70 exit
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
71 ;;
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
72 esac
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
73 done
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
74
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
75
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
76 if [[ -z $fraglen ]] || [[ -z $sdir ]] || [[ -z $regs ]] || [[ -z $out ]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
77 then
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
78 usage
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
79 exit 1
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
80 fi
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
81 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
82
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
83
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
84
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
85 # =============================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
86 # Step One: Initial Processing
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
87 # =============================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
88 printf "\n\n============================================\nStarted JAMM Signal Generator Pipeline...Hang on!\n============================================\n\n"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
89
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
90 if [ ! -d "$wdir" ]; then
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
91 mkdir $wdir #make working directory
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
92 fi
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
93 if [ ! -d "$out" ]; then
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
94 mkdir $out #make working directory
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
95 fi
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
96 mkdir $wdir/bkgd.$ran/ #directory to store background files
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
97 mkdir $wdir/sizes.$ran/ #chromosomes and sizes
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
98 mkdir $wdir/samples.$ran/ #store sample files
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
99
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
100 dupnum=$(ls -1 $sdir | wc -l) #count how many sample files
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
101
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
102
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
103 #separate chromosome sizes
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
104 printf "Loading genome size file..."
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
105 ext="$wdir/sizes.$ran/"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
106 awk -v ext="$ext" '{ print >> ext"/size." $1 ".bed" }' $gsize
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
107 printf "Done!\n"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
108
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
109
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
110 printf "Processing sample files..."
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
111 #load each chromosome from each sample file
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
112 for i in $sdir/*.bed; do
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
113 samplefile=$(basename $i)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
114 for f in $wdir/sizes.$ran/*; do
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
115 sizefile=$(basename $f)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
116 chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}');
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
117 awk -v chr="$chr" -v ext="$wdir/samples.$ran/" -v samplefile="$samplefile" -F"\t" '$1 == chr { print $2"\t"$6 >> ext"sample."chr"."samplefile }' "$i"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
118 done
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
119 done
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
120 printf "Done!\n"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
121
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
122
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
123 if [ ! -z $bdir ]; then
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
124 #concatenate all background files into one file
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
125 printf "Processing control files..."
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
126 cat $bdir/*.bed > $wdir/bkgd.$ran/ctrl.bed
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
127
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
128 for f in $wdir/sizes.$ran/*; do
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
129 sizefile=$(basename $f)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
130 chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}');
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
131 awk -v chr="$chr" -v ext="$wdir/bkgd.$ran/" -F"\t" '$1 == chr { print $2"\t"$6 >> ext"bkgd."chr".ctrl.bed" }' "$wdir/bkgd.$ran/ctrl.bed"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
132 done
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
133
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
134 rm $wdir/bkgd.$ran/ctrl.bed
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
135
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
136 printf "Done!\n"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
137 fi
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
138
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
139
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
140 #determine average read lengths
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
141 printf "Getting average read lengths..."
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
142 if [ ! -z $bdir ]; then
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
143 readC=$(awk '{a=$3-$2;print a;}' "$wdir/bkgd.$ran/ctrl.bed" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}')
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
144 fi
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
145 readL=""
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
146 for s in $sdir/*.bed; do #and for each sample file
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
147 read=$(awk '{a=$3-$2;print a;}' "$s" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}')
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
148 readL="$readL,$read"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
149 done
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
150 readL=${readL#","}
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
151 printf "Done!\n"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
152 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
153
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
154
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
155
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
156
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
157
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
158
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
159 # ===========================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
160 # Step Three: Getting Signal
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
161 # ===========================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
162 mkdir $wdir/signal.$ran/
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
163 mkdir $out/signal #store signal
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
164
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
165
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
166 printf "Generating Signal...(bin size: $signal)\n"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
167
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
168 counting=1;
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
169 for f in $wdir/sizes.$ran/*; do #for each chromosome
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
170 samplelist=""
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
171 frag=""
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
172 frag=$fraglen
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
173 k=1
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
174
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
175 sizefile=$(basename $f)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
176 chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}');
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
177 chrSize=$(cat $f | cut -f2);
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
178 printf "Chromosome $chr: "
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
179
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
180 #list of sample bed files and fragment lengths
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
181 for s in $wdir/samples.$ran/*.bed; do #and for each sample file
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
182 samplefile=$(basename $s)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
183 chr2=$(echo $samplefile | awk -F"." '{print $2}');
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
184 if [ $chr == $chr2 ] #belonging to this chromosome
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
185 then
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
186 samplelist="$samplelist,$wdir/samples.$ran/ext.$samplefile"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
187 samplename=$(echo $samplefile | awk -F"." '{ print $3 }')
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
188 samplefilename=$(echo $samplefile | cut -d'.' -f 3-)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
189 shift=$(echo "$frag" | cut -f "$k" -d ",")
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
190 read=$(echo "$readL" | cut -f "$k" -d ",")
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
191 k=$(($k+1))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
192 perl "$sPath/readshifter.pl" "$wdir/samples.$ran/$samplefile" $shift $read > "$wdir/samples.$ran/ext.$samplefile"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
193 fi
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
194 done
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
195
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
196 #control file
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
197 bkgdfile="None"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
198 if [ ! -z $bdir ]; then
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
199 l=$(($dupnum+1))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
200 bshift=$(echo $frag | cut -f "$l" -d ",")
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
201 perl "$sPath/readshifter.pl" "$wdir/bkgd.$ran/bkgd.$chr.ctrl.bed" $bshift $readC > "$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
202 bkgdfile="$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
203 fi
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
204
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
205 #remove leading comma
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
206 samplelist=${samplelist#","}
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
207 frag=${frag#","}
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
208
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
209 #call the peak calling R script
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
210 Rscript "$sPath/signalmaker.r" -chromoS="$chrSize" -chromo="$chr" -bednames=$samplelist -frag=$frag -bkgd=$bkgdfile -out="$wdir/signal.$ran/" -sig="$signal" -regions="$regs" -p="$cores" -chrcount="$counting"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
211 cat $wdir/signal.$ran/*.bedSignal | awk -F"\t" -v j=0 '$4 > j' > "$out/signal/$chr.bedGraph"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
212 rm $wdir/signal.$ran/*.bedSignal
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
213 counting=$(($counting+1));
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
214 done
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
215 counting=1;
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
216 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
217
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
218 rm -rf $wdir
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
219
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
220 printf "\n\n========================================\nWe're done...Congratulations!\n========================================\n\n"