comparison SignalGenerator.sh @ 0:d42f4d78c85e draft

Uploaded
author messersc
date Wed, 17 Dec 2014 10:40:23 -0500
parents
children 243f75d0ed6e
comparison
equal deleted inserted replaced
-1:000000000000 0:d42f4d78c85e
1 #!/usr/bin/bash
2
3
4
5
6 ##Finding out the path
7 sPath="`dirname \"$0\"`"
8 sPath="`( cd \"$sPath\" && pwd )`"
9
10
11
12 usage()
13 {
14 cat << EOF
15 Welcome to JAMM v1.0.6rev2 Signal Generator Script (GNU GPLv3). Copyright (C) 2014 Mahmoud Ibrahim.
16
17 This program comes with ABSOLUTELY NO WARRANTY; for details visit http://www.gnu.org/licenses/gpl.html. This is free software, and you are welcome to redistribute it under certain conditions; visit http://www.gnu.org/licenses/gpl.html for details.
18
19 OPTIONS:
20 -s Directory containing sample files (required)
21 -g Genome size file (required)
22 -o Output Directory (required)
23 -c directory containing input or Control files
24 -r file with Regions to get signal for (required)
25 -b Bin size for signal generation (default: 10)
26 -f Fragment lengths (required)
27 -p Number of processors used by R scripts (default: 1)
28
29 EOF
30 }
31
32
33 # =========================
34 # Process Input parameters
35 # =========================
36
37
38 sdir=""
39 gsize=""
40 out=""
41 signal="10"
42 regs=""
43 fraglen=""
44 wdir=$(mktemp -d)
45 ran=$RANDOM
46 cores="1"
47
48
49 while getopts "s:o:c:r:b:f:g:p:" OPTION
50 do
51 case $OPTION in
52 s) sdir=$OPTARG
53 ;;
54 g) gsize=$OPTARG
55 ;;
56 o) out=$OPTARG
57 ;;
58 c) bdir=$OPTARG
59 ;;
60 r) regs=$OPTARG
61 ;;
62 b) signal=$OPTARG
63 ;;
64 f) fraglen=$OPTARG
65 ;;
66 p) cores=$OPTARG
67 ;;
68 ?)
69 usage
70 exit
71 ;;
72 esac
73 done
74
75
76 if [[ -z $fraglen ]] || [[ -z $sdir ]] || [[ -z $regs ]] || [[ -z $out ]]
77 then
78 usage
79 exit 1
80 fi
81 #=======================> DONE!
82
83
84
85 # =============================
86 # Step One: Initial Processing
87 # =============================
88 printf "\n\n============================================\nStarted JAMM Signal Generator Pipeline...Hang on!\n============================================\n\n"
89
90 if [ ! -d "$wdir" ]; then
91 mkdir $wdir #make working directory
92 fi
93 if [ ! -d "$out" ]; then
94 mkdir $out #make working directory
95 fi
96 mkdir $wdir/bkgd.$ran/ #directory to store background files
97 mkdir $wdir/sizes.$ran/ #chromosomes and sizes
98 mkdir $wdir/samples.$ran/ #store sample files
99
100 dupnum=$(ls -1 $sdir | wc -l) #count how many sample files
101
102
103 #separate chromosome sizes
104 printf "Loading genome size file..."
105 ext="$wdir/sizes.$ran/"
106 awk -v ext="$ext" '{ print >> ext"/size." $1 ".bed" }' $gsize
107 printf "Done!\n"
108
109
110 printf "Processing sample files..."
111 #load each chromosome from each sample file
112 for i in $sdir/*.bed; do
113 samplefile=$(basename $i)
114 for f in $wdir/sizes.$ran/*; do
115 sizefile=$(basename $f)
116 chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}');
117 awk -v chr="$chr" -v ext="$wdir/samples.$ran/" -v samplefile="$samplefile" -F"\t" '$1 == chr { print $2"\t"$6 >> ext"sample."chr"."samplefile }' "$i"
118 done
119 done
120 printf "Done!\n"
121
122
123 if [ ! -z $bdir ]; then
124 #concatenate all background files into one file
125 printf "Processing control files..."
126 cat $bdir/*.bed > $wdir/bkgd.$ran/ctrl.bed
127
128 for f in $wdir/sizes.$ran/*; do
129 sizefile=$(basename $f)
130 chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}');
131 awk -v chr="$chr" -v ext="$wdir/bkgd.$ran/" -F"\t" '$1 == chr { print $2"\t"$6 >> ext"bkgd."chr".ctrl.bed" }' "$wdir/bkgd.$ran/ctrl.bed"
132 done
133
134 rm $wdir/bkgd.$ran/ctrl.bed
135
136 printf "Done!\n"
137 fi
138
139
140 #determine average read lengths
141 printf "Getting average read lengths..."
142 if [ ! -z $bdir ]; then
143 readC=$(awk '{a=$3-$2;print a;}' "$wdir/bkgd.$ran/ctrl.bed" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}')
144 fi
145 readL=""
146 for s in $sdir/*.bed; do #and for each sample file
147 read=$(awk '{a=$3-$2;print a;}' "$s" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}')
148 readL="$readL,$read"
149 done
150 readL=${readL#","}
151 printf "Done!\n"
152 #=======================> DONE!
153
154
155
156
157
158
159 # ===========================
160 # Step Three: Getting Signal
161 # ===========================
162 mkdir $wdir/signal.$ran/
163 mkdir $out/signal #store signal
164
165
166 printf "Generating Signal...(bin size: $signal)\n"
167
168 counting=1;
169 for f in $wdir/sizes.$ran/*; do #for each chromosome
170 samplelist=""
171 frag=""
172 frag=$fraglen
173 k=1
174
175 sizefile=$(basename $f)
176 chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}');
177 chrSize=$(cat $f | cut -f2);
178 printf "Chromosome $chr: "
179
180 #list of sample bed files and fragment lengths
181 for s in $wdir/samples.$ran/*.bed; do #and for each sample file
182 samplefile=$(basename $s)
183 chr2=$(echo $samplefile | awk -F"." '{print $2}');
184 if [ $chr == $chr2 ] #belonging to this chromosome
185 then
186 samplelist="$samplelist,$wdir/samples.$ran/ext.$samplefile"
187 samplename=$(echo $samplefile | awk -F"." '{ print $3 }')
188 samplefilename=$(echo $samplefile | cut -d'.' -f 3-)
189 shift=$(echo "$frag" | cut -f "$k" -d ",")
190 read=$(echo "$readL" | cut -f "$k" -d ",")
191 k=$(($k+1))
192 perl "$sPath/readshifter.pl" "$wdir/samples.$ran/$samplefile" $shift $read > "$wdir/samples.$ran/ext.$samplefile"
193 fi
194 done
195
196 #control file
197 bkgdfile="None"
198 if [ ! -z $bdir ]; then
199 l=$(($dupnum+1))
200 bshift=$(echo $frag | cut -f "$l" -d ",")
201 perl "$sPath/readshifter.pl" "$wdir/bkgd.$ran/bkgd.$chr.ctrl.bed" $bshift $readC > "$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed"
202 bkgdfile="$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed"
203 fi
204
205 #remove leading comma
206 samplelist=${samplelist#","}
207 frag=${frag#","}
208
209 #call the peak calling R script
210 Rscript "$sPath/signalmaker.r" -chromoS="$chrSize" -chromo="$chr" -bednames=$samplelist -frag=$frag -bkgd=$bkgdfile -out="$wdir/signal.$ran/" -sig="$signal" -regions="$regs" -p="$cores" -chrcount="$counting"
211 cat $wdir/signal.$ran/*.bedSignal | awk -F"\t" -v j=0 '$4 > j' > "$out/signal/$chr.bedGraph"
212 rm $wdir/signal.$ran/*.bedSignal
213 counting=$(($counting+1));
214 done
215 counting=1;
216 #=======================> DONE!
217
218 rm -rf $wdir
219
220 printf "\n\n========================================\nWe're done...Congratulations!\n========================================\n\n"