0
|
1 #!/usr/bin/bash
|
|
2
|
|
3
|
|
4
|
|
5
|
|
6 ##Finding out the path
|
|
7 sPath="`dirname \"$0\"`"
|
|
8 sPath="`( cd \"$sPath\" && pwd )`"
|
|
9
|
|
10
|
|
11
|
|
12 usage()
|
|
13 {
|
|
14 cat << EOF
|
|
15 Welcome to JAMM v1.0.6rev2 Signal Generator Script (GNU GPLv3). Copyright (C) 2014 Mahmoud Ibrahim.
|
|
16
|
|
17 This program comes with ABSOLUTELY NO WARRANTY; for details visit http://www.gnu.org/licenses/gpl.html. This is free software, and you are welcome to redistribute it under certain conditions; visit http://www.gnu.org/licenses/gpl.html for details.
|
|
18
|
|
19 OPTIONS:
|
|
20 -s Directory containing sample files (required)
|
|
21 -g Genome size file (required)
|
|
22 -o Output Directory (required)
|
|
23 -c directory containing input or Control files
|
|
24 -r file with Regions to get signal for (required)
|
|
25 -b Bin size for signal generation (default: 10)
|
|
26 -f Fragment lengths (required)
|
|
27 -p Number of processors used by R scripts (default: 1)
|
|
28
|
|
29 EOF
|
|
30 }
|
|
31
|
|
32
|
|
33 # =========================
|
|
34 # Process Input parameters
|
|
35 # =========================
|
|
36
|
|
37
|
|
38 sdir=""
|
|
39 gsize=""
|
|
40 out=""
|
|
41 signal="10"
|
|
42 regs=""
|
|
43 fraglen=""
|
|
44 wdir=$(mktemp -d)
|
|
45 ran=$RANDOM
|
|
46 cores="1"
|
|
47
|
|
48
|
|
49 while getopts "s:o:c:r:b:f:g:p:" OPTION
|
|
50 do
|
|
51 case $OPTION in
|
|
52 s) sdir=$OPTARG
|
|
53 ;;
|
|
54 g) gsize=$OPTARG
|
|
55 ;;
|
|
56 o) out=$OPTARG
|
|
57 ;;
|
|
58 c) bdir=$OPTARG
|
|
59 ;;
|
|
60 r) regs=$OPTARG
|
|
61 ;;
|
|
62 b) signal=$OPTARG
|
|
63 ;;
|
|
64 f) fraglen=$OPTARG
|
|
65 ;;
|
|
66 p) cores=$OPTARG
|
|
67 ;;
|
|
68 ?)
|
|
69 usage
|
|
70 exit
|
|
71 ;;
|
|
72 esac
|
|
73 done
|
|
74
|
|
75
|
|
76 if [[ -z $fraglen ]] || [[ -z $sdir ]] || [[ -z $regs ]] || [[ -z $out ]]
|
|
77 then
|
|
78 usage
|
|
79 exit 1
|
|
80 fi
|
|
81 #=======================> DONE!
|
|
82
|
|
83
|
|
84
|
|
85 # =============================
|
|
86 # Step One: Initial Processing
|
|
87 # =============================
|
|
88 printf "\n\n============================================\nStarted JAMM Signal Generator Pipeline...Hang on!\n============================================\n\n"
|
|
89
|
|
90 if [ ! -d "$wdir" ]; then
|
|
91 mkdir $wdir #make working directory
|
|
92 fi
|
|
93 if [ ! -d "$out" ]; then
|
|
94 mkdir $out #make working directory
|
|
95 fi
|
|
96 mkdir $wdir/bkgd.$ran/ #directory to store background files
|
|
97 mkdir $wdir/sizes.$ran/ #chromosomes and sizes
|
|
98 mkdir $wdir/samples.$ran/ #store sample files
|
|
99
|
|
100 dupnum=$(ls -1 $sdir | wc -l) #count how many sample files
|
|
101
|
|
102
|
|
103 #separate chromosome sizes
|
|
104 printf "Loading genome size file..."
|
|
105 ext="$wdir/sizes.$ran/"
|
|
106 awk -v ext="$ext" '{ print >> ext"/size." $1 ".bed" }' $gsize
|
|
107 printf "Done!\n"
|
|
108
|
|
109
|
|
110 printf "Processing sample files..."
|
|
111 #load each chromosome from each sample file
|
|
112 for i in $sdir/*.bed; do
|
|
113 samplefile=$(basename $i)
|
|
114 for f in $wdir/sizes.$ran/*; do
|
|
115 sizefile=$(basename $f)
|
|
116 chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}');
|
|
117 awk -v chr="$chr" -v ext="$wdir/samples.$ran/" -v samplefile="$samplefile" -F"\t" '$1 == chr { print $2"\t"$6 >> ext"sample."chr"."samplefile }' "$i"
|
|
118 done
|
|
119 done
|
|
120 printf "Done!\n"
|
|
121
|
|
122
|
|
123 if [ ! -z $bdir ]; then
|
|
124 #concatenate all background files into one file
|
|
125 printf "Processing control files..."
|
|
126 cat $bdir/*.bed > $wdir/bkgd.$ran/ctrl.bed
|
|
127
|
|
128 for f in $wdir/sizes.$ran/*; do
|
|
129 sizefile=$(basename $f)
|
|
130 chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}');
|
|
131 awk -v chr="$chr" -v ext="$wdir/bkgd.$ran/" -F"\t" '$1 == chr { print $2"\t"$6 >> ext"bkgd."chr".ctrl.bed" }' "$wdir/bkgd.$ran/ctrl.bed"
|
|
132 done
|
|
133
|
|
134 rm $wdir/bkgd.$ran/ctrl.bed
|
|
135
|
|
136 printf "Done!\n"
|
|
137 fi
|
|
138
|
|
139
|
|
140 #determine average read lengths
|
|
141 printf "Getting average read lengths..."
|
|
142 if [ ! -z $bdir ]; then
|
|
143 readC=$(awk '{a=$3-$2;print a;}' "$wdir/bkgd.$ran/ctrl.bed" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}')
|
|
144 fi
|
|
145 readL=""
|
|
146 for s in $sdir/*.bed; do #and for each sample file
|
|
147 read=$(awk '{a=$3-$2;print a;}' "$s" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}')
|
|
148 readL="$readL,$read"
|
|
149 done
|
|
150 readL=${readL#","}
|
|
151 printf "Done!\n"
|
|
152 #=======================> DONE!
|
|
153
|
|
154
|
|
155
|
|
156
|
|
157
|
|
158
|
|
159 # ===========================
|
|
160 # Step Three: Getting Signal
|
|
161 # ===========================
|
|
162 mkdir $wdir/signal.$ran/
|
|
163 mkdir $out/signal #store signal
|
|
164
|
|
165
|
|
166 printf "Generating Signal...(bin size: $signal)\n"
|
|
167
|
|
168 counting=1;
|
|
169 for f in $wdir/sizes.$ran/*; do #for each chromosome
|
|
170 samplelist=""
|
|
171 frag=""
|
|
172 frag=$fraglen
|
|
173 k=1
|
|
174
|
|
175 sizefile=$(basename $f)
|
|
176 chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}');
|
|
177 chrSize=$(cat $f | cut -f2);
|
|
178 printf "Chromosome $chr: "
|
|
179
|
|
180 #list of sample bed files and fragment lengths
|
|
181 for s in $wdir/samples.$ran/*.bed; do #and for each sample file
|
|
182 samplefile=$(basename $s)
|
|
183 chr2=$(echo $samplefile | awk -F"." '{print $2}');
|
|
184 if [ $chr == $chr2 ] #belonging to this chromosome
|
|
185 then
|
|
186 samplelist="$samplelist,$wdir/samples.$ran/ext.$samplefile"
|
|
187 samplename=$(echo $samplefile | awk -F"." '{ print $3 }')
|
|
188 samplefilename=$(echo $samplefile | cut -d'.' -f 3-)
|
|
189 shift=$(echo "$frag" | cut -f "$k" -d ",")
|
|
190 read=$(echo "$readL" | cut -f "$k" -d ",")
|
|
191 k=$(($k+1))
|
|
192 perl "$sPath/readshifter.pl" "$wdir/samples.$ran/$samplefile" $shift $read > "$wdir/samples.$ran/ext.$samplefile"
|
|
193 fi
|
|
194 done
|
|
195
|
|
196 #control file
|
|
197 bkgdfile="None"
|
|
198 if [ ! -z $bdir ]; then
|
|
199 l=$(($dupnum+1))
|
|
200 bshift=$(echo $frag | cut -f "$l" -d ",")
|
|
201 perl "$sPath/readshifter.pl" "$wdir/bkgd.$ran/bkgd.$chr.ctrl.bed" $bshift $readC > "$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed"
|
|
202 bkgdfile="$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed"
|
|
203 fi
|
|
204
|
|
205 #remove leading comma
|
|
206 samplelist=${samplelist#","}
|
|
207 frag=${frag#","}
|
|
208
|
|
209 #call the peak calling R script
|
|
210 Rscript "$sPath/signalmaker.r" -chromoS="$chrSize" -chromo="$chr" -bednames=$samplelist -frag=$frag -bkgd=$bkgdfile -out="$wdir/signal.$ran/" -sig="$signal" -regions="$regs" -p="$cores" -chrcount="$counting"
|
|
211 cat $wdir/signal.$ran/*.bedSignal | awk -F"\t" -v j=0 '$4 > j' > "$out/signal/$chr.bedGraph"
|
|
212 rm $wdir/signal.$ran/*.bedSignal
|
|
213 counting=$(($counting+1));
|
|
214 done
|
|
215 counting=1;
|
|
216 #=======================> DONE!
|
|
217
|
|
218 rm -rf $wdir
|
|
219
|
|
220 printf "\n\n========================================\nWe're done...Congratulations!\n========================================\n\n"
|