Mercurial > repos > messersc > jamm
comparison SignalGenerator.sh @ 1:243f75d0ed6e draft default tip
Uploaded.
Includes new release 1.0.7 with fixed optional controls.
| author | messersc |
|---|---|
| date | Thu, 19 Feb 2015 05:39:45 -0500 |
| parents | d42f4d78c85e |
| children |
comparison
equal
deleted
inserted
replaced
| 0:d42f4d78c85e | 1:243f75d0ed6e |
|---|---|
| 1 #!/usr/bin/bash | 1 ######################################################################## |
| 2 | 2 # JAMMv1.0.7rev1 is a peak finder for joint analysis of NGS replicates. |
| 3 | 3 # Copyright (C) 2014-2015 Mahmoud Ibrahim |
| 4 | 4 # |
| 5 # This program is free software: you can redistribute it and/or modify | |
| 6 # it under the terms of the GNU General Public License as published by | |
| 7 # the Free Software Foundation, either version 3 of the License, or | |
| 8 # (at your option) any later version. | |
| 9 # | |
| 10 # This program is distributed in the hope that it will be useful, | |
| 11 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 13 # GNU General Public License for more details. | |
| 14 # | |
| 15 # You should have received a copy of the GNU General Public License | |
| 16 # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
| 17 # | |
| 18 # Contact: mahmoud.ibrahim@mdc-berlin.de | |
| 19 ######################################################################## | |
| 5 | 20 |
| 6 ##Finding out the path | 21 ##Finding out the path |
| 7 sPath="`dirname \"$0\"`" | 22 sPath="`dirname \"$0\"`" |
| 8 sPath="`( cd \"$sPath\" && pwd )`" | 23 sPath="`( cd \"$sPath\" && pwd )`" |
| 9 | 24 |
| 10 | 25 |
| 11 | 26 |
| 12 usage() | 27 usage() |
| 13 { | 28 { |
| 14 cat << EOF | 29 cat << EOF |
| 15 Welcome to JAMM v1.0.6rev2 Signal Generator Script (GNU GPLv3). Copyright (C) 2014 Mahmoud Ibrahim. | 30 Welcome to JAMM v1.0.7rev1 Signal Generator Script (GNU GPLv3). Copyright (C) 2014-2015 Mahmoud Ibrahim. |
| 16 | 31 |
| 17 This program comes with ABSOLUTELY NO WARRANTY; for details visit http://www.gnu.org/licenses/gpl.html. This is free software, and you are welcome to redistribute it under certain conditions; visit http://www.gnu.org/licenses/gpl.html for details. | 32 This program comes with ABSOLUTELY NO WARRANTY; for details visit http://www.gnu.org/licenses/gpl.html. This is free software, and you are welcome to redistribute it under certain conditions; visit http://www.gnu.org/licenses/gpl.html for details. |
| 18 | 33 |
| 19 OPTIONS: | 34 OPTIONS: |
| 20 -s Directory containing sample files (required) | 35 -s Directory containing sample files (required) |
| 21 -g Genome size file (required) | 36 -g Genome size file (required) |
| 22 -o Output Directory (required) | 37 -o Output Directory (required) |
| 23 -c directory containing input or Control files | 38 -c directory containing input or Control files |
| 24 -r file with Regions to get signal for (required) | 39 -r file with Regions to get signal for (required) |
| 25 -b Bin size for signal generation (default: 10) | 40 -b Bin size for signal generation (default: 10) |
| 26 -f Fragment lengths (required) | 41 -f Fragment lengths (required if -t is "single") |
| 27 -p Number of processors used by R scripts (default: 1) | 42 -p Number of processors used by R scripts (default: 1) |
| 43 -t Alignment type, paired or single (default: single) | |
| 28 | 44 |
| 29 EOF | 45 EOF |
| 30 } | 46 } |
| 31 | 47 |
| 32 | 48 |
| 42 regs="" | 58 regs="" |
| 43 fraglen="" | 59 fraglen="" |
| 44 wdir=$(mktemp -d) | 60 wdir=$(mktemp -d) |
| 45 ran=$RANDOM | 61 ran=$RANDOM |
| 46 cores="1" | 62 cores="1" |
| 47 | 63 type="single" |
| 48 | 64 |
| 49 while getopts "s:o:c:r:b:f:g:p:" OPTION | 65 while getopts "s:o:c:r:b:f:g:p:t:" OPTION |
| 50 do | 66 do |
| 51 case $OPTION in | 67 case $OPTION in |
| 52 s) sdir=$OPTARG | 68 s) sdir=$OPTARG |
| 53 ;; | 69 ;; |
| 54 g) gsize=$OPTARG | 70 g) gsize=$OPTARG |
| 63 ;; | 79 ;; |
| 64 f) fraglen=$OPTARG | 80 f) fraglen=$OPTARG |
| 65 ;; | 81 ;; |
| 66 p) cores=$OPTARG | 82 p) cores=$OPTARG |
| 67 ;; | 83 ;; |
| 84 t) type=$OPTARG | |
| 85 ;; | |
| 68 ?) | 86 ?) |
| 69 usage | 87 usage |
| 70 exit | 88 exit |
| 71 ;; | 89 ;; |
| 72 esac | 90 esac |
| 73 done | 91 done |
| 74 | 92 |
| 75 | 93 |
| 76 if [[ -z $fraglen ]] || [[ -z $sdir ]] || [[ -z $regs ]] || [[ -z $out ]] | 94 if [[ -z $sdir ]] || [[ -z $regs ]] || [[ -z $out ]] |
| 77 then | 95 then |
| 78 usage | 96 usage |
| 79 exit 1 | 97 exit 1 |
| 80 fi | 98 fi |
| 99 | |
| 100 if [[ -z $fraglen ]] | |
| 101 then | |
| 102 if [ $type == "single" ] | |
| 103 then | |
| 104 usage | |
| 105 exit 1 | |
| 106 fi | |
| 107 fi | |
| 108 if [[ -d "$out/signal" ]]; then | |
| 109 printf "\n\nOutput directory $out/signal already exists. I can't override existing results!\n\n" | |
| 110 exit 0 | |
| 111 fi | |
| 81 #=======================> DONE! | 112 #=======================> DONE! |
| 82 | 113 |
| 83 | 114 |
| 84 | 115 |
| 85 # ============================= | 116 # ============================= |
| 86 # Step One: Initial Processing | 117 # Step One: Initial Processing |
| 87 # ============================= | 118 # ============================= |
| 88 printf "\n\n============================================\nStarted JAMM Signal Generator Pipeline...Hang on!\n============================================\n\n" | 119 printf "\n\n=======================================================\nStarted JAMM1.0.7rev1 Signal Generator Pipeline...Hang on!\n=======================================================\n\n" |
| 89 | 120 |
| 90 if [ ! -d "$wdir" ]; then | 121 if [ ! -d "$wdir" ]; then |
| 91 mkdir $wdir #make working directory | 122 mkdir $wdir #make working directory |
| 92 fi | 123 fi |
| 93 if [ ! -d "$out" ]; then | 124 if [ ! -d "$out" ]; then |
| 129 sizefile=$(basename $f) | 160 sizefile=$(basename $f) |
| 130 chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}'); | 161 chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}'); |
| 131 awk -v chr="$chr" -v ext="$wdir/bkgd.$ran/" -F"\t" '$1 == chr { print $2"\t"$6 >> ext"bkgd."chr".ctrl.bed" }' "$wdir/bkgd.$ran/ctrl.bed" | 162 awk -v chr="$chr" -v ext="$wdir/bkgd.$ran/" -F"\t" '$1 == chr { print $2"\t"$6 >> ext"bkgd."chr".ctrl.bed" }' "$wdir/bkgd.$ran/ctrl.bed" |
| 132 done | 163 done |
| 133 | 164 |
| 134 rm $wdir/bkgd.$ran/ctrl.bed | |
| 135 | 165 |
| 136 printf "Done!\n" | 166 printf "Done!\n" |
| 137 fi | 167 fi |
| 138 | 168 |
| 139 | 169 |
| 140 #determine average read lengths | 170 #determine average read lengths |
| 171 if [ $type == "single" ]; then | |
| 141 printf "Getting average read lengths..." | 172 printf "Getting average read lengths..." |
| 142 if [ ! -z $bdir ]; then | 173 if [ ! -z $bdir ]; then |
| 143 readC=$(awk '{a=$3-$2;print a;}' "$wdir/bkgd.$ran/ctrl.bed" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}') | 174 readC=$(awk '{a=$3-$2;print a;}' "$wdir/bkgd.$ran/ctrl.bed" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}') |
| 144 fi | 175 fi |
| 145 readL="" | 176 readL="" |
| 147 read=$(awk '{a=$3-$2;print a;}' "$s" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}') | 178 read=$(awk '{a=$3-$2;print a;}' "$s" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}') |
| 148 readL="$readL,$read" | 179 readL="$readL,$read" |
| 149 done | 180 done |
| 150 readL=${readL#","} | 181 readL=${readL#","} |
| 151 printf "Done!\n" | 182 printf "Done!\n" |
| 183 fi | |
| 152 #=======================> DONE! | 184 #=======================> DONE! |
| 153 | 185 |
| 154 | 186 |
| 155 | 187 |
| 156 | 188 |
| 163 mkdir $out/signal #store signal | 195 mkdir $out/signal #store signal |
| 164 | 196 |
| 165 | 197 |
| 166 printf "Generating Signal...(bin size: $signal)\n" | 198 printf "Generating Signal...(bin size: $signal)\n" |
| 167 | 199 |
| 200 if [ $type == "single" ]; then | |
| 168 counting=1; | 201 counting=1; |
| 169 for f in $wdir/sizes.$ran/*; do #for each chromosome | 202 for f in $wdir/sizes.$ran/*; do #for each chromosome |
| 170 samplelist="" | 203 samplelist="" |
| 171 frag="" | 204 frag="" |
| 172 frag=$fraglen | 205 frag=$fraglen |
| 205 #remove leading comma | 238 #remove leading comma |
| 206 samplelist=${samplelist#","} | 239 samplelist=${samplelist#","} |
| 207 frag=${frag#","} | 240 frag=${frag#","} |
| 208 | 241 |
| 209 #call the peak calling R script | 242 #call the peak calling R script |
| 210 Rscript "$sPath/signalmaker.r" -chromoS="$chrSize" -chromo="$chr" -bednames=$samplelist -frag=$frag -bkgd=$bkgdfile -out="$wdir/signal.$ran/" -sig="$signal" -regions="$regs" -p="$cores" -chrcount="$counting" | 243 Rscript "$sPath/signalmaker.r" -chromoS="$chrSize" -chromo="$chr" -bednames=$samplelist -frag=$frag -bkgd=$bkgdfile -out="$wdir/signal.$ran/" -sig="$signal" -regions="$regs" -p="$cores" -chrcount="$counting" -t="$type" |
| 211 cat $wdir/signal.$ran/*.bedSignal | awk -F"\t" -v j=0 '$4 > j' > "$out/signal/$chr.bedGraph" | 244 cat $wdir/signal.$ran/*.bedSignal | awk -F"\t" -v j=0 '$4 > j' > "$out/signal/$chr.bedGraph" |
| 212 rm $wdir/signal.$ran/*.bedSignal | 245 rm $wdir/signal.$ran/*.bedSignal |
| 213 counting=$(($counting+1)); | 246 counting=$(($counting+1)); |
| 214 done | 247 done |
| 215 counting=1; | 248 counting=1; |
| 249 fi | |
| 250 | |
| 251 | |
| 252 | |
| 253 if [ $type == "paired" ]; then | |
| 254 counting=1; | |
| 255 for f in $wdir/sizes.$ran/*; do #for each chromosome | |
| 256 samplelist="" | |
| 257 | |
| 258 sizefile=$(basename $f) | |
| 259 chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}'); | |
| 260 chrSize=$(cat $f | cut -f2); | |
| 261 printf "Chromosome $chr: " | |
| 262 | |
| 263 #list of sample bed files and fragment lengths | |
| 264 for s in $wdir/samples.$ran/*.bed; do #and for each sample file | |
| 265 samplefile=$(basename $s) | |
| 266 chr2=$(echo $samplefile | awk -F"." '{print $2}'); | |
| 267 if [ $chr == $chr2 ] #belonging to this chromosome | |
| 268 then | |
| 269 samplelist="$samplelist,$wdir/samples.$ran/$samplefile" | |
| 270 samplename=$(echo $samplefile | awk -F"." '{ print $3 }') | |
| 271 samplefilename=$(echo $samplefile | cut -d'.' -f 3-) | |
| 272 x="$sdir/$samplefilename" | |
| 273 fi | |
| 274 done | |
| 275 | |
| 276 #control file | |
| 277 bkgdfile="None" | |
| 278 if [ ! -z $bdir ]; then | |
| 279 bkgdfile="$wdir/bkgd.$ran/bkgd.$chr.ctrl.bed" | |
| 280 fi | |
| 281 | |
| 282 #remove leading comma | |
| 283 samplelist=${samplelist#","} | |
| 284 frag=${frag#","} | |
| 285 | |
| 286 #call the peak calling R script | |
| 287 Rscript "$sPath/signalmaker.r" -chromoS="$chrSize" -chromo="$chr" -bednames=$samplelist -frag=$frag -bkgd=$bkgdfile -out="$wdir/signal.$ran/" -sig="$signal" -regions="$regs" -p="$cores" -chrcount="$counting" -t="$type" | |
| 288 cat $wdir/signal.$ran/*.bedSignal | awk -F"\t" -v j=0 '$4 > j' > "$out/signal/$chr.bedGraph" | |
| 289 rm $wdir/signal.$ran/*.bedSignal | |
| 290 counting=$(($counting+1)); | |
| 291 done | |
| 292 counting=1; | |
| 293 fi | |
| 216 #=======================> DONE! | 294 #=======================> DONE! |
| 217 | 295 |
| 218 rm -rf $wdir | 296 rm -rf $wdir |
| 219 | 297 |
| 220 printf "\n\n========================================\nWe're done...Congratulations!\n========================================\n\n" | 298 printf "\n\n========================================\nWe're done...Congratulations!\n========================================\n\n" |
