Mercurial > repos > messersc > jamm
diff SignalGenerator.sh @ 0:d42f4d78c85e draft
Uploaded
author | messersc |
---|---|
date | Wed, 17 Dec 2014 10:40:23 -0500 |
parents | |
children | 243f75d0ed6e |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SignalGenerator.sh Wed Dec 17 10:40:23 2014 -0500 @@ -0,0 +1,220 @@ +#!/usr/bin/bash + + + + +##Finding out the path +sPath="`dirname \"$0\"`" +sPath="`( cd \"$sPath\" && pwd )`" + + + +usage() +{ +cat << EOF +Welcome to JAMM v1.0.6rev2 Signal Generator Script (GNU GPLv3). Copyright (C) 2014 Mahmoud Ibrahim. + +This program comes with ABSOLUTELY NO WARRANTY; for details visit http://www.gnu.org/licenses/gpl.html. This is free software, and you are welcome to redistribute it under certain conditions; visit http://www.gnu.org/licenses/gpl.html for details. + +OPTIONS: + -s Directory containing sample files (required) + -g Genome size file (required) + -o Output Directory (required) + -c directory containing input or Control files + -r file with Regions to get signal for (required) + -b Bin size for signal generation (default: 10) + -f Fragment lengths (required) + -p Number of processors used by R scripts (default: 1) + +EOF +} + + +# ========================= +# Process Input parameters +# ========================= + + +sdir="" +gsize="" +out="" +signal="10" +regs="" +fraglen="" +wdir=$(mktemp -d) +ran=$RANDOM +cores="1" + + +while getopts "s:o:c:r:b:f:g:p:" OPTION +do + case $OPTION in + s) sdir=$OPTARG + ;; + g) gsize=$OPTARG + ;; + o) out=$OPTARG + ;; + c) bdir=$OPTARG + ;; + r) regs=$OPTARG + ;; + b) signal=$OPTARG + ;; + f) fraglen=$OPTARG + ;; + p) cores=$OPTARG + ;; + ?) + usage + exit + ;; + esac +done + + +if [[ -z $fraglen ]] || [[ -z $sdir ]] || [[ -z $regs ]] || [[ -z $out ]] +then + usage + exit 1 +fi +#=======================> DONE! + + + +# ============================= +# Step One: Initial Processing +# ============================= +printf "\n\n============================================\nStarted JAMM Signal Generator Pipeline...Hang on!\n============================================\n\n" + +if [ ! -d "$wdir" ]; then + mkdir $wdir #make working directory +fi +if [ ! -d "$out" ]; then + mkdir $out #make working directory +fi +mkdir $wdir/bkgd.$ran/ #directory to store background files +mkdir $wdir/sizes.$ran/ #chromosomes and sizes +mkdir $wdir/samples.$ran/ #store sample files + +dupnum=$(ls -1 $sdir | wc -l) #count how many sample files + + +#separate chromosome sizes +printf "Loading genome size file..." +ext="$wdir/sizes.$ran/" +awk -v ext="$ext" '{ print >> ext"/size." $1 ".bed" }' $gsize +printf "Done!\n" + + +printf "Processing sample files..." +#load each chromosome from each sample file +for i in $sdir/*.bed; do +samplefile=$(basename $i) + for f in $wdir/sizes.$ran/*; do + sizefile=$(basename $f) + chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}'); + awk -v chr="$chr" -v ext="$wdir/samples.$ran/" -v samplefile="$samplefile" -F"\t" '$1 == chr { print $2"\t"$6 >> ext"sample."chr"."samplefile }' "$i" + done +done +printf "Done!\n" + + +if [ ! -z $bdir ]; then +#concatenate all background files into one file +printf "Processing control files..." +cat $bdir/*.bed > $wdir/bkgd.$ran/ctrl.bed + +for f in $wdir/sizes.$ran/*; do + sizefile=$(basename $f) + chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}'); + awk -v chr="$chr" -v ext="$wdir/bkgd.$ran/" -F"\t" '$1 == chr { print $2"\t"$6 >> ext"bkgd."chr".ctrl.bed" }' "$wdir/bkgd.$ran/ctrl.bed" +done + +rm $wdir/bkgd.$ran/ctrl.bed + +printf "Done!\n" +fi + + +#determine average read lengths +printf "Getting average read lengths..." + if [ ! -z $bdir ]; then + readC=$(awk '{a=$3-$2;print a;}' "$wdir/bkgd.$ran/ctrl.bed" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}') + fi + readL="" + for s in $sdir/*.bed; do #and for each sample file + read=$(awk '{a=$3-$2;print a;}' "$s" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{print int($1)}') + readL="$readL,$read" + done + readL=${readL#","} +printf "Done!\n" +#=======================> DONE! + + + + + + +# =========================== +# Step Three: Getting Signal +# =========================== +mkdir $wdir/signal.$ran/ +mkdir $out/signal #store signal + + +printf "Generating Signal...(bin size: $signal)\n" + +counting=1; +for f in $wdir/sizes.$ran/*; do #for each chromosome + samplelist="" + frag="" + frag=$fraglen + k=1 + + sizefile=$(basename $f) + chr=$(echo $sizefile | awk -F"." '{print $2}' | awk -F"." '{print $1}'); + chrSize=$(cat $f | cut -f2); + printf "Chromosome $chr: " + + #list of sample bed files and fragment lengths + for s in $wdir/samples.$ran/*.bed; do #and for each sample file + samplefile=$(basename $s) + chr2=$(echo $samplefile | awk -F"." '{print $2}'); + if [ $chr == $chr2 ] #belonging to this chromosome + then + samplelist="$samplelist,$wdir/samples.$ran/ext.$samplefile" + samplename=$(echo $samplefile | awk -F"." '{ print $3 }') + samplefilename=$(echo $samplefile | cut -d'.' -f 3-) + shift=$(echo "$frag" | cut -f "$k" -d ",") + read=$(echo "$readL" | cut -f "$k" -d ",") + k=$(($k+1)) + perl "$sPath/readshifter.pl" "$wdir/samples.$ran/$samplefile" $shift $read > "$wdir/samples.$ran/ext.$samplefile" + fi + done + + #control file + bkgdfile="None" + if [ ! -z $bdir ]; then + l=$(($dupnum+1)) + bshift=$(echo $frag | cut -f "$l" -d ",") + perl "$sPath/readshifter.pl" "$wdir/bkgd.$ran/bkgd.$chr.ctrl.bed" $bshift $readC > "$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed" + bkgdfile="$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed" + fi + + #remove leading comma + samplelist=${samplelist#","} + frag=${frag#","} + + #call the peak calling R script + Rscript "$sPath/signalmaker.r" -chromoS="$chrSize" -chromo="$chr" -bednames=$samplelist -frag=$frag -bkgd=$bkgdfile -out="$wdir/signal.$ran/" -sig="$signal" -regions="$regs" -p="$cores" -chrcount="$counting" + cat $wdir/signal.$ran/*.bedSignal | awk -F"\t" -v j=0 '$4 > j' > "$out/signal/$chr.bedGraph" + rm $wdir/signal.$ran/*.bedSignal + counting=$(($counting+1)); +done +counting=1; +#=======================> DONE! + +rm -rf $wdir + +printf "\n\n========================================\nWe're done...Congratulations!\n========================================\n\n"