jamm: JAMM.sh comparison

comparison JAMM.sh @ 1:243f75d0ed6e draft default tip

Uploaded. Includes new release 1.0.7 with fixed optional controls.

author	messersc
date	Thu, 19 Feb 2015 05:39:45 -0500
parents	d42f4d78c85e
children

comparison

equal deleted inserted replaced

-:d42f4d78c85e
+:243f75d0ed6e
-#!/bin/bash
+########################################################################
-########################################
+# JAMMv1.0.7rev1 is a peak finder for joint analysis of NGS replicates.
-#### Peak finding Pipeline for NGS Data
+# Copyright (C) 2014-2015  Mahmoud Ibrahim
-#### Bash script
+#
-########################################
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
-export LC_ALL=en_US.UTF-8
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# Contact: mahmoud.ibrahim@mdc-berlin.de
+########################################################################
 ##Finding out the path
 sPath="`dirname \"$0\"`"
 sPath="`( cd \"$sPath\" && pwd )`"
 usage()
 {
 cat << EOF
-Welcome to JAMM v1.0.6rev2 (GNU GPLv3). Copyright (C) 2014  Mahmoud Ibrahim.
+Welcome to JAMM v1.0.7rev1 (GNU GPLv3). Copyright (C) 2014-2015  Mahmoud Ibrahim.
 This program comes with ABSOLUTELY NO WARRANTY; for details visit http://www.gnu.org/licenses/gpl.html. This is free software, and you are welcome to redistribute it under certain conditions; visit http://www.gnu.org/licenses/gpl.html for details.
 OPTIONS:
 -s      directory containing Sample files (required)
 -g      Genome size file (required)
 -o      Output directory (required)
 -c      directory containing input or Control files
+-f      Fragment length(s) (default: estimated)
+-r      Resolution, peak or region or window (default: peak)
 -m      Mode, normal or narrow (default: normal)
--r      Resolution, peak or region (default: peak)
+-i      clustering Initialization window selection, deterministic or stochastic (default: deterministic)
--f      Fragment length(s) (default: estimated)
+-b	   Bin Size (default: estimated)
 -w      minimum Window size (default: 2 --- Note: this means minimum_window_size = bin_size x the_value_of_-w)
+-e	   window Enrichment cutoff, auto or any numeric value (default: 1 --- Set this to "auto" to estimate the window enrichment cutoff)
+-d	   keep PCR Dupicates in single-end mode, y or n (default: n --- if -t is "paired", this option has no effect)
+-t	   Type, single or paired (default: single, requires BED files. paired requires BEDPE files)
 -p	   Number of processors used by R scripts (default: 1)
--b	   Bin Size (default: estimated)
--t	   Type, single or paired (default: single, requires BED files. paired requires BEDPE files)
--e	   window Enrichment cutoff, auto or any numeric value (default: 1 --- Set this to auto to estimate the appropriate window enrichment cutoff)
 EOF
 }
 # =========================
 resol="peak"
 cores="1"
 window="2"
 type="single"
 windowe="1"
+initModel="deterministic"
+uniq="n"
 #Defaults -- Do not change
 sdir=""
 gsize=""
 out=""
 binsize="ns"
 fraglen="ns"
 ran=$RANDOM
 wdir=$(mktemp -d)
+export LANG=C #locale defaults
-while getopts "s:g:o:c:m:r:f:p:w:b:t:e:" OPTION
+export LC_ALL=C #locale defaults
+while getopts "s:g:o:c:m:r:f:p:w:b:t:e:i:d:" OPTION
 do
 	case $OPTION in
 	s) sdir=$OPTARG
 	;;
 	g) gsize=$OPTARG
 	b) binsize=$OPTARG
 	;;
 	t) type=$OPTARG
 	;;
 	e) windowe=$OPTARG
+	;;
+	i) initModel=$OPTARG
+	;;
+	d) uniq=$OPTARG
 	;;
 	?)
 	usage
 	exit
 	;;
 if [[ -z $sdir ]] || [[ -z $gsize ]] || [[ -z $out ]]
 then
 usage
 exit 1
 fi
+if [[ -d "$out/peaks" ]]; then
+	printf "\n\nOutput directory $out/peaks already exists. I can't override existing results!\n\n"
+	exit 0
+fi
+if [ $fraglen == "ns" ]; then
+	if [[ -d "$out/xcorr" ]]; then
+		printf "\n\nOutput directory $out/xcorr already exists. I can't override existing results!\n\n"
+		exit 0
+	fi
+fi
 #=======================> DONE!
 # =============================
 # Step One: Initial Processing
 # =============================
-printf "\n\n========================================\nStarted JAMM Pipeline v1.0.6rev2...Hang on!\n========================================\n\n"
+printf "\n\n============================================\nStarted JAMM Pipeline v1.0.7rev1...Hang on!\n============================================\n\n"
 if [ ! -d "$wdir" ]; then
 	mkdir $wdir #make working directory
 fi
 if [ ! -d "$out" ]; then
 printf "Done!\n"
 fi
 #determine average read lengths
-printf "Getting average read lengths..."
+printf "Getting average read lengths...\n"
-	if [ ! -z $bdir ]; then
+readL=""
-		readC=$(awk '{a=$3-$2;print a;}' "$wdir/bkgd.$ran/ctrl.bed" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{a=$1+0.5;print a;}' | cut -d"." -f1)
+if [ ! -z $bdir ]; then
-	fi
+	readC=$(awk '{a=$3-$2;print a;}' "$wdir/bkgd.$ran/ctrl.bed" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{a=$1+0.5;print a;}' | cut -d"." -f1)
-	readL=""
+	printf "Control: $readC\n"
-	for s in $sdir/*.bed; do #and for each sample file
+fi
-		read=$(awk '{a=$3-$2;print a;}' "$s" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{a=$1+0.5;print a;}' | cut -d"." -f1)
+readL=""
-		readL="$readL,$read"
+for s in $sdir/*.bed; do #and for each sample file
-	done
+	file=$(basename $s)
-	readL=${readL#","}
+	samplefile=$(echo $file | awk -F"." '{print $1}');
-printf "Done!\n"
+	read=$(awk '{a=$3-$2;print a;}' "$s" | perl -lane '$a+=$_;END{print $a/$.}' | awk '{a=$1+0.5;print a;}' | cut -d"." -f1)
+	printf "$samplefile: $read\n"
+	readL="$readL,$read"
+done
+readL=${readL#","}
 #=======================> DONE!
 # =============================
 		fi
 	done
 	#remove leading comma
 	samplelist=${samplelist#","}
 	frag=${frag#","}
 	Rscript "$sPath/bincalculator.r" -ibed="$samplelist" -s="$gsize" -rl="$readL" -d="$wdir" -p="$cores"	-f="$frag" -type="$type"
 fi
 if [ $binsize != "ns" ]; then
 	printf "You set a Bin Size: $binsize \n"
 fi
 				if [ $fraglen != "ns" ]; then
 					shift=$(echo $frag | cut -f "$k" -d ",")
 					read=$(echo $readL | cut -f "$k" -d ",")
 					k=$(($k+1))
 				fi
-				perl "$sPath/readshifter.pl" "$wdir/samples.$ran/$samplefile" $shift $read > "$wdir/samples.$ran/ext.$samplefile"
+				if [ $uniq == "y" ]; then
+					perl "$sPath/readshifter.pl" "$wdir/samples.$ran/$samplefile" $shift $read > "$wdir/samples.$ran/ext.$samplefile"
+				fi
+				if [ $uniq == "n" ]; then
+					perl "$sPath/readshifter.pl" "$wdir/samples.$ran/$samplefile" $shift $read | uniq > "$wdir/samples.$ran/ext.$samplefile"
+				fi
 			fi
 		done
 		#control file
 		bkgdfile="None"
 			fi
 			if [ $fraglen != "ns" ]; then
 				l=$(($dupnum+1))
 				bshift=$(echo $frag | cut -f "$l" -d ",")
 			fi
-			perl "$sPath/readshifter.pl" "$wdir/bkgd.$ran/bkgd.$chr.ctrl.bed" $bshift $readC > "$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed"
+			if [ $uniq == "y" ]; then
+				perl "$sPath/readshifter.pl" "$wdir/bkgd.$ran/bkgd.$chr.ctrl.bed" $bshift $readC > "$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed"
+			fi
+			if [ $uniq == "n" ]; then
+				perl "$sPath/readshifter.pl" "$wdir/bkgd.$ran/bkgd.$chr.ctrl.bed" $bshift $readC | uniq > "$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed"
+			fi
 			bkgdfile="$wdir/bkgd.$ran/ext.bkgd.$chr.ctrl.bed"
 		fi
 		#remove leading comma
 		samplelist=${samplelist#","}
 		frag=${frag#","}
 		#call the peak calling R script
-		Rscript "$sPath/peakfinder.r" -sfile="$f" -bednames="$samplelist" -frag="$frag" -bkgd=$bkgdfile -out="$wdir/peaks.$ran/" -clustnummer="$clustno" -resolution="$resol" -window="$window" -p="$cores" -bin="$binsize" -type="$type" -chrcount="$counting" -windowe="$windowe"
+		Rscript "$sPath/peakfinder.r" -sfile="$f" -bednames="$samplelist" -frag="$frag" -bkgd=$bkgdfile -out="$wdir/peaks.$ran/" -clustnummer="$clustno" -resolution="$resol" -window="$window" -p="$cores" -bin="$binsize" -type="$type" -chrcount="$counting" -initModel="$initModel" -windowe="$windowe"
 		counting=$(($counting+1));
 		cp "$wdir/peaks.$ran/$chr.peaks.bed" "$out/peaks/$chr.peaks.bed"
 		rm "$wdir/peaks.$ran/$chr.peaks.bed"
 done
 counting=1;
 	#remove leading comma
 	samplelist=${samplelist#","}
 	frag=${frag#","}
 	#call the peak calling R script
-	Rscript "$sPath/peakfinder.r" -sfile=$f -bednames=$samplelist -frag="NA" -bkgd=$bkgdfile -out="$wdir/peaks.$ran/" -clustnummer="$clustno" -resolution="$resol" -window="$window" -p="$cores" -bin="$binsize" -type="$type" -chrcount="$counting" -windowe="$windowe"
+	Rscript "$sPath/peakfinder.r" -sfile=$f -bednames=$samplelist -frag="NA" -bkgd=$bkgdfile -out="$wdir/peaks.$ran/" -clustnummer="$clustno" -resolution="$resol" -window="$window" -p="$cores" -bin="$binsize" -type="$type" -chrcount="$counting" -initModel="$initModel" -windowe="$windowe"
 	counting=$(($counting+1));
 	cp "$wdir/peaks.$ran/$chr.peaks.bed" "$out/peaks/$chr.peaks.bed"
 	rm "$wdir/peaks.$ran/$chr.peaks.bed"
 done
 counting=1;
 cp $wdir/peaks.$ran/min.peaksize $out/peaks/min.peaksize
 #concatenate, sort and filter
 cat $out/peaks/*.bed > $out/peaks/all.narrowPeak
-Rscript "$sPath/peakhelper.r" -filelist="$out/peaks/all.narrowPeak"
+if [[ -s  $out/peaks/all.narrowPeak ]]; then
-perl "$sPath/peakfilter.pl" $out/peaks/all.narrowPeak | sort -nr -k7 > $out/peaks/filtered.peaks.narrowPeak
+	Rscript "$sPath/peakhelper.r" -filelist="$out/peaks/all.narrowPeak"
-cut -f1-10 $out/peaks/all.narrowPeak | awk -F"\t" -v j=0 '$7 > j' | sort -nr -k7 > $out/peaks/all.peaks.narrowPeak
+	perl "$sPath/peakfilter.pl" $out/peaks/all.narrowPeak | sort -nr -k7 > $out/peaks/filtered.peaks.narrowPeak
+	cut -f1-10 $out/peaks/all.narrowPeak | awk -F"\t" -v j=0 '$7 > j' | sort -nr -k7 > $out/peaks/all.peaks.narrowPeak
+fi
 rm $out/peaks/all.narrowPeak
 rm $out/peaks/*.bed
 rm $out/peaks/min.peaksize
 #=======================> DONE!

Mercurial > repos > messersc > jamm

comparison JAMM.sh @ 1:243f75d0ed6e draft default tip