Mercurial > repos > messersc > jamm

diff peakfinder.r @ 1:243f75d0ed6e draft default tip
Uploaded. Includes new release 1.0.7 with fixed optional controls.
author: messersc
date: Thu, 19 Feb 2015 05:39:45 -0500
parents: d42f4d78c85e
--- a/peakfinder.r	Wed Dec 17 10:40:23 2014 -0500
+++ b/peakfinder.r	Thu Feb 19 05:39:45 2015 -0500
@@ -1,16 +1,35 @@
-###############################
-#### Peak finder for NGS Data
-#### R script		
-###############################
+########################################################################
+# JAMMv1.0.7rev1 is a peak finder for joint analysis of NGS replicates.
+# Copyright (C) 2014-2015  Mahmoud Ibrahim
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# Contact: mahmoud.ibrahim@mdc-berlin.de
+########################################################################
+
+
 
 
 # ======================= 
 # User-defined variables
 # ======================= 
-cutoff = NA #To enforce an SNR cutoff ratio, delete NA and enter the number you want
-strict = 1 #to make peak calling more / less strict, increase / decrease this number
-meanAdjust = "n" #adjust the initialization mean vector for each window before clustering? If you want to do so, change this to "y".
-options(warn = -1, scipen = 1000) #R will not report any warnings (warn = -1), R will not use scientific notation (scipen = 1000) 
+samplingSeed = 1011414 #the seed makes the random sampling that JAMM does "deterministic". You can change this to your integer seed of choice to reproduce exact same results or randomize it this way: ceiling(rnorm(1,50,300)). 
+reportNoClust = "n" #report windows for which clustering failed? Reported windows will be marked by "NoClust" in the peak name (4th column). 
+cutoff = NA #To enforce an SNR cutoff ratio for bin enrichment calling, delete NA and enter the number you want.
+strict = 1 #To make bin enrichment calling more / less strict, increase / decrease this number.
+meanAdjust = "n" #Adjust the initialization mean vector for each window before clustering? If you want to do so, change this to "y".
+options(warn = -1, scipen = 1000) #R will not report any warnings (warn = -1), R will not use scientific notation (scipen = 1000).
 #=======================> DONE! 
 
 
@@ -115,7 +134,7 @@
 		}
 		counts = counts/mCount
 	}
-		
+
 	return(counts)
 }
 
@@ -146,7 +165,7 @@
 		}
 		counts = counts/mCount
 	}
-	
+
 	return(counts)
 }
 
@@ -168,11 +187,8 @@
 		Cs = counts[[numdup+1]][winStart:tempend]
 		mCs = mean(Cs)
 		dCs = sd(Cs)
-	} else {
-		Cs = sample(C, binSizeTemp, replace = TRUE)
 	}
 
-	
 	go = rep(0, numdup)
 	for (g in 1:numdup) {
 		mS = (mean(counts[[g]][winStart:tempend])) 
@@ -236,6 +252,7 @@
 		Cs = Cs[winStart:winEnd]
 		Cs = filtfilt(rep(1,80)/80,1,Cs) + Cmin #gets rid of Inf in the fold change
 	} else {
+		set.seed(samplingSeed)
 		Cs = sample(C, rWinSizeTemp, replace = TRUE)
 		Cs = filtfilt(rep(1,80)/80,1,Cs)
 	}
@@ -265,6 +282,7 @@
 	return(Rs)
 }
 cluster = function(model, sig, init, clustnummer, noise) { #helper function2
+	set.seed(samplingSeed)
 	noisy = sample(noise, length(sig[,1]), replace = TRUE)
 	clust = me(model, sig+noisy, init)
 	bicc =  bic(model, clust$loglik, length(sig[,1]), length(sig[1,]), clustnummer)
@@ -283,7 +301,8 @@
 	sig = do.call(rbind, sig) 
 
 	#kmeans initialization
-	init = kmeans(sig, clustnummer)
+	set.seed(samplingSeed)
+	init = kmeans(sig, clustnummer, nstart = 20)
 	init = unmap(init$cluster)
 	
 
@@ -307,15 +326,16 @@
 
 
 #find peaks
-findpeak = function(winStart, coffeeshopSud, numdup, C, param, bkgd, resol, counts, noise, startlist, meanAdjust) {
+findpeak = function(winStart, coffeeshopSud, numdup, C, param, bkgd, resol, counts, noise, startlist, meanAdjust, clustnummer) {
 	
-	
+		
 	plz = which(startlist == winStart)
 	winEnd = coffeeshopSud[plz]
 	
 	
 	#will store peak information
 	writethis = list()
+	ccx = 1 #default is clustering didNOT work
 
 	
 	rWinSizeTemp = winEnd - winStart + 1
@@ -338,20 +358,32 @@
 	}
 	
 	if (resol != "window") {
-	
-		####CLUSTERING START HERE#####
+		
+		#clustering (take 1)
+		take = 1
+		set.seed(samplingSeed)
 		noisy = sample(noise, rWinSizeTemp, replace = TRUE)
 		clust = em(param$modelname, Rs+noisy, param$initparam)
 		clust$classification = map(clust$z)
-		####CLUSTERING DONE####
-	
-		#check whether clutering succeeded
-		if((any(diff(clust$classification)) != 0) && (!(any(is.na(clust$classification))))) {
-	
-	
-			#check whether all components replicates agreed on clustering assignments
-			ccx = 1
-			if (numdup > 1) {
+		if (!((any(diff(clust$classification)) != 0) && (!(any(is.na(clust$classification)))))) { #clustering didn't work, take1
+			
+			#repeat clustering from scratch, take 2!
+			set.seed(samplingSeed)
+			init = kmeans(Rs, clustnummer, nstart = 20)
+			init = unmap(init$cluster)
+			set.seed(samplingSeed)
+			noisy = sample(noise, rWinSizeTemp, replace = TRUE)
+			clust = me(param$modelname, Rs+noisy, init)
+			clust$classification = map(clust$z)
+			if ((any(diff(clust$classification)) != 0) && (!(any(is.na(clust$classification))))) {
+				ccx = 0 #clustering worked, take2
+				take = 2
+			}
+		} else {ccx = 0}  #clustering worked, take1
+		 
+		if (ccx != 1) { #clustering worked either in take1 or take2
+					
+			if (numdup > 1) { #check whether all components replicates agreed on clustering assignments
 				cc = vector(mode = "numeric", length = numdup)
 				for (g in 1:numdup) {
 					cc[g] = which.max(clust$parameters$mean[g,]) #which cluster has the largest mean (this is the peak cluster, hopefully!)
@@ -359,98 +391,123 @@
 				ccx = sum(diff(cc))
 				cluster = cc[1]
 				rm(cc)
-			} else {
-				cluster = which.max(clust$parameters$mean) #which cluster has the largest mean (this is the peak cluster, hopefully!)
-				ccx = 0
-			}
-			if (ccx == 0) {
 			
-				#extract subset of the background
-				if (bkgd != "None") {
-					Cs = counts[[numdup+1]][winStart:winEnd]
-					Cs = filtfilt(rep(1,80)/80,1,Cs)
-				} else {
-					Cs = sample(C, rWinSizeTemp, replace = TRUE)
-					Cs = filtfilt(rep(1,80)/80,1,Cs)
+				if ((ccx != 0) && (take == 1)) { #not all replicates agreed? Repeat the clustering with from scratch if not already done!
+					set.seed(samplingSeed)
+					init = kmeans(Rs, clustnummer, nstart = 20)
+					init = unmap(init$cluster)
+					set.seed(samplingSeed)
+					noisy = sample(noise, rWinSizeTemp, replace = TRUE)
+					clust = me(param$modelname, Rs+noisy, init)
+					clust$classification = map(clust$z)
+					if ((any(diff(clust$classification)) != 0) && (!(any(is.na(clust$classification))))) { #clustering worked? check whether replicates agreed take 3
+						cc = vector(mode = "numeric", length = numdup)
+						for (g in 1:numdup) {
+							cc[g] = which.max(clust$parameters$mean[g,]) #which cluster has the largest mean (this is the peak cluster, hopefully!)
+						}
+						ccx = sum(diff(cc))
+						cluster = cc[1]
+						rm(cc)
+						take = 3
+					}
 				}
+			} else { #no replicates!
+				cluster = which.max(clust$parameters$mean) #which cluster has the largest mean (this is the peak cluster, hopefully!)
+			}
+		}
+		
+		if ((ccx != 0) && (reportNoClust=="y")) { resol = "window" } #clustering did not work and windows should be reported
+			
+		
+		if (ccx == 0) { #clustering worked and all replicates agree on the cluster assignments
+			
+			#extract subset of the background
+			if (bkgd != "None") {
+				Cs = counts[[numdup+1]][winStart:winEnd]
+				Cs = filtfilt(rep(1,80)/80,1,Cs)
+			} else {
+				set.seed(samplingSeed)
+				Cs = sample(C, rWinSizeTemp, replace = TRUE)
+				Cs = filtfilt(rep(1,80)/80,1,Cs)
+			}
 			
-				#find region boundaries
-				loc = 1:length(clust$classification)
-				gmclass = cbind(loc, clust$classification)
-				locPeak = gmclass[gmclass[,2] == cluster,,drop=FALSE]
-				rStart = locPeak[1] #start position of the region
-				rEnd = locPeak[length(locPeak[,1]),1] #end position of the region
+			#find region boundaries
+			loc = 1:length(clust$classification)
+			gmclass = cbind(loc, clust$classification)
+			locPeak = gmclass[gmclass[,2] == cluster,,drop=FALSE]
+			rStart = locPeak[1] #start position of the region
+			rEnd = locPeak[length(locPeak[,1]),1] #end position of the region
 		
-				#peak resolution check
-				if (resol == "region") {
-					pSize = rEnd - rStart
-					signal = (geomean(Rs[rStart:rEnd,])) 
-					signal2 = (signal) - (Cs[rStart:rEnd])
+			#peak resolution check
+			if (resol == "region") {
+				pSize = rEnd - rStart
+				signal = (geomean(Rs[rStart:rEnd,])) 
+				signal2 = (signal) - (Cs[rStart:rEnd])
+				gm = mean(signal2)
+				summit = which.max(geomean(Rsr[rStart:rEnd,])) - 1
+				will2k = wilcox.test(signal, Cs[rStart:rEnd])
+				
+				#Is there signal in the region above background
+				if (gm > 0) {
+					writethis[[1]] = rStart + winStart - 1
+					writethis[[2]] = rEnd + winStart
+					writethis[[3]] = paste0(chromName, ".", rStart+winStart -1)
+					writethis[[4]] = "1000"
+					writethis[[5]] = "."
+					writethis[[6]] = gm
+					writethis[[7]] = will2k$p.value
+					writethis[[8]] = "-1"
+					writethis[[9]] = summit
+				}
+			} else if (resol == "peak") {
+				#find out where separate peaks are
+				d = diff(locPeak[,1]) 
+				d[length(d)+1] = 0		
+				locPeak = cbind(locPeak, d)
+				bound1 = which(locPeak[,3] > 1, arr.in=TRUE)
+				bound2 = bound1 + 1
+				bound = locPeak[sort(c(bound1,bound2))]
+				bound = c(rStart, bound, rEnd)
+				w = 1
+				warum = 0
+				while (w < length(bound)) {
+					pStart = bound[w] + winStart - 1 
+					pEnd = bound[w+1] + winStart
+					pSize = pEnd - pStart
+					signal = (geomean(Rs[(bound[w]):(bound[w+1]),])) 
+					signal2 = (signal) - (Cs[bound[w]:bound[w+1]])
 					gm = mean(signal2)
-					summit = which.max(geomean(Rsr[rStart:rEnd,])) - 1
-					will2k = wilcox.test(signal, Cs[rStart:rEnd])
-					
+					summit = which.max(geomean(Rsr[(bound[w]):(bound[w+1]),])) - 1
+					will2k = wilcox.test(signal, Cs[(bound[w]):(bound[w+1])])
+										
+					weil = warum * 9
 					#Is there signal in the region above background
 					if (gm > 0) {
-						writethis[[1]] = rStart + winStart - 1
-						writethis[[2]] = rEnd + winStart
-						writethis[[3]] = paste0(chromName, ".", rStart+winStart -1)
-						writethis[[4]] = "1000"
-						writethis[[5]] = "."
-						writethis[[6]] = gm
-						writethis[[7]] = will2k$p.value
-						writethis[[8]] = "-1"
-						writethis[[9]] = summit
+						writethis[[1+weil]] = pStart
+						writethis[[2+weil]] = pEnd
+						writethis[[3+weil]] = paste0(chromName, ".", pStart)
+						writethis[[4+weil]] = "1000"
+						writethis[[5+weil]] = "."
+						writethis[[6+weil]] = gm
+						writethis[[7+weil]] = will2k$p.value
+						writethis[[8+weil]] = "-1"
+						writethis[[9+weil]] = summit
 					}
-				} else if (resol == "peak") {
-					#find out where separate peaks are
-					d = diff(locPeak[,1]) 
-					d[length(d)+1] = 0		
-					locPeak = cbind(locPeak, d)
-					bound1 = which(locPeak[,3] > 1, arr.in=TRUE)
-					bound2 = bound1 + 1
-					bound = locPeak[sort(c(bound1,bound2))]
-					bound = c(rStart, bound, rEnd)
-					w = 1
-					warum = 0
-					while (w < length(bound)) {
-						pStart = bound[w] + winStart - 1 
-						pEnd = bound[w+1] + winStart
-						pSize = pEnd - pStart
-						signal = (geomean(Rs[(bound[w]):(bound[w+1]),])) 
-						signal2 = (signal) - (Cs[bound[w]:bound[w+1]])
-						gm = mean(signal2)
-						summit = which.max(geomean(Rsr[(bound[w]):(bound[w+1]),])) - 1
-						will2k = wilcox.test(signal, Cs[(bound[w]):(bound[w+1])])
-											
-						weil = warum * 9
-						#Is there signal in the region above background
-						if (gm > 0) {
-							writethis[[1+weil]] = pStart
-							writethis[[2+weil]] = pEnd
-							writethis[[3+weil]] = paste0(chromName, ".", pStart)
-							writethis[[4+weil]] = "1000"
-							writethis[[5+weil]] = "."
-							writethis[[6+weil]] = gm
-							writethis[[7+weil]] = will2k$p.value
-							writethis[[8+weil]] = "-1"
-							writethis[[9+weil]] = summit
-						}
-						w = w + 2
-						warum = warum + 1			
-					}
-			
-				} #peak resolution check
-			} #all replicates agree on clustering assignments?
-		} #clustering worked?
-		
-	} else if (resol == "window") { #window resolution check
+					w = w + 2
+					warum = warum + 1			
+				}
+			} #peak resolution check
+		} #clustering worked and all replicates agree on clustering assignments?
+	} #window resolution check
+	
+	if (resol == "window") {
 	
 		#extract subset of the background
 		if (bkgd != "None") {
 			Cs = counts[[numdup+1]][winStart:winEnd]
 			Cs = filtfilt(rep(1,80)/80,1,Cs)
 		} else {
+			set.seed(samplingSeed)
 			Cs = sample(C, rWinSizeTemp, replace = TRUE)
 			Cs = filtfilt(rep(1,80)/80,1,Cs)
 		}
@@ -467,7 +524,7 @@
 		if (gm > 0) {
 			writethis[[1]] = winStart - 1
 			writethis[[2]] = winEnd
-			writethis[[3]] = paste0(chromName, ".", winStart -1)
+			writethis[[3]] = paste0(chromName, ".", winStart -1, ".NoClust")
 			writethis[[4]] = "1000"
 			writethis[[5]] = "."
 			writethis[[6]] = gm
@@ -475,7 +532,7 @@
 			writethis[[8]] = "-1"
 			writethis[[9]] = summit
 		}
-	} #window clustering
+	} #window reporting
 
 return(writethis)
 }
@@ -506,7 +563,7 @@
 #Parsing arguments and storing values
 for (each.arg in args) {
 	#chormosome size file
-	if (grepl('^-sfile=',each.arg)) {
+	if (grepl('-sfile=',each.arg)) {
 		arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] 
 		if (! is.na(arg.split[2]) ) {
 				size.file <- arg.split[2]
@@ -515,7 +572,7 @@
 		} 
 	}
 	#bed file names
-	if (grepl('^-bednames=',each.arg)) {
+	if (grepl('-bednames=',each.arg)) {
 		arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] 
 		if (! is.na(arg.split[2]) ) {
 				bednames <- arg.split[2]
@@ -524,7 +581,7 @@
 		} 
 	}
 	#bed files directory
-	if (grepl('^-frag=',each.arg)) {
+	if (grepl('-frag=',each.arg)) {
 		arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] 
 		if (! is.na(arg.split[2]) ) {
 				frag <- arg.split[2]
@@ -533,7 +590,7 @@
 		} 
 	}	
 	#bakcground files directory
-	if (grepl('^-bkgd=',each.arg)) {
+	if (grepl('-bkgd=',each.arg)) {
 		arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] 
 		if (! is.na(arg.split[2]) ) {
 				bkgd <- arg.split[2]
@@ -542,7 +599,7 @@
 		} 
 	}	
 	#bakcground files directory
-	if (grepl('^-out=',each.arg)) {
+	if (grepl('-out=',each.arg)) {
 		arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] 
 		if (! is.na(arg.split[2]) ) {
 				out <- arg.split[2]
@@ -551,61 +608,69 @@
 		} 
 	}
 	#Cluster number
-	if (grepl('^-clustnummer=',each.arg)) {
+	if (grepl('-clustnummer=',each.arg)) {
 		arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] 
 		if (! is.na(arg.split[2]) ) {
 				clustnummer <- as.numeric(arg.split[2])
 		} 
 	}
 	#resolution
-	if (grepl('^-resolution=',each.arg)) {
+	if (grepl('-resolution=',each.arg)) {
 		arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] 
 		if (! is.na(arg.split[2]) ) {
 				resol <- arg.split[2]
 		} 
 	}
 	#processor cores
-	if (grepl('^-p=',each.arg)) {
+	if (grepl('-p=',each.arg)) {
 		arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] 
 		if (! is.na(arg.split[2]) ) {
 				cornum <- as.numeric(arg.split[2])
 		} 
 	}
 	#minimum window size
-	if (grepl('^-window=',each.arg)) {
+	if (grepl('-window=',each.arg)) {
 		arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] 
 		if (! is.na(arg.split[2]) ) {
 			winSize <- arg.split[2]
 		} 
 	}
 	#window size
-	if (grepl('^-bin=',each.arg)) {
+	if (grepl('-bin=',each.arg)) {
 		arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] 
 		if (! is.na(arg.split[2]) ) {
 			binsize <- arg.split[2]
 		} 
 	}
 	#type (paired / single)
-	if (grepl('^-type=',each.arg)) {
+	if (grepl('-type=',each.arg)) {
 		arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] 
 		if (! is.na(arg.split[2]) ) {
 			type <- arg.split[2]
 		} 
 	}
 	#chromosome number
-	if (grepl('^-chrcount=',each.arg)) {
+	if (grepl('-chrcount=',each.arg)) {
 		arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] 
 		if (! is.na(arg.split[2]) ) {
 			chrcount <- as.numeric(arg.split[2])
 		} 
 	}
 	#window enrichment cutoff
-	if (grepl('^-windowe=',each.arg)) {
+	if (grepl('-windowe=',each.arg)) {
 		arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] 
 		if (! is.na(arg.split[2]) ) {
 			windowe <- arg.split[2]
 		} 
 	}
+	#initialize
+	if (grepl('-initModel=',each.arg)) {
+		arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] 
+		if (! is.na(arg.split[2]) ) {
+			initialize <- arg.split[2]
+		} 
+	}
+	
 }
 
 ##Parse in variables
@@ -614,6 +679,11 @@
 chromSize = chromosomes$V2; #chromosomes size
 rm(chromosomes)
 
+if (chrcount == 1) {
+	write(paste(samplingSeed), file = paste0(out, "/seed.info"))
+} else {
+	samplingSeed = as.numeric(read.table(paste0(out, "/seed.info"), header = FALSE)[[1]])
+}
 readsFiles = as.list(strsplit(bednames, ",", fixed = TRUE)[[1]])
 numdup = length(readsFiles) #number of replicates
 if (bkgd != "None") {
@@ -703,7 +773,6 @@
 # ============================ 
 # Estimating Background Model
 # ============================ 
-
 if (chrcount == 1){ #first chromosome, estimate bkgd (includes SNR cutoff)
 	if (is.na(cutoff)) {
 		if (bkgd != "None") {
@@ -722,13 +791,16 @@
 			mmM = mean(geomeanL(counts))
 			sigma = log(1+((mmV) / ((mmM)^2)))
 			mu = (log(mmM)) - (0.5 * (sigma))
+			set.seed(samplingSeed)
 			C = rlnorm(100000, mu, sqrt(sigma))
 			for (x in 1:numdup) {
 				cutoff[x] = (mean(counts[[x]]))/(sd(C))
 			}
 			cutoff = max(cutoff)
-			mCs = (mean(sample(C, binSize*5, replace = TRUE)))
-			dCs = (sd(sample(C, binSize*5, replace = TRUE)))
+			set.seed(samplingSeed)
+			snow = sample(C, binSize*5, replace = TRUE)
+			mCs = mean(snow)
+			dCs = sd(snow)
 			write(paste(c(cutoff,sigma,mu)), file = paste0(out, "/bkgd.info"), append = TRUE)
 		}
 	}
@@ -744,9 +816,12 @@
 	} else {
 		sigma = as.numeric(bkgdInfo[[1]][2])
 		mu = as.numeric(bkgdInfo[[1]][3])
+		set.seed(samplingSeed)
 		C = rlnorm(100000, mu, sqrt(sigma))
-		mCs = (mean(sample(C, binSize*5, replace = TRUE)))
-		dCs = (sd(sample(C, binSize*5, replace = TRUE)))
+		set.seed(samplingSeed)
+		snow = sample(C, binSize*5, replace = TRUE)
+		mCs = mean(snow)
+		dCs = sd(snow)
 	}
 }
 #=======================> DONE!
@@ -764,6 +839,9 @@
 }
 coffeeshop = as.numeric(unlist(coffeeshop))
 coffeeshop[coffeeshop != numdup] = 0
+
+if (sum(coffeeshop) != 0) { #Any enriched bins?
+
 coffeeshop = c(0, diff(coffeeshop))
 coffeeshop = cbind(coffeeshop, bins)
 coffeeshopNord = coffeeshop[coffeeshop[,1] == numdup,,drop=FALSE]
@@ -800,16 +878,26 @@
 # ===================================
 if (length(coffeeshop[,1]) > 0) { #any enriched windows detected?
 
-yummy = ceiling(length(coffeeshop[,1]) / 4)
-if (yummy > 20) {
-	yummy = sample(1:yummy, 20)
-} else if (yummy > 0) {
-	yummy = 1:yummy
-} else {
-	yummy = 1
+if (initialize == "deterministic") {
+	yummy = ceiling(length(coffeeshop[,1]) / 1000)
+	if (yummy == 0) {
+		yummy = 1
+	}
+}
+if (initialize == "stochastic") {
+	yummy = ceiling(length(coffeeshop[,1]) / 4)
+	if (yummy > 20) {
+		set.seed(samplingSeed)
+		yummy = sample(1:yummy, 20)
+	} else if (yummy > 0) {
+		yummy = 1:yummy
+	} else {
+		yummy = 1
+	}
 }
 coffeeshopNord = coffeeshop[yummy,1]
 coffeeshopSud = coffeeshop[yummy,2]
+set.seed(samplingSeed)
 noise = rnorm(100000, mean=0, sd=0.1)
 param = initparam(coffeeshopNord, coffeeshopSud, numdup, counts, cornum, clustnummer, modelnames, noise)
 #=======================> DONE!
@@ -828,7 +916,6 @@
 		scores = lapply(coffeeshop[,1], scorewindow, coffeeshop[,2], numdup, C, bkgd, counts, startlist = coffeeshop[,1])
 	}
 	scores = unlist(scores)
-	#scores = scores[is.finite(scores)] #just a sanity check
 
 	if (windowe == "auto") {
 		lscores = log(scores)
@@ -867,12 +954,12 @@
 # ============== 
 # Finding Peaks
 # ==============
-#if (length(coffeeshop[,1]) > 0) { #any enriched windows left after filtering?, just a sanity check!
+if (length(coffeeshop[,1]) > 0) { #any enriched windows left after filtering?
 coffeeshop = coffeeshop[,-3]
 if (cornum > 1) {
-	peaks = mclapply(coffeeshop[,1], findpeak, coffeeshop[,2], numdup, C, param, bkgd, resol, counts, noise, startlist = coffeeshop[,1], meanAdjust, mc.cores = cornum, mc.preschedule = TRUE)
+	peaks = mclapply(coffeeshop[,1], findpeak, coffeeshop[,2], numdup, C, param, bkgd, resol, counts, noise, startlist = coffeeshop[,1], meanAdjust, clustnummer, mc.cores = cornum, mc.preschedule = TRUE)
 } else {
-	peaks = lapply(coffeeshop[,1], findpeak, coffeeshop[,2], numdup, C, param, bkgd, resol, counts, noise, startlist = coffeeshop[,1], meanAdjust)
+	peaks = lapply(coffeeshop[,1], findpeak, coffeeshop[,2], numdup, C, param, bkgd, resol, counts, noise, startlist = coffeeshop[,1], meanAdjust, clustnummer)
 }
 if (!(is.null(peaks))) { #any peaks discovered?
 writethis = processPeaks(peaks)
@@ -886,15 +973,16 @@
 # Writing Peak Information
 # =========================
 } else { nothing = TRUE } #no peaks
-#} else { nothing = TRUE } #no enriched windows left after filtering, just a sanity check (at least one window should still be there)
+} else { nothing = TRUE } #no enriched windows left after filtering
 } else { nothing = TRUE } #no enriched widnows discovered
+} else { nothing = TRUE; cutthisW = windowe } #no enriched bins discovered
 
 if (isTRUE(nothing)) {
 	file.create(paste0(out, "/", chromName, ".peaks.bed"))
 	write(paste(chromName, minpeak, sep = "	"), file = paste0(out, "/min.peaksize"), append=TRUE)
 
 	if (chrcount == 1) {
-		message(paste0("No peaks found! - Window Fold Enrichment: ", round(cutthisW,2)))
+		message(paste0("No peaks found! - Window Fold Enrichment: ", cutthisW, " - Seed: ", samplingSeed))
 	} else {
 		message("No peaks found!")
 	}
@@ -904,7 +992,7 @@
 	
 	
 	if (chrcount == 1) {
-		message(paste0("Done! - Window Fold Enrichment: ", round(cutthisW,2)))
+		message(paste0("Done! - Window Fold Enrichment: ", cutthisW, " - Seed: ", samplingSeed))
 	} else {
 		message("Done!")
 	}
author	messersc
date	Thu, 19 Feb 2015 05:39:45 -0500
parents	d42f4d78c85e
children