# HG changeset patch
# User greg
# Date 1517425991 18000
# Node ID 3651f1592f3ffde240d914c1d5a033619c4c31b4
# Parent 99102499271a61c267c9f26e9144a88b61b75d6a
Uploaded
diff -r 99102499271a -r 3651f1592f3f ideas_preprocessor.R
--- a/ideas_preprocessor.R Wed Jan 31 08:22:43 2018 -0500
+++ b/ideas_preprocessor.R Wed Jan 31 14:13:11 2018 -0500
@@ -45,6 +45,8 @@
cat(s, file=output);
}
+tmp_dir = "tmp";
+
# Read the ideaspre_input_config text file which has this format:
# "cell type name" "epigenetic factor name" "file path" "file name" "datatype"
ideaspre_input_config = as.matrix(read.table(opt$ideaspre_input_config));
@@ -71,9 +73,10 @@
bigwig_file_name = file_path;
}
bed_file_name = paste(file_name, "bed", sep=".");
- cmd = paste("bigWigAverageOverBed", bigwig_file_name, opt$chrom_bed_input, "stdout | cut -f5 >", bed_file_name);
+ bed_file_path = paste("tmp", bed_file_name, sep="/");
+ cmd = paste("bigWigAverageOverBed", bigwig_file_name, opt$chrom_bed_input, "stdout | cut -f5 >", bed_file_path);
system(cmd);
- cmd = paste("gzip -f", bed_file_name);
+ cmd = paste("gzip -f", bed_file_path);
system(cmd);
}
}
@@ -81,8 +84,10 @@
# Create file1.txt.
cmd = paste("cut -d' '", opt$ideaspre_input_config, "-f1,2 > file1.txt", sep=" ");
system(cmd);
-# Compress the bed files and create file2.txt.
-cmd = "ls *.bed.gz > file2.txt";
+# Compress the bed files in the tmp directory.
+tmp_gzipped_files = paste(tmp_dir, "*.bed.gz", sep="/");
+# Create file2.txt.
+cmd = paste("ls", tmp_gzipped_files, "> file2.txt", sep=" ");
system(cmd);
# Create IDEAS_input_config.txt with the format required by IDEAS.
ideas_input_config = "IDEAS_input_config.txt"
@@ -91,7 +96,13 @@
# Move IDEAS_input_config.txt to the output directory.
to_path = paste(opt$output_files_path, ideas_input_config, sep="/");
file.rename(ideas_input_config, to_path);
-# Handle optional chrom_bed_input.txt and chromosomes.bed files.
+# Archive the tmp directory.
+cmd = "tar -cvf tmp.tar tmp";
+system(cmd);
+# Move the tmp archive to the output directory.
+to_path = paste(opt$output_files_path, "tmp.tar", sep="/");
+file.rename("tmp.tar", to_path);
+
if (!is.null(opt$chrom_bed_input) && !is.null(opt$chromosome_windows)) {
# Renane opt$chrom_bed_input to be chromosomes.bed
# and make a copy of it in the output directory.
diff -r 99102499271a -r 3651f1592f3f ideas_preprocessor.xml
--- a/ideas_preprocessor.xml Wed Jan 31 08:22:43 2018 -0500
+++ b/ideas_preprocessor.xml Wed Jan 31 14:13:11 2018 -0500
@@ -11,6 +11,8 @@
#set chromosome_windows = "chromosome_windows.txt"
#set ideaspre_input_config = "ideaspre_input_config.txt"
#set specify_chrom_windows = $specify_chrom_windows_cond.specify_chrom_windows
+#set tmp_dir = "tmp"
+mkdir $tmp_dir &&
mkdir $output.files_path &&
#if str($specify_chrom_windows) == "yes":
##############################################
@@ -179,8 +181,13 @@
-
-
+
+
diff -r 99102499271a -r 3651f1592f3f runproc.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/runproc.R Wed Jan 31 14:13:11 2018 -0500
@@ -0,0 +1,105 @@
+normdata<-function(case, control, method="nbinom")
+{ d=dnorm(-10:10,0,10);
+ u= c(rep(0,10),control,rep(0,10));
+ l=length(case);
+ nu=rep(0,l);
+ for(i in 1:21)
+ { nu = nu + u[i-1+1:l] * d[i];
+ }
+ nu = nu / sum(d);
+
+ t = which(case0.9) p=0.9;
+ if(p<0.1) p=0.1;
+ n=mean(case[t])*p/(1-p);
+ print(c(n,p));
+ #nc = log2((case*20+1)/(nu/mean(nu)*mean(case)*20+1)+1)-1;
+ if(method=="nbinom")
+ { nc = -pnbinom(case,size=c((nu+1)/mean(nu+1)*n),prob=p,lower.tail=F,log.p=T)/log(10);
+ } else
+ { nc = -ppois(case,(nu+1)/mean(nu+1)*mean(case),lower.tail=F,log.p=T)/log(10);
+ }
+ return(nc);
+}
+
+binsz=200
+folder="me66/Input/"
+mbed="/gpfs/group/yzz2/default/IDEAS_server/data/mm10.bed"
+
+#read file list
+args<-commandArgs(trailingOnly=TRUE);
+flist=args[1];
+x=as.matrix(read.table(flist));
+if(length(args) > 1)
+{ st=as.integer(args[2]);
+ ed=as.integer(args[3]);
+} else
+{ st=1;
+ ed=dim(x)[1];
+}
+l=paste(x[,1],x[,2]);
+ll=unique(l);
+id=rep(0,length(l));
+for(i in ll)
+{ t=which(l==i);
+ if(length(t)>1)
+ { id[t]=1:length(t);
+ }
+}
+
+y=x[st:ed,1:3];
+if(dim(x)[2]==4)
+{ y=rbind(y,x[st:ed,c(1:2,4)]); }
+t=match(unique(y[,3]),y[,3]);
+k=rapply(as.list(y[t,3]),function(z){tail(unlist(gregexpr("\\/",z)),n=1);});
+y=cbind(y[t,],substr(y[t,3],k+1,1000));
+
+#fetch data and process data to windows mean
+for(i in 1:dim(y)[1])
+{ fout = paste(folder, y[i,1],"_",y[i,2],"_",y[i,4],sep="");
+ system(paste("wget -O",fout, y[i,3]));
+ tl = tail(unlist(gregexpr("\\.",fout)),n=1);
+ name = substr(fout,1,tl);
+ ftype = substr(fout, tl+1, 100000);
+ if(tolower(ftype) == "bam")
+ { system(paste("samtools index", fout));
+ bw=paste(name,".bw",sep="");
+ system(paste("bamCoverage --bam",fout,"-o",bw,"--binSize", binsz));
+ } else
+ { bw=fout;
+ }
+ bd=paste(name,".bed",sep="");
+ system(paste("/storage/home/yzz2/work/tools/bigWigAverageOverBed", bw, mbed, "stdout | cut -f5 >", bd));
+ system(paste("gzip -f",bd));
+ system(paste("rm ", name,".ba*",sep=""));
+ system(paste("rm ", name,".bw",sep=""));
+}
+
+#normalize data if inputs are available, and prepare data file list
+fname=NULL;
+for(i in st:ed)
+{ k=match(x[i,3],y[,3]);
+ f1=paste(folder,y[k,1],"_",y[k,2],"_",substr(y[k,4],1,nchar(y[k,4])-4),".bed.gz",sep="");
+ if(dim(x)[2]==4)
+ { k=match(x[i,4],y[,3]);
+ f0=paste(folder,y[k,1],"_",y[k,2],"_",substr(y[k,4],1,nchar(y[k,4])-4),".bed.gz",sep="");
+ if(id[i]>0)
+ { nf=paste(folder,x[i,1],".",x[i,2],"_",id[i],".norm.bed",sep="");
+ } else
+ { nf=paste(folder,x[i,1],".",x[i,2],".norm.bed",sep="");
+ }
+ x1=as.matrix(read.table(f1));
+ x0=as.matrix(read.table(f0));
+ nx=normdata(x1,x0);
+ write.table(round(nx*100)/100,nf,quote=F,row.names=F,col.names=F);
+ system(paste("gzip -f",nf));
+ fname=rbind(fname,c(x[i,1:2],paste(nf,".gz",sep="")));
+ } else
+ { nf=paste(fold,x[i,1],".",x[i,2],".bed.gz",sep="");
+ system(paste("mv", f1, nf));
+ fname=rbind(fname,c(x[i,1:2],nf));
+ }
+}
+
+write.table(fname,paste(folder,"tmp_",st,"_",ed,".input",sep=""),quote=F,row.names=F,col.names=F);
diff -r 99102499271a -r 3651f1592f3f test-data/IDEAS_input_config.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/IDEAS_input_config.txt Wed Jan 31 14:13:11 2018 -0500
@@ -0,0 +1,1 @@
+e001 h3k4me3 tmp/e001-h3k4me3.bed.gz
diff -r 99102499271a -r 3651f1592f3f test-data/chrom_windows.bed
--- a/test-data/chrom_windows.bed Wed Jan 31 08:22:43 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,50 +0,0 @@
-chr1 21819600 21819800 R100001
-chr1 21819800 21820000 R100002
-chr1 21820000 21820200 R100003
-chr1 21820200 21820400 R100004
-chr1 21820400 21820600 R100005
-chr1 21820600 21820800 R100006
-chr1 21820800 21821000 R100007
-chr1 21821000 21821200 R100008
-chr1 21821200 21821400 R100009
-chr1 21821400 21821600 R100010
-chr1 21821600 21821800 R100011
-chr1 21821800 21822000 R100012
-chr1 21822000 21822200 R100013
-chr1 21822200 21822400 R100014
-chr1 21822400 21822600 R100015
-chr1 21822600 21822800 R100016
-chr1 21822800 21823000 R100017
-chr1 21823000 21823200 R100018
-chr1 21823200 21823400 R100019
-chr1 21823400 21823600 R100020
-chr1 21823600 21823800 R100021
-chr1 21823800 21824000 R100022
-chr1 21824000 21824200 R100023
-chr1 21824200 21824400 R100024
-chr1 21824400 21824600 R100025
-chr1 21824600 21824800 R100026
-chr1 21824800 21825000 R100027
-chr1 21825000 21825200 R100028
-chr1 21825200 21825400 R100029
-chr1 21825400 21825600 R100030
-chr1 21825600 21825800 R100031
-chr1 21825800 21826000 R100032
-chr1 21826000 21826200 R100033
-chr1 21826200 21826400 R100034
-chr1 21826400 21826600 R100035
-chr1 21826600 21826800 R100036
-chr1 21826800 21827000 R100037
-chr1 21827000 21827200 R100038
-chr1 21827200 21827400 R100039
-chr1 21827400 21827600 R100040
-chr1 21827600 21827800 R100041
-chr1 21827800 21828000 R100042
-chr1 21828000 21828200 R100043
-chr1 21828200 21828400 R100044
-chr1 21828400 21828600 R100045
-chr1 21829000 21829200 R100046
-chr1 21829400 21829600 R100047
-chr1 21829600 21829800 R100048
-chr1 21829800 21830000 R100049
-chr1 21830000 21830200 R100050
diff -r 99102499271a -r 3651f1592f3f test-data/chromosome_windows.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chromosome_windows.txt Wed Jan 31 14:13:11 2018 -0500
@@ -0,0 +1,1 @@
+chr1 0 50
diff -r 99102499271a -r 3651f1592f3f test-data/chromosomes.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/chromosomes.bed Wed Jan 31 14:13:11 2018 -0500
@@ -0,0 +1,50 @@
+chr1 21819600 21819800 R100001
+chr1 21819800 21820000 R100002
+chr1 21820000 21820200 R100003
+chr1 21820200 21820400 R100004
+chr1 21820400 21820600 R100005
+chr1 21820600 21820800 R100006
+chr1 21820800 21821000 R100007
+chr1 21821000 21821200 R100008
+chr1 21821200 21821400 R100009
+chr1 21821400 21821600 R100010
+chr1 21821600 21821800 R100011
+chr1 21821800 21822000 R100012
+chr1 21822000 21822200 R100013
+chr1 21822200 21822400 R100014
+chr1 21822400 21822600 R100015
+chr1 21822600 21822800 R100016
+chr1 21822800 21823000 R100017
+chr1 21823000 21823200 R100018
+chr1 21823200 21823400 R100019
+chr1 21823400 21823600 R100020
+chr1 21823600 21823800 R100021
+chr1 21823800 21824000 R100022
+chr1 21824000 21824200 R100023
+chr1 21824200 21824400 R100024
+chr1 21824400 21824600 R100025
+chr1 21824600 21824800 R100026
+chr1 21824800 21825000 R100027
+chr1 21825000 21825200 R100028
+chr1 21825200 21825400 R100029
+chr1 21825400 21825600 R100030
+chr1 21825600 21825800 R100031
+chr1 21825800 21826000 R100032
+chr1 21826000 21826200 R100033
+chr1 21826200 21826400 R100034
+chr1 21826400 21826600 R100035
+chr1 21826600 21826800 R100036
+chr1 21826800 21827000 R100037
+chr1 21827000 21827200 R100038
+chr1 21827200 21827400 R100039
+chr1 21827400 21827600 R100040
+chr1 21827600 21827800 R100041
+chr1 21827800 21828000 R100042
+chr1 21828000 21828200 R100043
+chr1 21828200 21828400 R100044
+chr1 21828400 21828600 R100045
+chr1 21829000 21829200 R100046
+chr1 21829400 21829600 R100047
+chr1 21829600 21829800 R100048
+chr1 21829800 21830000 R100049
+chr1 21830000 21830200 R100050
diff -r 99102499271a -r 3651f1592f3f test-data/output.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.html Wed Jan 31 14:13:11 2018 -0500
@@ -0,0 +1,7 @@
+Files prepared for IDEAS
+
\ No newline at end of file
diff -r 99102499271a -r 3651f1592f3f test-data/output.ideaspre
--- a/test-data/output.ideaspre Wed Jan 31 08:22:43 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-Files prepared for IDEAS
-
-
-
\ No newline at end of file
diff -r 99102499271a -r 3651f1592f3f test-data/tmp.tar
Binary file test-data/tmp.tar has changed