Mercurial > repos > pitagora > test_rna_seq_02
changeset 0:20dae34ead93 default tip
commit
author | pitagora <ryota.yamanaka@riken.jp> |
---|---|
date | Thu, 16 Apr 2015 12:55:03 +0900 |
parents | |
children | |
files | core_DEGA.R core_bam2readcount.R core_boxplot.R core_enrichment.R core_filtergenes.R core_get_PPI.R core_heatmapplot.R core_normalization.R core_tsplot.R exec_DEGA.R exec_DEGA.xml exec_bam2readcount.R exec_bam2readcount.xml exec_boxplot.R exec_boxplot.xml exec_enrichment.R exec_enrichment.xml exec_filtergenes.R exec_filtergenes.xml exec_get_PPI.R exec_get_PPI.xml exec_heatmapplot.R exec_heatmapplot.xml exec_normalization.R exec_normalization.xml exec_tsplot.R exec_tsplot.xml exec_upload.py exec_upload.xml |
diffstat | 29 files changed, 2021 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/core_DEGA.R Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,112 @@ +rm( list=ls(all=TRUE) ) # clean up R workspace + +#### Basic configuration #### +homedir <- Sys.getenv("HOME") +corefile_dir <- paste(homedir,"/galaxy-dist/tools/ske/",sep="") +args <- commandArgs(TRUE) +if( length(args)==0 ) { + print("No arguments supplied.") +} else { + for( i in 1:length(args) ) { + print( args[i] ) + } +} + +## configuration ## +#methods <- c("edgeR","DESeq2") +method <- args[1] + +cdir <- corefile_dir +## read table ## +tbln <- args[2] +tbl <- read.table(tbln,sep=",",header=T) +target_ids <- which(as.vector(tbl$analysis)=="yes") +sIDs <- tbl$sampleID[target_ids] +group <- tbl$class[target_ids] + +times <- tbl$time[target_ids] +units <- tbl$unit[target_ids] +time_points <- unique( paste(times,units,sep="") ) +if( length(time_points)==1 ) { + ## no time series data ## + times <- rep("single",length=length(times)) + time_points <- unique(times) +} + +## read normalized matrix ## +datafile <- args[3] +mat <- read.table(datafile,sep="\t",header=T) +data_sIDs <- colnames(mat) +gsyms <- rownames( mat ) # gene symbols + +DEG_lst <- c() +count <- 1 +for( time in unique(times) ) { + ## get sample ID ## + tp <- time_points[count] + count <- count+1 + time_ids <- which(time==times) + # group 1 # + group_ids <- which(group==1) + ids <- intersect(time_ids,group_ids) + g1_samples <- as.vector( sIDs[ids] ) + data_ids <- c() + for( s in g1_samples ) { + data_ids <- c(data_ids,which( s==data_sIDs )) + } + rc_data1 <- mat[,data_ids] + # group 2 # + group_ids <- which(group==2) + ids <- intersect(time_ids,group_ids) + g2_samples <- as.vector( sIDs[ids] ) + data_ids <- c() + for( s in g2_samples ) { + data_ids <- c(data_ids,which( data_sIDs==s )) + } + rc_data2 <- mat[,data_ids] + + ## combine data-matrix ## + rc_data <- cbind(rc_data1,rc_data2) + rc_data_cl <- c(rep(1,ncol(rc_data1)),rep(2,ncol(rc_data2))) + + if( method=="edgeR" ) { + ## edge R ## + library(edgeR) + d <- DGEList(counts=rc_data,group=rc_data_cl) + d <- calcNormFactors(d) + d <- estimateCommonDisp(d) + d <- estimateTagwiseDisp(d) + out <- exactTest(d) + fdr <- p.adjust(out$table$PValue, method="BH") + ranked <- rank(fdr) + res <- cbind(rownames(rc_data),out$table$logFC,fdr,out$table$PValue) + + } else if( method=="DESeq2" ) { + ## DESeq2 ## + library(DESeq2) + coldat <- DataFrame(grp=factor(rc_data_cl)) + rc_data <- floor(rc_data) # to be integer + dds <- DESeqDataSetFromMatrix(rc_data,colData=coldat,design = ~ grp) + dds <- DESeq(dds) + deseq2_res <- results(dds,pAdjustMethod="BH") + fdr <- deseq2_res$padj + ranked <- rank(fdr) + res <- cbind(rownames(rc_data),as.vector(deseq2_res$log2FoldChange),as.vector(deseq2_res$padj),as.vector(deseq2_res$pvalue)) + } + colnames(res) <- c("gene","logFC","FDR","pvalue") + + ## save results ## + sfn <- paste("DEG_",tp,".txt",sep="") + write.table(res[order(ranked),],sfn,sep="\t",append=F,quote=F,row.names=F) + DEG_lst <- c(DEG_lst,sfn) +} +DEGs <- paste(DEG_lst,collapse=" ") +#cmd <- paste("tar -czf DEG.tar.gz ",DEGs,paste="") +cmd <- paste("zip DEG.zip ",DEGs,paste="") +system(cmd) + +savefile_lst <- c() +savefilename <- "DEG.zip" +savefile_lst <- c(savefile_lst,savefilename) +sfn <- paste(corefile_dir,"/savefiles.txt",sep="") +write.table(savefile_lst,sfn,quote=F,row.names=F,col.names=F)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/core_bam2readcount.R Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,54 @@ +rm( list=ls(all=TRUE) ) # clean up R workspace + +library(Rsubread) +library(annotate) + +homedir <- Sys.getenv("HOME") +corefile_dir <- paste(homedir,"/galaxy-dist/tools/ske/",sep="") +args <- commandArgs(TRUE) +if( length(args)==0 ) { + print("No arguments supplied.") +} else { + for( i in 1:length(args) ) { + print( args[i] ) + } +} + +inputfile <- args[1] +annotation <- args[2] +if( annotation=="mm9" ) { + library(org.Mm.eg.db) + organism_db <- "org.Mm.eg" +} else if( annotation=="hg19" ) { + library(org.Hs.eg.db) + orgranism_db <- "org.Hs.eg" +} +geneid <- symbol <- "sample" + +## gene ID and read count file ## +x <- featureCounts(files=inputfile,annot.inbuilt=annotation,nthreads=5) +rc_geneID <- paste(geneid,"_rcid.txt",sep="") +write.table(x$counts,rc_geneID,quote=F,col.names=F,sep="\t") + +## conversion gene ID to gene symbols ## +x <- read.table(rc_geneID,header=F) +ID <- x[,1] +rc <- x[,2] +ID <- as.character(ID) +Sym <- lookUp(ID,organism_db,"SYMBOL") +Sym <- unlist(Sym) +Sym <- as.vector(Sym) +x <- cbind(Sym,rc) +x <- na.omit(x) + +## save gene-symbol and read cound file ## +rc_geneSym <- paste(symbol,"_rcsym.txt",sep="") +write.table(x,rc_geneSym,quote=F,col.names=F,row.names=F,sep="\t") + +## move created files into a known directory, and store file-names ## +savefile_lst <- c() +savefilename <- rc_geneSym +savefile_lst <- c(savefile_lst,savefilename) +sfn <- paste(corefile_dir,"/savefiles.txt",sep="") +write.table(savefile_lst,sfn,quote=F,row.names=F,col.names=F) +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/core_boxplot.R Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,141 @@ +rm( list=ls(all=TRUE) ) # clean up R workspace +library(ggplot2) +library(gridExtra) + +#### Basic configuration #### +homedir <- Sys.getenv("HOME") +corefile_dir <- paste(homedir,"/galaxy-dist/tools/ske/",sep="") +args <- commandArgs(TRUE) +if( length(args)==0 ) { + print("No arguments supplied.") +} else { + for( i in 1:length(args) ) { + print( args[i] ) + } +} + +single_boxplot <- function( tp, df ) { + b <- ggplot(df,aes(label,value)) + geom_boxplot(aes(fill=label)) + labs(x=tp,y="gene expression") + theme(legend.position="none") + return( b ) +} + +pdfmake <- function( gsym, nr=2, nc=2 ) { + title <- paste("Gene expression of ",gsym,sep="") + num <- nr*nc + itr <- floor((N-1)/num) + mod <- N-itr*num + input <- "" + if( itr>0 ) { + for( i in 1:itr ) { + ge <- "pl <- grid.arrange(" + for( j in 1:num ) { + ge <- paste(ge,"p",(i-1)*num+j,",",sep="") + } + ge <- paste(ge,"main=","\"",title,"\",","nrow=",nr,",ncol=",nc,")",sep="") + sfn <- paste("sep",formatC(i,width=3,flag="0"),".pdf",sep="") + pdf(sfn) + eval(parse(text=ge)) + dev.off() + input <- paste(input,sfn,sep=" ") + } + } + if( mod>0 ) { + ge <- "grid.arrange(" + for( j in 1:mod ) { + ge <- paste(ge,"p",itr*num+j,",",sep="") + } + ge <- paste(ge,"main=","\"",title,"\",","nrow=",nr,",ncol=",nc,")",sep="") + } + sfn <- paste("sep",formatC(itr+1,width=3,flag="0"),".pdf",sep="") + pdf(sfn) + eval(parse(text=ge)) + dev.off() + input <- paste(input,sfn,sep=" ") + #ggsave(sfn,pl) + #mfn <- paste("ts_boxplot_",gsym,".pdf",sep="") + cmd <- paste("gs -dBATCH -dNOPAUSE -q -sDEVICE=pdfwrite -sOutputFile=boxplot.pdf",input,sep="") + system(cmd) + system("rm sep*.pdf") +} + +## configuration ## +nr <- 2 +nc <- 2 +gsym <- args[3] + +cdir <- getwd() +## read table ## +tbln <- args[1] +tbl <- read.table(tbln,sep=",",header=T) +target_ids <- which(as.vector(tbl$analysis)=="yes") +sIDs <- tbl$sampleID[target_ids] +group <- tbl$class[target_ids] +gname <- tbl$classname[target_ids] + +times <- tbl$time[target_ids] +units <- tbl$unit[target_ids] +time_points <- unique( paste(times,units,sep="") ) +if( length(time_points)==1 ) { + ## no time series data ## + times <- rep("boxplot",length=length(times)) + time_points <- unique(times) + nc <- 1 + nr <- 1 +} + +## read normalized matrix ## +datafile <- args[2] +mat <- read.table(datafile,sep="\t",header=T) +data_sIDs <- colnames(mat) +gsyms <- rownames( mat ) # gene symbols + +gid <- which( gsym==gsyms ) +if( length(gid)==0 ) { + print( "No such gene symbol is found. Quit boplot." ) + quit() +} + +N <- length(unique(times)) +count <- 1 +for( time in unique(times) ) { + tp <- time_points[count] + time_ids <- which(time==times) + ## remove low copy genes ## + # group 1 # + group_ids <- which(group==1) + gn1 <- unique(gname[group_ids]) + ids <- intersect(time_ids,group_ids) + g1_samples <- as.vector( sIDs[ids] ) + data_ids <- c() + for( s in g1_samples ) { + data_ids <- c(data_ids,which( s==data_sIDs )) + } + rc_data1 <- as.numeric( mat[gid,data_ids] ) + df1 <- data.frame(time=time,value=rc_data1,label=gn1) + # group 2 # + group_ids <- which(group==2) + gn2 <- unique(gname[group_ids]) + ids <- intersect(time_ids,group_ids) + g2_samples <- as.vector( sIDs[ids] ) + data_ids <- c() + for( s in g2_samples ) { + data_ids <- c(data_ids,which( data_sIDs==s )) + } + rc_data2 <- as.numeric( mat[gid,data_ids] ) + df2 <- data.frame(time=time,value=rc_data2,label=gn2) + print(df1) + df_all <- rbind(df1,df2) + cmd <- paste("p",count," <- single_boxplot(tp,df_all)",sep="") + eval(parse(text=cmd)) + count <- count+1 +} + +## boxplot ## +pdfmake(gsym,nr=nr,nc=nc) +## move created files into a known directory, and store file-names ## +savefile_lst <- c() +savefilename <- "boxplot.pdf" +savefile_lst <- c(savefile_lst,savefilename) +sfn <- paste(corefile_dir,"/savefiles.txt",sep="") +write.table(savefile_lst,sfn,quote=F,row.names=F,col.names=F) +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/core_enrichment.R Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,288 @@ +rm( list=ls(all=TRUE) ) # clean up R workspace + +#library(pathview) +library(org.Mm.eg.db) +library(org.Hs.eg.db) +library(gage) +data(kegg.gs) +data(go.gs) +library(gageData) +data(go.sets.mm) # mouse GO dataset +data(go.subs.mm) # mouse GO subcategory datasets +data(go.sets.hs) # mouse GO dataset +data(go.subs.hs) # mouse GO subcategory datasets +#same.dir=F +#ssaTest=gs.zTest / gs.KSTest +#use.fold=F +#rank.test=T +#compare="unpaired" +#use.stouffer=FALSE + +#### Basic configuration #### +homedir <- Sys.getenv("HOME") +corefile_dir <- paste(homedir,"/galaxy-dist/tools/ske/",sep="") +args <- commandArgs(TRUE) +if( length(args)==0 ) { + print("No arguments supplied.") +} else { + for( i in 1:length(args) ) { + print( args[i] ) + } +} + +sptype <- args[3] + +#dtypes <- c("ALL","KEGG","GOBP","GOMF","GOCC") +dtype <- args[4] + +## configuration ## + +cdir <- getwd() +## read table ## +tbln <- args[1] +tbl <- read.table(tbln,sep=",",header=T) +target_ids <- which(as.vector(tbl$analysis)=="yes") +sIDs <- tbl$sampleID[target_ids] +group <- tbl$class[target_ids] + +times <- tbl$time[target_ids] +units <- tbl$unit[target_ids] +time_points <- unique( paste(times,units,sep="") ) +if( length(time_points)==1 ) { + ## no time series data ## + times <- rep("single",length=length(times)) + time_points <- unique(times) +} + +## read normalized matrix ## +datafile <- args[2] +mat <- read.table(datafile,sep="\t",header=T) +data_sIDs <- colnames(mat) +gsyms <- rownames( mat ) # gene symbols + +if( sptype=="mm" ) { + EntrezIDs <- unlist(mget(as.character(gsyms),org.Mm.egSYMBOL2EG,ifnotfound=NA)) +} else if( sptype=="hs" ) { + EntrezIDs <- unlist(mget(as.character(gsyms),org.Hs.egSYMBOL2EG,ifnotfound=NA)) +} + +# remove +over_genes <- setdiff(names(EntrezIDs),gsyms) # "Trav16n1" "Trav16n2" "Aqp41" "Aqp42" "Snord1161" "Snord1162" + +if( length(over_genes)>0 ) { + rm_ids <- c() + for( ov in over_genes ) { + rm_id <- which( names(EntrezIDs)==ov ) + rm_ids <- c(rm_ids,rm_id) + } + #print( rm_ids ) + ch_ids <- c() + for( ch in c("Trav16n1","Aqp41","Snord116l1") ) { + ch_id <- which( names(EntrezIDs)==ch ) + ch_ids <- c(ch_ids,ch_id) + } + #print( ch_ids ) + names(ch_ids) <- c("Trav16n","Aqp4","Snord116") + eids <- c(as.vector(EntrezIDs[-rm_ids]),ch_ids) +} else { + eids <- as.vector(EntrezIDs) +} + +savefiles <- c() +count <- 1 +for( time in unique(times) ) { + ## get sample ID ## + tp <- time_points[count] + count <- count+1 + time_ids <- which(time==times) + # group 1 # + gids <- which(group==1) + g1ids <- intersect(time_ids,gids) + g1_samples <- as.vector( sIDs[g1ids] ) + data_ids <- c() + for( s in g1_samples ) { + data_ids <- c(data_ids,which( s==data_sIDs )) + } + rc_data1 <- mat[,data_ids] + # group 2 # + gids <- which(group==2) + g2ids <- intersect(time_ids,gids) + g2_samples <- as.vector( sIDs[g2ids] ) + data_ids <- c() + for( s in g2_samples ) { + data_ids <- c(data_ids,which( data_sIDs==s )) + } + rc_data2 <- mat[,data_ids] + ## combine data-matrix ## + rc_data <- as.matrix(cbind(rc_data1,rc_data2)) + rownames(rc_data) <- eids + colnames(rc_data) <- c(g1_samples,g2_samples) + refids <- rep(1,length=length(g1ids)) + samids <- rep(2,length=length(g1ids)) + + ## GO enrichment ## + if( dtype=="GOBP" ) { + # Biological Process # + if( sptype=="mm" ) { + ORA_GOBP <- gage(rc_data,gsets=go.sets.mm[go.subs.mm$BP],ref=refids,samp=samids,compare="unpaired") + } else if( sptype=="hs" ) { + ORA_GOBP <- gage(rc_data,gsets=go.sets.hs[go.subs.hs$BP],ref=refids,samp=samids,compare="unpaired") + } + ## greater ## + sfn <- paste(dtype,"_",tp,"_greater.txt",sep="") + write.table(ORA_GOBP$greater[,4],sfn,sep="\t",quote=F,col.names="ontology\tqvalue") + savefiles <- c(savefiles,sfn) + ## less ## + sfn <- paste(dtype,"_",tp,"_less.txt",sep="") + write.table(ORA_GOBP$less[,4],sfn,sep="\t",quote=F,col.names="ontology\tqvalue") + savefiles <- c(savefiles,sfn) + } else if( dtype=="MF" ) { + # Molecular Function # + if( sptype=="mm" ) { + ORA_GOMF <- gage(rc_data,gsets=go.sets.mm[go.subs.mm$MF],ref=refids,samp=samids,compare="unpaired") + } else if( sptype=="hs" ) { + ORA_GOMF <- gage(rc_data,gsets=go.sets.hs[go.subs.hs$MF],ref=refids,samp=samids,compare="unpaired") + } + ## greater ## + sfn <- paste(dtype,"_",tp,"_greater.txt",sep="") + write.table(ORA_GOMF$greater[,4],sfn,sep="\t",quote=F,col.names="ontology\tqvalue") + savefiles <- c(savefiles,sfn) + ## less ## + sfn <- paste(dtype,"_",tp,"_less.txt",sep="") + write.table(ORA_GOMF$less[,4],sfn,sep="\t",quote=F,col.names="ontology\tqvalue") + savefiles <- c(savefiles,sfn) + } else if( dtype=="CC" ) { + # Cellular component # + if( sptype=="mm" ) { + ORA_GOCC <- gage(rc_data,gsets=go.sets.mm[go.subs.mm$CC],ref=refids,samp=samids,compare="unpaired") + } else if( sptype=="hs" ) { + ORA_GOCC <- gage(rc_data,gsets=go.sets.hs[go.subs.hs$CC],ref=refids,samp=samids,compare="unpaired") + } + ## greater ## + sfn <- paste(dtype,"_",tp,"_greater.txt",sep="") + write.table(ORA_GOCC$greater[,4],sfn,sep="\t",quote=F,col.names="ontology\tqvalue") + savefiles <- c(savefiles,sfn) + ## less ## + sfn <- paste(dtype,"_",tp,"_less.txt",sep="") + write.table(ORA_GOCC$less[,4],sfn,sep="\t",quote=F,col.names="ontology\tqvalue") + savefiles <- c(savefiles,sfn) + } else if( dtype=="KEGG" ) { + ## KEGG enrichment ## + if( sptype=="mm" ) { + kg.mmu <- kegg.gsets("mmu") + kegg.gs <- kg.mmu$kg.sets[kg.mmu$sigmet.idx] + ORA_KEGG <- gage(rc_data,gsets=kegg.gs,ref=refids,samp=samids,compare="unpaired") + } else if( sptype=="hs" ) { + kg.hs <- kegg.gsets("hsa") + kegg.gs <- kg.hs$kg.sets[kg.hs$sigmet.idx] + ORA_KEGG <- gage(rc_data,gsets=kegg.gs,ref=refids,samp=samids,compare="unpaired") + } + #Ess_ORA_KEGG <- esset.grp(ORA_KEGG,rc_data,gsets=kegg.gs,ref=refids,samp=samids,test4up=T,output=T,outname="Ess_ORA_KEGGup",make.plot=F) + #print(head(Ess_ORA_KEGGup$coreGeneSets,4)) + ## greater ## + sfn <- paste(dtype,"_",tp,"_greater.txt",sep="") + write.table(ORA_KEGG$greater[,4],sfn,sep="\t",quote=F,col.names="ontology\tqvalue") + savefiles <- c(savefiles,sfn) + ## less ## + sfn <- paste(dtype,"_",tp,"_less.txt",sep="") + write.table(ORA_KEGG$less[,4],sfn,sep="\t",quote=F,col.names="ontology\tqvalue") + savefiles <- c(savefiles,sfn) + #### Pathway view #### + #mn <- min(length(refids),length(samids)) + #diff <- rc_data[,refids[1:mn]]-rc_data[,samids[1:mn]] + #path <- c("mmu04630 Jak-STAT signaling pathway") + #path_id <- substr(path,1,8) + ## native KEGG view ## + #sfn <- paste("pathview_",tp,".png",sep="") + #png(sfn,width=1200,height=1200) + #pvout_list <- sapply(path_ids, function(pid) pathview(gene.data=diff[,1:mn],pathway.id=pid,species="mmu")) + #dev.off() + #cmd <- paste("mv ",path_id,".pathview.multi.ng ",sfn,sep="") + #system(cmd) + #savefiles <- c(savefiles,sfn) + } else if( dtype=="ALL" ) { + # Biological Process # + if( sptype=="mm" ) { + ORA_GOBP <- gage(rc_data,gsets=go.sets.mm[go.subs.mm$BP],ref=refids,samp=samids,compare="unpaired") + } else if( sptype=="hs" ) { + ORA_GOBP <- gage(rc_data,gsets=go.sets.hs[go.subs.hs$BP],ref=refids,samp=samids,compare="unpaired") + } + ## greater ## + sfn <- paste("GOBP_",tp,"_greater.txt",sep="") + write.table(ORA_GOBP$greater[,4],sfn,sep="\t",quote=F,col.names="ontology\tqvalue") + savefiles <- c(savefiles,sfn) + ## less ## + sfn <- paste("GOBP_",tp,"_less.txt",sep="") + write.table(ORA_GOBP$less[,4],sfn,sep="\t",quote=F,col.names="ontology\tqvalue") + savefiles <- c(savefiles,sfn) + # Molecular Function # + if( sptype=="mm" ) { + ORA_GOMF <- gage(rc_data,gsets=go.sets.mm[go.subs.mm$MF],ref=refids,samp=samids,compare="unpaired") + } else if( sptype=="hs" ) { + ORA_GOMF <- gage(rc_data,gsets=go.sets.hs[go.subs.hs$MF],ref=refids,samp=samids,compare="unpaired") + } + ## greater ## + sfn <- paste("GOMF_",tp,"_greater.txt",sep="") + write.table(ORA_GOMF$greater[,4],sfn,sep="\t",quote=F,col.names="ontology\tqvalue") + savefiles <- c(savefiles,sfn) + ## less ## + sfn <- paste("GOMF_",tp,"_less.txt",sep="") + write.table(ORA_GOMF$less[,4],sfn,sep="\t",quote=F,col.names="ontology\tqvalue") + # Cellular component # + if( sptype=="mm" ) { + ORA_GOCC <- gage(rc_data,gsets=go.sets.mm[go.subs.mm$CC],ref=refids,samp=samids,compare="unpaired") + } else if( sptype=="hs" ) { + ORA_GOCC <- gage(rc_data,gsets=go.sets.hs[go.subs.hs$CC],ref=refids,samp=samids,compare="unpaired") + } + ## greater ## + sfn <- paste("GOCC_",tp,"_greater.txt",sep="") + write.table(ORA_GOCC$greater[,4],sfn,sep="\t",quote=F,col.names="ontology\tqvalue") + savefiles <- c(savefiles,sfn) + ## less ## + sfn <- paste("GOCC_",tp,"_less.txt",sep="") + write.table(ORA_GOCC$less[,4],sfn,sep="\t",quote=F,col.names="ontology\tqvalue") + ## KEGG enrichment ## + if( sptype=="mm" ) { + kg.mmu <- kegg.gsets("mmu") + kegg.gs <- kg.mmu$kg.sets[kg.mmu$sigmet.idx] + ORA_KEGG <- gage(rc_data,gsets=kegg.gs,ref=refids,samp=samids,compare="unpaired") + } else if( sptype=="hs" ) { + kg.hs <- kegg.gsets("hs") + kegg.gs <- kg.hs$kg.sets[kg.hs$sigmet.idx] + ORA_KEGG <- gage(rc_data,gsets=kegg.gs,ref=refids,samp=samids,compare="unpaired") + } + #Ess_ORA_KEGG <- esset.grp(ORA_KEGG,rc_data,gsets=kegg.gs,ref=refids,samp=samids,test4up=T,output=T,outname="Ess_ORA_KEGGup",make.plot=F) + #print(head(Ess_ORA_KEGGup$coreGeneSets,4)) + ## greater ## + sfn <- paste("KEGG_",tp,"_greater.txt",sep="") + write.table(ORA_KEGG$greater[,4],sfn,sep="\t",quote=F,col.names="ontology\tqvalue") + savefiles <- c(savefiles,sfn) + ## less ## + sfn <- paste("KEGG_",tp,"_less.txt",sep="") + write.table(ORA_KEGG$less[,4],sfn,sep="\t",quote=F,col.names="ontology\tqvalue") + savefiles <- c(savefiles,sfn) + #### Pathway view #### + #mn <- min(length(refids),length(samids)) + #diff <- rc_data[,refids[1:mn]]-rc_data[,samids[1:mn]] + #path <- c("mmu04630 Jak-STAT signaling pathway") + #path_id <- substr(path,1,8) + ## native KEGG view ## + #sfn <- paste("pathview_",tp,".png",sep="") + #png(sfn,width=1200,height=1200) + #pvout_list <- sapply(path_id, function(pid) pathview(gene.data=diff[,1:mn],pathway.id=pid,species="mmu")) + #dev.off() + #cmd <- paste("mv ",path_id,".pathview.multi.png ",sfn,sep="") + #system(cmd) + #savefiles <- c(savefiles,sfn) + } +} +# zip all files +cmd <- paste("zip enrich.zip ",paste(savefiles,collapse=" "),sep="") +system(cmd) + +## save ## +savefile_lst <- c() +savefilename <- "enrich.zip" +savefile_lst <- c(savefile_lst,savefilename) +sfn <- paste(corefile_dir,"/savefiles.txt",sep="") +write.table(savefile_lst,sfn,quote=F,row.names=F,col.names=F)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/core_filtergenes.R Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,178 @@ +rm( list=ls(all=TRUE) ) # clean up R workspace + +is_integer0 <- function(x) { + is.integer(x) && length(x) == 0L +} + +remove_lowcopy_genes <- function( givengenes, rcmat ) { + ids <- c() + for( g in givengenes ) { + id <- which( g==gsyms ) + ids <- c(ids,id) + } + mean_rcmat <- rowMeans( rcmat[ids,] ) + lowcopy_ids <- which( mean_rcmat<gexp_thr ) + if( is_integer0(lowcopy_ids)==TRUE ) { + res <- givengenes + } else { + res <- givengenes[-lowcopy_ids] + } + return( res ) +} + +#### Basic configuration #### +homedir <- Sys.getenv("HOME") +corefile_dir <- paste(homedir,"/galaxy-dist/tools/ske/",sep="") +args <- commandArgs(TRUE) +if( length(args)==0 ) { + print("No arguments supplied.") +} else { + for( i in 1:length(args) ) { + print( args[i] ) + } +} + +#### Basic configuration #### +is_remove_neggenes <- "no" +is_top100 <- "no" +neg_genes <- c("Mir","Snord","Snora","Gm","Rik","Trav16n") + +# cutoff values +gexp_thr <- as.numeric(args[4]) #1.0 +FDR_thr <- as.numeric(args[5]) #1.0e-5 +FC_up_thr <- as.numeric(args[6]) #1.0 +FC_down_thr <- as.numeric(args[7]) #-1.0 + +DEGzip <- args[3] +cmd <- paste("cp ",DEGzip," ",corefile_dir,sep="") +tmpdir <- getwd() +setwd(corefile_dir) +cmd <- paste("unzip -o ",DEGzip,sep="") +system(cmd) + +cdir <- getwd() +## read table ## +tbln <- args[1] +tbl <- read.table(tbln,sep=",",header=T) +target_ids <- which(as.vector(tbl$analysis)=="yes") +sIDs <- tbl$sampleID[target_ids] +group <- tbl$class[target_ids] + +times <- tbl$time[target_ids] +units <- tbl$unit[target_ids] +time_points <- unique( paste(times,units,sep="") ) +if( length(time_points)==1 ) { + ## no time series data ## + times <- rep("single",length=length(times)) + time_points <- unique(times) +} + +## read normalized matrix ## +datafile <- args[2] +mat <- read.table(datafile,sep="\t",header=T) +data_sIDs <- colnames(mat) +gsyms <- rownames( mat ) # gene symbols + +if( is_top100=="yes" ) { + #### Select signinficant genes on the basis of your given criterion #### + siggenes_all <- c() + count <- 1 + for( time in unique(times) ) { + tp <- time_points[count] + time_ids <- which(time==times) + count <- count+1 + rfn <- paste("DEG_",tp,".txt",sep="") + DEG <- read.csv(rfn,header=TRUE,sep="\t") + ## screening ## + siggenes <- as.vector( (DEG$gene)[1:99] ) + ## remove uninteresting gene families from plot ## + if( is_remove_neggenes=="yes" ) { + tmp <- siggenes + nc_ids <- c() + for( nc in neg_genes ) { + mtch_ids <- grep(nc,tmp) + if( length(mtch_ids)>0 ) { + nc_ids <- c(nc_ids,mtch_ids) + } + } + if( length(nc_ids)>0 ) { + siggenes <- tmp[-nc_ids] + } + } + ## save results ## + sfn <- paste("selected_genes_",tp,".txt",sep="") + write.table(siggenes,file=sfn,col.names="gene",row.names=F,quote=F) + ## all selected genes ## + siggenes_all <- union(siggenes_all,siggenes) + } +} else { + #### Select signinficant genes on the basis of your given criterion #### + siggenes_all <- c() + count <- 1 + for( time in unique(times) ) { + tp <- time_points[count] + time_ids <- which(time==times) + count <- count+1 + rfn <- paste("DEG_",tp,".txt",sep="") + DEG <- read.csv(rfn,header=TRUE,sep="\t") + ## screening ## + FDR_ids <- which( DEG$FDR<FDR_thr ) + FCup_ids <- which( DEG$logFC>FC_up_thr ) + FCdown_ids <- which( DEG$logFC<FC_down_thr ) + sigids <- intersect(FDR_ids,union(FCup_ids,FCdown_ids)) + siggenes <- as.vector( (DEG$gene)[sigids] ) + #print( siggenes ) + ## remove low copy genes ## + # group 1 # + group_ids <- which(group==1) + ids <- intersect(time_ids,group_ids) + g1_samples <- as.vector( sIDs[ids] ) + data_ids <- c() + for( s in g1_samples ) { + data_ids <- c(data_ids,which( s==data_sIDs )) + } + rc_data1 <- mat[,data_ids] + siggenes <- remove_lowcopy_genes(siggenes,rc_data1) + # group 2 # + group_ids <- which(group==2) + ids <- intersect(time_ids,group_ids) + g2_samples <- as.vector( sIDs[ids] ) + data_ids <- c() + for( s in g2_samples ) { + data_ids <- c(data_ids,which( data_sIDs==s )) + } + rc_data2 <- mat[,data_ids] + siggenes <- remove_lowcopy_genes(siggenes,rc_data2) + ## remove uninteresting gene families from plot ## + if( is_remove_neggenes=="yes" ) { + tmp <- siggenes + nc_ids <- c() + for( nc in neg_genes ) { + mtch_ids <- grep(nc,tmp) + if( length(mtch_ids)>0 ) { + nc_ids <- c(nc_ids,mtch_ids) + } + } + if( length(nc_ids)>0 ) { + siggenes <- tmp[-nc_ids] + } + } + ## save results ## + sfn <- paste("selected_genes_",tp,".txt",sep="") + write.table(siggenes,file=sfn,col.names="gene",row.names=F,quote=F) + ## all selected genes ## + siggenes_all <- union(siggenes_all,siggenes) + } +} +setwd(tmpdir) + +## save results ## +sfn <- "filtered_genes.txt" +write.table(siggenes_all,file=sfn,col.names="gene",row.names=F,quote=F) + +## move created files into a known directory, and store file-names ## +savefile_lst <- c() +savefilename <- sfn +savefile_lst <- c(savefile_lst,savefilename) +sfn <- paste(corefile_dir,"/savefiles.txt",sep="") +write.table(savefile_lst,sfn,quote=F,row.names=F,col.names=F)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/core_get_PPI.R Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,79 @@ +rm( list=ls(all=TRUE) ) # clean up R workspace + +library(STRINGdb) + +## basic configuration ## +homedir <- Sys.getenv("HOME") +corefile_dir <- paste(homedir,"/galaxy-dist/tools/ske/",sep="") +args <- commandArgs(TRUE) +if( length(args)==0 ) { + print("No arguments supplied.") +} else { + for( i in 1:length(args) ) { + print( args[i] ) + } +} + +hitnum <- 200 +#fig_size <- c(2400,2400) + +cdir <- getwd() +## read table ## +tbln <- args[1] +tbl <- read.table(tbln,sep=",",header=T) +target_ids <- which(as.vector(tbl$analysis)=="yes") +sIDs <- tbl$sampleID[target_ids] +group <- tbl$class[target_ids] + +times <- tbl$time[target_ids] +units <- tbl$unit[target_ids] +time_points <- unique( paste(times,units,sep="") ) +if( length(time_points)==1 ) { + ## no time series data ## + times <- rep("single",length=length(times)) + time_points <- unique(times) +} + +## read normalized matrix ## +datafile <- args[2] +mat <- read.table(datafile,sep="\t",header=T) +data_sIDs <- colnames(mat) +gsyms <- rownames( mat ) # gene symbols + +DEGzip <- args[3] +cmd <- paste("cp ",DEGzip," ",corefile_dir,sep="") +tmpdir <- getwd() +setwd(corefile_dir) +cmd <- paste("unzip -o ",DEGzip,sep="") +system(cmd) + +string_db <- STRINGdb$new(version="9_05",species=10090,input_directory=cdir,score_threshold=0) # mouse + +pdfs <- c() +for( tp in time_points ) { + rfn <- paste("DEG_",tp,".txt",sep="") + DEG <- read.csv(rfn,header=TRUE,sep="\t") + mapped <- string_db$map(DEG,"gene",removeUnmappedRows=TRUE) + mapped_pval05 <- string_db$add_diff_exp_color(subset(mapped,pvalue<0.05),logFcColStr="logFC") + payload_id <- string_db$post_payload(mapped_pval05$STRING_id,colors=mapped_pval05$color) + if( length(DEG$gene)<hitnum ) { + hitnum <- length(DEG$gene) + } + hits <- mapped$STRING_id[1:hitnum] + ## Pathway mapping ## + sfn <- paste("STRINGdb_PPI_network_",tp,".pdf",sep="") + pdf(sfn) + string_db$plot_network(hits,payload_id=payload_id) + dev.off() + pdfs <- c(pdfs,sfn) +} +savefilename <- "PPIs.zip" +cmd <- paste("zip ",savefilename," ",paste(pdfs,collapse=" "),sep="") +system(cmd) + +setwd(tmpdir) +## move created files into a known directory, and store file-names ## +savefile_lst <- c() +savefile_lst <- c(savefile_lst,savefilename) +sfn <- paste(corefile_dir,"/savefiles.txt",sep="") +write.table(savefile_lst,sfn,quote=F,row.names=F,col.names=F)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/core_heatmapplot.R Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,186 @@ +rm( list=ls(all=TRUE) ) # clean up R workspace + +library(gplots) # for heatmap and heatmap.2 +library(genefilter) # for genescale +library(RColorBrewer) + +#### Basic configuration #### +homedir <- Sys.getenv("HOME") +corefile_dir <- paste(homedir,"/galaxy-dist/tools/ske/",sep="") +args <- commandArgs(TRUE) +if( length(args)==0 ) { + print("No arguments supplied.") +} else { + for( i in 1:length(args) ) { + print( args[i] ) + } +} + +is_integer0 <- function(x) { + is.integer(x) && length(x) == 0L +} + +distEisen <- function( mat, method="spearman" ) { + dist <- as.dist(1-cor(mat, method=method)) + return( dist ) +} + +generate_heatmap_labels <- function( time_points ) { + clns <- c() + for( gn in c("group1","group2") ) { + for( tp in time_points ) { + new_name <- paste(gn,tp,sep="_") + clns <- c(clns,new_name) + } + } + return( clns ) +} + +calc_mean_gexp <- function( rowids, submat ) { + mean_mat <- rowMeans( submat[rowids,] ) + return( mean_mat ) +} + +#### plot hierarchical clustering and heatmap across time series #### +heatmapplot <- function( mat, hm_opts ) { + ## hierarchical tree : all ## + gene_tree <- hclust(distEisen(t(mat)),method=hm_opts$tree_method) # genes + sample_tree <- hclust(distEisen(mat),method=hm_opts$tree_method) # samples + dist_hm <- genescale(mat,axis=1,method=as.character(hm_opts$heatmap_method)) + ## heatmap ## + mycol <- rev(brewer.pal(11,"RdBu")) + sfn <- "heatmap.pdf" + #png(sfn,width=hm_opts$width,height=hm_opts$height) + par(mar=c(1,1,1,1)) # to include all information in a figure + pdf(sfn) + par(ps=hm_opts$ps) + e <- try( h <- heatmap.2(as.matrix(dist_hm),Rowv=as.dendrogram(gene_tree),Colv=NA,dendrogram=c("row"),scale="none",col=mycol,cexCol=1.0,cexRow=0.8,keysize=0.5,trace="none",density.info="none",main=hm_opts$title,margin=c(10,5)), + silent=FALSE) # try + if( class(e)=="try-error" ) { + print("Error in plot.new() : figure margins too large. Please reduce gene symbols.") + x <- rnorm(1000,mean=0.0,sd=1.0) + pdf("tsplot.pdf") + hist(x,main="Please reduce gene symbols. Quit Computation.",xlab="",ylab="") + dev.off() + savefilename <- "heatmap.pdf" + savefile_lst <- c() + savefile_lst <- c(savefile_lst,savefilename) + sfn <- paste(corefile_dir,"/savefiles.txt",sep="") + write.table(savefile_lst,sfn,quote=F,row.names=F,col.names=F) + quit() + } + dev.off() +} + +#### Basic configuration #### + +rfn <- args[3] +hc_methods <- list(ward="ward", single="single", complete="complete", average="average", mcquitty="mcquitty", median="median", centroid="centroid") +hc_method <- hc_methods$average +hm_opts <- list(width=800,height=1000,fig_ps=32,tree_method=hc_method,heatmap_method="Z",x_label="weeks",y_label="genes",title="Heatmap: group 1 vs group 2") + +cdir <- getwd() +## read table ## +tbln <- args[1] +tbl <- read.table(tbln,sep=",",header=T) +target_ids <- which(as.vector(tbl$analysis)=="yes") +sIDs <- tbl$sampleID[target_ids] +group <- tbl$class[target_ids] + +times <- tbl$time[target_ids] +units <- tbl$unit[target_ids] +time_points <- unique( paste(times,units,sep="") ) +if( length(time_points)==1 ) { + ## no time series data ## + times <- rep("mean",length=length(times)) + time_points <- unique(times) +} + +## read normalized matrix ## +datafile <- args[2] +mat <- read.table(datafile,sep="\t",header=T) +data_sIDs <- colnames(mat) +gsyms <- rownames( mat ) # gene symbols + +#rfn <- paste("selected_genes_",tp,".txt",sep="_") +siggenes_all <- as.vector( read.table(rfn,header=T)$gene ) +## remove not-found genes ## +all_sigids <- c() +rmg <- c() +rmg_ids <- c() +for( g in siggenes_all ) { + id <- which( g==gsyms ) + if( is_integer0(id)==TRUE ) { + rmg <- c(rmg,g) + rmg_ids <- c(rmg_ids,id) + } else { + all_sigids <- c(all_sigids,id) + } +} +print("Not found genes: ") +print( rmg ) +sfn <- "notfound_genes.txt" +write.table(rmg,sfn,row.names=F,col.names=F,quote=F) +siggenes_all <- gsyms[all_sigids] + +## get mean gene expression values ## +N <- length(unique(times)) +meanmat_ts1 <- matrix(NA,nrow=length(siggenes_all),ncol=N) +meanmat_ts2 <- matrix(NA,nrow=length(siggenes_all),ncol=N) +count <- 1 +for( time in unique(times) ) { + tp <- time_points[count] + time_ids <- which(time==times) + ## remove low copy genes ## + # group 1 # + group_ids <- which(group==1) + ids <- intersect(time_ids,group_ids) + g1_samples <- as.vector( sIDs[ids] ) + data_ids <- c() + for( s in g1_samples ) { + data_ids <- c(data_ids,which( s==data_sIDs )) + } + rc_data1 <- mat[,data_ids] + meanv1 <- calc_mean_gexp(all_sigids,rc_data1) + meanmat_ts1[,count] <- meanv1 + # group 2 # + group_ids <- which(group==2) + ids <- intersect(time_ids,group_ids) + g2_samples <- as.vector( sIDs[ids] ) + data_ids <- c() + for( s in g2_samples ) { + data_ids <- c(data_ids,which( data_sIDs==s )) + } + rc_data2 <- mat[,data_ids] + meanv2 <- calc_mean_gexp(all_sigids,rc_data2) + meanmat_ts2[,count] <- meanv2 + count <- count+1 +} +all_ts <- cbind(meanmat_ts1,meanmat_ts2) +rownames(all_ts) <- siggenes_all +colnames(all_ts) <- generate_heatmap_labels(time_points) + +## remove zero-variance genes +zero_variance_ids <- c() +for( i in 1:ncol(all_ts) ) { + v <- var(all_ts[,i]) + if( v==0.0 ) { + zero_variance_ids <- c(zero_variance_ids,i) + } +} +print( zero_variance_ids ) +if( length(zero_variance_ids)>0 ) { + siggenes_all <- siggenes_all[-zero_variance_ids] + all_ts <- all_ts[,-zero_variance_ids] +} + +## generate heatmap ## +heatmapplot(all_ts,hm_opts) + +## move created files into a known directory, and store file-names ## +savefile_lst <- c() +savefilename <- "heatmap.pdf" +savefile_lst <- c(savefile_lst,savefilename) +sfn <- paste(corefile_dir,"/savefiles.txt",sep="") +write.table(savefile_lst,sfn,quote=F,row.names=F,col.names=F) +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/core_normalization.R Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,79 @@ +rm( list=ls(all=TRUE) ) # clean up R workspace + +library(TCC) + +homedir <- Sys.getenv("HOME") +corefile_dir <- paste(homedir,"/galaxy-dist/tools/ske/",sep="") +args <- commandArgs(TRUE) +if( length(args)==0 ) { + print("No arguments supplied.") +} else { + for( i in 1:length(args) ) { + print( args[i] ) + } +} + +## configuration ## +methods <- c("edgeR","DESeq2") +method <- args[1] +# remove duplicated genes # +delgenes <- c("Trav16n") # duplication error in Rsubread + +cdir <- corefile_dir +## read table ## +tbln <- args[2] +tbl <- read.table(tbln,sep=",",header=T) +target_ids <- which(as.vector(tbl$analysis)=="yes") +sIDs <- tbl$sampleID[target_ids] +group <- tbl$class[target_ids] + +## extract zipped files ## +#setwd(cdir) +#file <- "data.zip" +file <- args[3] +cmd <- paste("unzip -o ",file,sep="") +system(cmd) +setwd("data") + +## read read-count data ## +readfile <- paste(sIDs[1],"_rcsym.txt",sep="") +data <- read.csv(readfile,sep="\t",header=F) +gsyms <- as.vector(data[,1]) +com_data <- as.vector(data[,2]) + +for( i in 2:length(sIDs) ) { + readfile <- paste(sIDs[i],"_rcsym.txt",sep="") + data <- read.csv(readfile,sep="\t",header=F) + com_data <- cbind( com_data,as.vector(data[,2]) ) +} +for( dg in delgenes ) { + dgid <- which( gsyms==dg ) + gsyms <- gsyms[-dgid[1]] + com_data <- com_data[-dgid[1],] +} +colnames(com_data) <- sIDs +rownames(com_data) <- gsyms + +tcc <- new("TCC",com_data,group) +if( method=="edgeR" ) { + tcc <- calcNormFactors(tcc,norm.method="edger",test.method="edger",iteration=1,FDR=0.1,floorPDEG=0.05) +} else if( method=="DESeq2" ) { + tcc <- calcNormFactors(tcc,norm.method="deseq2",test.method="deseq2",iteration=1,FDR=0.1,floorPDEG=0.05) +} +normalized_count <- getNormalizedData(tcc) + +setwd(corefile_dir) +sfn <- "rcmat.txt" +if( method=="edgeR" ) { + #write.table(t(normalized.count),file=sfn,quote=F,sep="\t") + write.table(normalized_count,file=sfn,quote=F,sep="\t") +} else if( method=="DESeq2" ) { + #write.table(t(normalized.count),file=sfn,quote=F,sep="\t") + write.table(normalized_count,file=sfn,quote=F,sep="\t") +} +## move created files into a known directory, and store file-names ## +savefile_lst <- c() +savefilename <- sfn +savefile_lst <- c(savefile_lst,savefilename) +sfn <- paste(corefile_dir,"/savefiles.txt",sep="") +write.table(savefile_lst,sfn,quote=F,row.names=F,col.names=F)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/core_tsplot.R Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,204 @@ +rm( list=ls(all=TRUE) ) # clean up R workspace + +library(ggplot2) +library(gridExtra) + +#### Basic configuration #### +homedir <- Sys.getenv("HOME") +corefile_dir <- paste(homedir,"/galaxy-dist/tools/ske/",sep="") +args <- commandArgs(TRUE) +if( length(args)==0 ) { + print("No arguments supplied.") +} else { + for( i in 1:length(args) ) { + print( args[i] ) + } +} + +is_integer0 <- function(x) { + is.integer(x) && length(x) == 0L +} + +calc_mean_gexp <- function( rowids, submat ) { + mean_mat <- rowMeans( submat[rowids,] ) + return( mean_mat ) +} + +tsplot <- function( rid=1, df=df, plot_opt=list(xlab="week",title="Group-1 (red) and Group-2 (green)",point_size=5,line_width=1,font_size=15,fig_size=3) ) { + + gname <- colnames(df)[rid] + xlab <- plot_opt$xlab + title <- plot_opt$title + fsize <- plot_opt$font_size + psize <- plot_opt$point_size + lwidth <- plot_opt$line_width + fig_name <- paste(gname,".png",sep="") + fig_size <- plot_opt$fig_size + df_single <- as.data.frame(cbind(time=df$time,value=df[,rid])) + df_single <- cbind(df_single,label=df$label) + rownames(df_single) <- rownames(df) + #colnames(df_single) <- c("time","value","label") + + p <- ggplot(aes(x=time,y=value,group=label,color=label),data=df_single) + p <- p + geom_point(size=psize) + geom_line(size=lwidth) + labs(x=xlab,y=gname) + p <- p + theme(axis.title.x=element_text(size=fsize),axis.title.y=element_text(size=fsize)) + p <- p + theme(axis.text.x=element_text(size=fsize),axis.text.y=element_text(size=fsize)) + p <- p + theme(legend.title=element_text(size=fsize-4),legend.text=element_text(size=fsize-4),legend.position="top") + #png(fig_name,width=fig_size,height=fig_size) + #print( p ) + #dev.off() + if( is_pngmake=="yes" ) { + ggsave(file=fig_name,width=fig_size,height=fig_size) + } + return( p ) +} + +pdfmake <- function(nr=3,nc=3) { + num <- nr*nc + itr <- floor((length(siggenes_all)-1)/num) + #mod <- (length(siggenes_all)-1)-itr*num + mod <- (length(siggenes_all))-itr*num + input <- "" + for( i in 1:itr ) { + ge <- "pl <- grid.arrange(" + for( j in 1:num ) { + ge <- paste(ge,"p",(i-1)*num+j,",",sep="") + } + ge <- paste(ge,"nrow=",nr,",ncol=",nc,")",sep="") + sfn <- paste("sep",formatC(i,width=3,flag="0"),".pdf",sep="") + pdf(sfn) + eval(parse(text=ge)) + dev.off() + input <- paste(input,sfn,sep=" ") + } + if( mod>0 ) { + ge <- "pl <- grid.arrange(" + for( j in 1:mod ) { + ge <- paste(ge,"p",itr*num+j,",",sep="") + } + ge <- paste(ge,"nrow=",nr,",ncol=",nc,")",sep="") + } + sfn <- paste("sep",formatC(itr+1,width=3,flag="0"),".pdf",sep="") + pdf(sfn) + eval(parse(text=ge)) + dev.off() + input <- paste(input,sfn,sep=" ") + #ggsave(sfn,pl) + cmd <- paste("gs -dBATCH -dNOPAUSE -q -sDEVICE=pdfwrite -sOutputFile=tsplot.pdf ",input,sep="") + system(cmd) + system("rm sep*.pdf") +} + +#### Basic configuration #### + +is_pngmake <- "no" +#rfn <- "selected_genes_all.txt" +rfn <- args[3] +plot_opt <- list(xlab="week",title="Spade (red) vs B6/J (green)",point_size=5,line_width=1,font_size=10,fig_size=3) + +cdir <- getwd() +## read table ## +tbln <- args[1] +tbl <- read.table(tbln,sep=",",header=T) +target_ids <- which(as.vector(tbl$analysis)=="yes") +sIDs <- tbl$sampleID[target_ids] +group <- tbl$class[target_ids] + +times <- tbl$time[target_ids] +units <- tbl$unit[target_ids] +time_points <- unique( paste(times,units,sep="") ) +if( length(time_points)==1 ) { + ## no time series data ## + print( "No time-course data is given. Quit Computation." ) + x <- rnorm(1000,mean=0.0,sd=1.0) + pdf("tsplot.pdf") + hist(x,main="No time-course data is given. Quit Computation.",xlab="",ylab="") + dev.off() + savefilename <- "tsplot.pdf" + savefile_lst <- c() + savefile_lst <- c(savefile_lst,savefilename) + sfn <- paste(corefile_dir,"/savefiles.txt",sep="") + write.table(savefile_lst,sfn,quote=F,row.names=F,col.names=F) + quit() +} + +## read normalized matrix ## +datafile <- args[2] +mat <- read.table(datafile,sep="\t",header=T) +data_sIDs <- colnames(mat) +gsyms <- rownames( mat ) # gene symbols + +#rfn <- paste("selected_genes_",tp,".txt",sep="_") +siggenes_all <- as.vector( read.table(rfn,header=T)$gene ) +## remove not-found genes ## +all_sigids <- c() +rmg <- c() +rmg_ids <- c() +for( g in siggenes_all ) { + id <- which( g==gsyms ) + if( is_integer0(id)==TRUE ) { + rmg <- c(rmg,g) + rmg_ids <- c(rmg_ids,id) + } else { + all_sigids <- c(all_sigids,id) + } +} +print("Not found genes: ") +print( rmg ) +sfn <- "notfound_genes.txt" +write.table(rmg,sfn,row.names=F,col.names=F,quote=F) +siggenes_all <- gsyms[all_sigids] + +N <- length(unique(times)) +meanmat_ts1 <- matrix(NA,nrow=length(siggenes_all),ncol=N) +meanmat_ts2 <- matrix(NA,nrow=length(siggenes_all),ncol=N) +count <- 1 +for( time in unique(times) ) { + tp <- time_points[count] + time_ids <- which(time==times) + ## remove low copy genes ## + # group 1 # + group_ids <- which(group==1) + ids <- intersect(time_ids,group_ids) + g1_samples <- as.vector( sIDs[ids] ) + data_ids <- c() + for( s in g1_samples ) { + data_ids <- c(data_ids,which( s==data_sIDs )) + } + rc_data1 <- mat[,data_ids] + meanv1 <- calc_mean_gexp(all_sigids,rc_data1) + meanmat_ts1[,count] <- meanv1 + # group 2 # + group_ids <- which(group==2) + ids <- intersect(time_ids,group_ids) + g2_samples <- as.vector( sIDs[ids] ) + data_ids <- c() + for( s in g2_samples ) { + data_ids <- c(data_ids,which( data_sIDs==s )) + } + rc_data2 <- mat[,data_ids] + meanv2 <- calc_mean_gexp(all_sigids,rc_data2) + meanmat_ts2[,count] <- meanv2 + count <- count+1 +} +rownames(meanmat_ts1) <- siggenes_all +rownames(meanmat_ts2) <- siggenes_all +df1 <- data.frame(time=times,t(meanmat_ts1),label="1") +df2 <- data.frame(time=times,t(meanmat_ts2),label="2") +all_ts <- rbind(df1,df2) + +#### plot time-series data #### +for( i in 2:(length(siggenes_all)+1) ) { + cmd <- paste("p",i-1," <- tsplot(rid=",i,",df=all_ts,plot_opt=plot_opt)",sep="") + eval(parse(text=cmd)) +} +## merge produced pictures into a pdf file ## +pdfmake(nr=3,nc=3) + +## move created files into a known directory, and store file-names ## +savefilename <- "tsplot.pdf" +savefile_lst <- c() +savefile_lst <- c(savefile_lst,savefilename) +sfn <- paste(corefile_dir,"/savefiles.txt",sep="") +write.table(savefile_lst,sfn,quote=F,row.names=F,col.names=F) +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/exec_DEGA.R Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,38 @@ +rm( list=ls(all=TRUE) ) # clean up R workspace + +homedir <- Sys.getenv("HOME") +errorlog <- paste(homedir,"/galaxy-dist/tools/ske/errorlog.txt",sep="") +write.table("Replace with a variable.",errorlog,quote=F,row.names=F,col.names=F) + +corefile_name <- "core_DEGA.R" + +suppressMessages(library(argparse)) +parser <- ArgumentParser(description="arg parser") +parser$add_argument("--input1", dest="input1", default="edgeR", required=TRUE) +parser$add_argument("--input2", dest="input2", default="table.csv", required=TRUE) +parser$add_argument("--input3", dest="input3", default="rcmat.txt", required=TRUE) +parser$add_argument("--output", dest="output", default="DEG.txt", required=TRUE) + +## Extract a value from each list, and then concatenate to obtain a space delimiated line ## +tmp <- parser$parse_args() +argsvec <- unlist(lapply(tmp,"[[",1)) +args <- paste(argsvec,collapse=" ") +corefile_dir <- paste(homedir,"/galaxy-dist/tools/ske/",sep="") +corefile <- paste(corefile_dir,corefile_name,sep="") +execlog <- paste(homedir,"/galaxy-dist/tools/ske/execlog.txt",sep="") +cmd <- paste("R CMD BATCH --no-save --no-restore \'--args ",args,"\' ",corefile," ",execlog,sep="") +system(cmd) +## output files ## +outputlocs <- c() +outputlocs <- c(outputlocs,tmp$output) +sfns <- read.table(paste(corefile_dir,"savefiles.txt",sep=""),header=F)$V1 +N <- length(sfns) +for( i in 1:N ) { + cmd <- paste("cp ",sfns[i]," ",corefile_dir,sep="") + system(cmd) + cmd <- paste("mv ",sfns[i]," ",outputlocs[i],sep="") + system(cmd) +} +## check the status of computation ## +write.table("Successfully computed.",errorlog,quote=F,row.names=F,col.names=F) +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/exec_DEGA.xml Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,24 @@ +<tool id="DEGA" name="STEP-04: DEG" version="1.0.0"> +<description>Differentially Expressed Gene Analysis</description> + <command interpreter="Rscript --vanilla"> + exec_DEGA.R + --input1 $input1 + --input2 $input2 + --input3 $input3 + --output $output + --output2 $output2 + </command> + <inputs> + <param name="input1" type="select" label="Method" > + <option value="edgeR">edgeR</option> + <option value="deseq2">DESeq2</option> + + </param> + <param name="input2" type="data" format="txt" label="Description table" /> + <param name="input3" type="data" format="txt" label="Normalized read count data" /> + </inputs> + <outputs> + <data name="output" format="zip" label="Zipped DEG files"/> + <data name="output2" format="txt" label="Execution log file"/> + </outputs> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/exec_bam2readcount.R Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,39 @@ +rm( list=ls(all=TRUE) ) # clean up R workspace + +homedir <- Sys.getenv("HOME") +errorlog <- paste(homedir,"/galaxy-dist/tools/ske/errorlog.txt",sep="") +write.table("Replace with a variable.",errorlog,quote=F,row.names=F,col.names=F) + +corefile_name <- "core_bam2readcount.R" + +suppressMessages(library(argparse)) +parser <- ArgumentParser(description="arg parser") +parser$add_argument('--input1', dest='input1', default='sample.bam', required=TRUE) +parser$add_argument('--input2', dest='input2', default='mm9', required=TRUE) +parser$add_argument('--output', dest='output', default='sample_rcsym.txt', required=TRUE) +parser$add_argument("--output2", dest="output2", default="execlog.txt", required=TRUE) + +## Extract a value from each list, and then concatenate to obtain a space delimiated line ## +tmp <- parser$parse_args() +argsvec <- unlist(lapply(tmp,"[[",1)) +args <- paste(argsvec,collapse=" ") +corefile_dir <- paste(homedir,"/galaxy-dist/tools/ske/",sep="") +corefile <- paste(corefile_dir,corefile_name,sep="") +execlog <- paste(homedir,"/galaxy-dist/tools/ske/execlog.txt",sep="") +cmd <- paste("R CMD BATCH --no-save --no-restore \'--args ",args,"\' ",corefile," ",execlog,sep="") +system(cmd) + +## execution log file ## +cmd <- paste("mv ",execlog," ",tmp$output2,sep="") +system(cmd) +## output files ## +outputlocs <- c() +outputlocs <- c(outputlocs,tmp$output) +sfns <- read.table(paste(corefile_dir,"savefiles.txt",sep=""),header=F)$V1 +N <- length(sfns) +for( i in 1:N ) { + cmd <- paste("mv ",sfns[i]," ",outputlocs[i],sep="") + system(cmd) +} +## check the status of computation ## +write.table("Successfully computed.",errorlog,quote=F,row.names=F,col.names=F)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/exec_bam2readcount.xml Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,21 @@ +<tool id="bam2readcount" name="STEP-01: BAM to Read-count" version="1.0.0"> +<description>Extract read-count from BAM file</description> + <command interpreter="Rscript --vanilla"> + exec_bam2readcount.R + --input1 $input1 + --input2 $input2 + --output $output + --output2 $output2 + </command> + <inputs> + <param name="input1" type="data" format="bam" label="BAM file" /> + <param name="input2" type="select" label="Reference dataset" > + <option value="mm9">mouse (UCSC mouse gene : mm9)</option> + <option value="hg19">human (UCSC homo sapiens gene (hg19)</option> + </param> + </inputs> + <outputs> + <data format="txt" name="output" label="output"/> + <data name="output2" format="txt" label="Execution log file"/> + </outputs> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/exec_boxplot.R Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,42 @@ +rm( list=ls(all=TRUE) ) # clean up R workspace + +homedir <- Sys.getenv("HOME") +errorlog <- paste(homedir,"/galaxy-dist/tools/ske/errorlog.txt",sep="") +write.table("Replace with a variable.",errorlog,quote=F,row.names=F,col.names=F) + +corefile_name <- "core_boxplot.R" + +suppressMessages(library(argparse)) +parser <- ArgumentParser(description="arg parser") +parser$add_argument("--input1", dest="input1", default="table.csv", required=TRUE) +parser$add_argument("--input2", dest="input2", default="rcmat.txt", required=TRUE) +parser$add_argument("--input3", dest="input3", default="Nanog", required=TRUE) +parser$add_argument("--output", dest="output", default="boxplot.pdf", required=TRUE) +parser$add_argument("--output2", dest="output2", default="execlog.txt", required=TRUE) + +## Extract a value from each list, and then concatenate to obtain a space delimiated line ## +tmp <- parser$parse_args() +argsvec <- unlist(lapply(tmp,"[[",1)) +args <- paste(argsvec,collapse=" ") +corefile_dir <- paste(homedir,"/galaxy-dist/tools/ske/",sep="") +corefile <- paste(corefile_dir,corefile_name,sep="") +execlog <- paste(homedir,"/galaxy-dist/tools/ske/execlog.txt",sep="") +cmd <- paste("R CMD BATCH --no-save --no-restore \'--args ",args,"\' ",corefile," ",execlog,sep="") +system(cmd) + +## execution log file ## +cmd <- paste("mv ",execlog," ",tmp$output2,sep="") +system(cmd) +## output files ## +outputlocs <- c() +outputlocs <- c(outputlocs,tmp$output) +sfns <- read.table(paste(corefile_dir,"savefiles.txt",sep=""),header=F)$V1 +N <- length(sfns) +for( i in 1:N ) { + cmd <- paste("cp ",sfns[i]," ",corefile_dir,sep="") + system(cmd) + cmd <- paste("mv ",sfns[i]," ",outputlocs[i],sep="") + system(cmd) +} +## check the status of computation ## +write.table("Successfully computed.",errorlog,quote=F,row.names=F,col.names=F)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/exec_boxplot.xml Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,20 @@ +<tool id="boxplot" name="STEP-06_2: Boxplot" version="1.0.0"> +<description>Boxplot of gene expression</description> + <command interpreter="Rscript --vanilla"> + exec_boxplot.R + --input1 $input1 + --input2 $input2 + --input3 $input3 + --output $output + --output2 $output2 + </command> + <inputs> + <param name="input1" type="data" format="txt" label="Description table" /> + <param name="input2" type="data" format="txt" label="Normalized read count data" /> + <param name="input3" size="30" type="text" value="Nanog" label="Gene symbol" /> + </inputs> + <outputs> + <data name="output" format="pdf" label="Box-plot"/> + <data name="output2" format="txt" label="Execution log file"/> + </outputs> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/exec_enrichment.R Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,44 @@ +rm( list=ls(all=TRUE) ) # clean up R workspace + +homedir <- Sys.getenv("HOME") +errorlog <- paste(homedir,"/galaxy-dist/tools/ske/errorlog.txt",sep="") +write.table("Replace with a variable.",errorlog,quote=F,row.names=F,col.names=F) + +corefile_name <- "core_enrichment.R" + +suppressMessages(library(argparse)) +parser <- ArgumentParser(description="arg parser") +parser$add_argument("--input1", dest="input1", default="table.csv", required=TRUE) +parser$add_argument("--input2", dest="input2", default="rcmat.txt", required=TRUE) +parser$add_argument("--input3", dest="input3", default="mm", required=TRUE) +parser$add_argument("--input4", dest="input4", default="KEGG", required=TRUE) +parser$add_argument("--output", dest="output", default="DEG.txt", required=TRUE) +parser$add_argument("--output2", dest="output2", default="execlog.txt", required=TRUE) + +## Extract a value from each list, and then concatenate to obtain a space delimiated line ## +tmp <- parser$parse_args() +argsvec <- unlist(lapply(tmp,"[[",1)) +args <- paste(argsvec,collapse=" ") +corefile_dir <- paste(homedir,"/galaxy-dist/tools/ske/",sep="") +corefile <- paste(corefile_dir,corefile_name,sep="") +execlog <- paste(homedir,"/galaxy-dist/tools/ske/execlog.txt",sep="") +cmd <- paste("R CMD BATCH --no-save --no-restore \'--args ",args,"\' ",corefile," ",execlog,sep="") +system(cmd) + +## execution log file ## +cmd <- paste("mv ",execlog," ",tmp$output2,sep="") +system(cmd) +## output files ## +outputlocs <- c() +outputlocs <- c(outputlocs,tmp$output) +sfns <- read.table(paste(corefile_dir,"savefiles.txt",sep=""),header=F)$V1 +N <- length(sfns) +for( i in 1:N ) { + cmd <- paste("cp ",sfns[i]," ",corefile_dir,sep="") + system(cmd) + cmd <- paste("mv ",sfns[i]," ",outputlocs[i],sep="") + system(cmd) +} +## check the status of computation ## +write.table("Successfully computed.",errorlog,quote=F,row.names=F,col.names=F) +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/exec_enrichment.xml Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,31 @@ +<tool id="enrichment" name="STEP-06_4: Enrichment" version="1.0.0"> +<description>Over Representation Analysis via GO and KEGG</description> + <command interpreter="Rscript --vanilla"> + exec_enrichment.R + --input1 $input1 + --input2 $input2 + --input3 $input3 + --input4 $input4 + --output $output + --output2 $output2 + </command> + <inputs> + <param name="input1" type="data" format="txt" label="Description table" /> + <param name="input2" type="data" format="txt" label="Normalized read count data" /> + <param name="input3" type="select" label="Species" > + <option value="mm">Mouse (Mus musculus)</option> + <option value="hs">Human (Homo sapiens)</option> + </param> + <param name="input4" type="select" label="Method" > + <option value="KEGG">KEGG</option> + <option value="GOBP">GO Biological Process</option> + <option value="GOMF">GO Molecular Function</option> + <option value="GOCC">GO Cellular Component</option> + <option value="ALL">All</option> + </param> + </inputs> + <outputs> + <data name="output" format="zip" label="Zipped enrichment files"/> + <data name="output2" format="txt" label="Execution log file"/> + </outputs> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/exec_filtergenes.R Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,46 @@ +rm( list=ls(all=TRUE) ) # clean up R workspace + +homedir <- Sys.getenv("HOME") +errorlog <- paste(homedir,"/galaxy-dist/tools/ske/errorlog.txt",sep="") +write.table("Replace with a variable.",errorlog,quote=F,row.names=F,col.names=F) + +corefile_name <- "core_filtergenes.R" + +suppressMessages(library(argparse)) +parser <- ArgumentParser(description="arg parser") +parser$add_argument("--input1", dest="input1", default="table.csv", required=TRUE) +parser$add_argument("--input2", dest="input2", default="rcmat.txt", required=TRUE) +parser$add_argument("--input3", dest="input3", default="DEG.zip", required=TRUE) +parser$add_argument("--input4", dest="input4", default="1.0", required=TRUE) +parser$add_argument("--input5", dest="input5", default="0.001", required=TRUE) +parser$add_argument("--input6", dest="input6", default="1.0", required=TRUE) +parser$add_argument("--input7", dest="input7", default="-1.0", required=TRUE) +parser$add_argument("--output", dest="output", default="filtered_genes.txt", required=TRUE) +parser$add_argument("--output2", dest="output2", default="execlog.txt", required=TRUE) + +## Extract a value from each list, and then concatenate to obtain a space delimiated line ## +tmp <- parser$parse_args() +argsvec <- unlist(lapply(tmp,"[[",1)) +args <- paste(argsvec,collapse=" ") +corefile_dir <- paste(homedir,"/galaxy-dist/tools/ske/",sep="") +corefile <- paste(corefile_dir,corefile_name,sep="") +execlog <- paste(homedir,"/galaxy-dist/tools/ske/execlog.txt",sep="") +cmd <- paste("R CMD BATCH --no-save --no-restore \'--args ",args,"\' ",corefile," ",execlog,sep="") +system(cmd) + +## execution log file ## +cmd <- paste("mv ",execlog," ",tmp$output2,sep="") +system(cmd) +## output files ## +outputlocs <- c() +outputlocs <- c(outputlocs,tmp$output) +sfns <- read.table(paste(corefile_dir,"savefiles.txt",sep=""),header=F)$V1 +N <- length(sfns) +for( i in 1:N ) { + cmd <- paste("cp ",sfns[i]," ",corefile_dir,sep="") + system(cmd) + cmd <- paste("mv ",sfns[i]," ",outputlocs[i],sep="") + system(cmd) +} +## check the status of computation ## +write.table("Successfully computed.",errorlog,quote=F,row.names=F,col.names=F)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/exec_filtergenes.xml Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,28 @@ +<tool id="filtergenes" name="STEP-05: filter genes" version="1.0.0"> +<description>Perform filtering based on given thresholds</description> + <command interpreter="Rscript --vanilla"> + exec_filtergenes.R + --input1 $input1 + --input2 $input2 + --input3 $input3 + --input4 $input4 + --input5 $input5 + --input6 $input6 + --input7 $input7 + --output $output + --output2 $output2 + </command> + <inputs> + <param name="input1" type="data" format="txt" label="Description table" /> + <param name="input2" type="data" format="txt" label="Normalized read count data" /> + <param name="input3" type="data" format="zip" label="Zipped DEG file" /> + <param name="input4" size="30" type="text" value="1.0" label="Lower cutoff value of gene expression" /> + <param name="input5" size="30" type="text" value="0.001" label="False Discovery Rate" /> + <param name="input6" size="30" type="text" value="1.0" label="Upper cutoff value of log-fold change" /> + <param name="input7" size="30" type="text" value="-1.0" label="Lower cutoff value of log-fold change" /> + </inputs> + <outputs> + <data name="output" format="txt" label="Filtered gene symbols"/> + <data name="output2" format="txt" label="Execution log file"/> + </outputs> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/exec_get_PPI.R Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,40 @@ +rm( list=ls(all=TRUE) ) # clean up R workspace + +homedir <- Sys.getenv("HOME") +errorlog <- paste(homedir,"/galaxy-dist/tools/ske/errorlog.txt",sep="") +write.table("Replace with a variable.",errorlog,quote=F,row.names=F,col.names=F) + +corefile_name <- "core_get_PPI.R" + +suppressMessages(library(argparse)) +parser <- ArgumentParser(description="arg parser") +parser$add_argument("--input1", dest="input1", default="table.csv", required=TRUE) +parser$add_argument("--input2", dest="input2", default="rcmat.txt", required=TRUE) +parser$add_argument("--input3", dest="input3", default="DEG.zip", required=TRUE) +parser$add_argument('--output', dest='output', default="PPIs.zip", required=TRUE) +parser$add_argument("--output2", dest="output2", default="execlog.txt", required=TRUE) + +## Extract a value from each list, and then concatenate to obtain a space delimiated line ## +tmp <- parser$parse_args() +argsvec <- unlist(lapply(tmp,"[[",1)) +args <- paste(argsvec,collapse=" ") +corefile_dir <- paste(homedir,"/galaxy-dist/tools/ske/",sep="") +corefile <- paste(corefile_dir,corefile_name,sep="") +execlog <- paste(homedir,"/galaxy-dist/tools/ske/execlog.txt",sep="") +cmd <- paste("R CMD BATCH --no-save --no-restore \'--args ",args,"\' ",corefile," ",execlog,sep="") +system(cmd) + +## execution log file ## +cmd <- paste("mv ",execlog," ",tmp$output2,sep="") +system(cmd) +## output files ## +outputlocs <- c() +outputlocs <- c(outputlocs,tmp$output) +sfns <- read.table(paste(corefile_dir,"savefiles.txt",sep=""),header=F)$V1 +N <- length(sfns) +for( i in 1:N ) { + cmd <- paste("cp ",corefile_dir,sfns[i]," ",outputlocs[i],sep="") + system(cmd) +} +## check the status of computation ## +write.table("Successfully computed.",errorlog,quote=F,row.names=F,col.names=F)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/exec_get_PPI.xml Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,20 @@ +<tool id="get_PPI" name="STEP-06_5: PPI" version="1.0.0"> +<description>Search protein-protein interaction from STRING-db</description> + <command interpreter="Rscript --vanilla"> + exec_get_PPI.R + --input1 $input1 + --input2 $input2 + --input3 $input3 + --output $output + --output2 $output2 + </command> + <inputs> + <param name="input1" type="data" format="txt" label="Description table" /> + <param name="input2" type="data" format="txt" label="Normalized read count data" /> + <param name="input3" type="data" format="zip" label="Zipped DEG file" /> + </inputs> + <outputs> + <data name="output" type="data" format="zip" label="enriched PPI network"/> + <data name="output2" format="txt" label="Execution log file"/> + </outputs> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/exec_heatmapplot.R Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,43 @@ +rm( list=ls(all=TRUE) ) # clean up R workspace + +homedir <- Sys.getenv("HOME") +errorlog <- paste(homedir,"/galaxy-dist/tools/ske/errorlog.txt",sep="") +write.table("Replace with a variable.",errorlog,quote=F,row.names=F,col.names=F) + +corefile_name <- "core_heatmapplot.R" + +suppressMessages(library(argparse)) +parser <- ArgumentParser(description="arg parser") +parser$add_argument("--input1", dest="input1", default="table.csv", required=TRUE) +parser$add_argument("--input2", dest="input2", default="rcmat.txt", required=TRUE) +parser$add_argument("--input3", dest="input3", default="genesymbols.txt", required=TRUE) +parser$add_argument("--output", dest="output", default="heatmap.pdf", required=TRUE) +parser$add_argument("--output2", dest="output2", default="execlog.txt", required=TRUE) + +## Extract a value from each list, and then concatenate to obtain a space delimiated line ## +tmp <- parser$parse_args() +argsvec <- unlist(lapply(tmp,"[[",1)) +args <- paste(argsvec,collapse=" ") +corefile_dir <- paste(homedir,"/galaxy-dist/tools/ske/",sep="") +corefile <- paste(corefile_dir,corefile_name,sep="") +execlog <- paste(homedir,"/galaxy-dist/tools/ske/execlog.txt",sep="") +cmd <- paste("R CMD BATCH --no-save --no-restore \'--args ",args,"\' ",corefile," ",execlog,sep="") +system(cmd) + +## execution log file ## +cmd <- paste("cp ",execlog," ",tmp$output2,sep="") +system(cmd) +## output files ## +outputlocs <- c() +outputlocs <- c(outputlocs,tmp$output) +sfns <- read.table(paste(corefile_dir,"savefiles.txt",sep=""),header=F)$V1 +N <- length(sfns) +for( i in 1:N ) { + cmd <- paste("cp ",sfns[i]," ",corefile_dir,sep="") + system(cmd) + cmd <- paste("mv ",sfns[i]," ",outputlocs[i],sep="") + system(cmd) +} +## check the status of computation ## +write.table("Successfully computed.",errorlog,quote=F,row.names=F,col.names=F) +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/exec_heatmapplot.xml Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,20 @@ +<tool id="heatmapplot" name="STEP-06_1: Heatmap" version="1.0.0"> +<description>Draw heatmap for given genes</description> + <command interpreter="Rscript --vanilla"> + exec_heatmapplot.R + --input1 $input1 + --input2 $input2 + --input3 $input3 + --output $output + --output2 $output2 + </command> + <inputs> + <param name="input1" type="data" format="txt" label="Description table" /> + <param name="input2" type="data" format="txt" label="Normalized read count data" /> + <param name="input3" type="data" format="txt" label="Gene symbol list" /> + </inputs> + <outputs> + <data name="output" format="pdf" label="Heatmap plot"/> + <data name="output2" format="txt" label="Execution log file"/> + </outputs> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/exec_normalization.R Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,40 @@ +rm( list=ls(all=TRUE) ) # clean up R workspace + +homedir <- Sys.getenv("HOME") +errorlog <- paste(homedir,"/galaxy-dist/tools/ske/errorlog.txt",sep="") +write.table("Replace with a variable.",errorlog,quote=F,row.names=F,col.names=F) + +corefile_name <- "core_normalization.R" + +suppressMessages(library(argparse)) +parser <- ArgumentParser(description="arg parser") +parser$add_argument("--input1", dest="input1", default="edgeR", required=TRUE) +parser$add_argument("--input2", dest="input2", default="table.csv", required=TRUE) +parser$add_argument("--input3", dest="input3", default="data.zip", required=TRUE) +parser$add_argument("--output", dest="output", default="rcmat.txt", required=TRUE) +parser$add_argument("--output2", dest="output2", default="execlog.txt", required=TRUE) + +## Extract a value from each list, and then concatenate to obtain a space delimiated line ## +tmp <- parser$parse_args() +argsvec <- unlist(lapply(tmp,"[[",1)) +args <- paste(argsvec,collapse=" ") +corefile_dir <- paste(homedir,"/galaxy-dist/tools/ske/",sep="") +corefile <- paste(corefile_dir,corefile_name,sep="") +execlog <- paste(homedir,"/galaxy-dist/tools/ske/execlog.txt",sep="") +cmd <- paste("R CMD BATCH --no-save --no-restore \'--args ",args,"\' ",corefile," ",execlog,sep="") +system(cmd) + +## execution log file ## +cmd <- paste("mv ",execlog," ",tmp$output2,sep="") +system(cmd) +## output files ## +outputlocs <- c() +outputlocs <- c(outputlocs,tmp$output) +sfns <- read.table(paste(corefile_dir,"savefiles.txt",sep=""),header=F)$V1 +N <- length(sfns) +for( i in 1:N ) { + cmd <- paste("cp ",corefile_dir,sfns[i]," ",outputlocs[i],sep="") + system(cmd) +} +## check the status of computation ## +write.table("Successfully computed.",errorlog,quote=F,row.names=F,col.names=F)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/exec_normalization.xml Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,23 @@ +<tool id="normalization" name="STEP-03: Normalization" version="1.0.0"> +<description>Perform normalization for read count data</description> + <command interpreter="Rscript --vanilla"> + exec_normalization.R + --input1 $input1 + --input2 $input2 + --input3 $input3 + --output $output + --output2 $output2 + </command> + <inputs> + <param name="input1" type="select" label="Normalization method" > + <option value="edgeR">Trimmed Mean normalization (edgeR)</option> + <option value="DESeq2">DESeq2 normalization </option> + </param> + <param name="input2" type="data" format="txt" label="Description table" /> + <param name="input3" type="data" format="data" label="Zipped read count data" /> + </inputs> + <outputs> + <data name="output" format="txt" label="Normalized read count data"/> + <data name="output2" format="txt" label="Execution log file"/> + </outputs> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/exec_tsplot.R Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,43 @@ +rm( list=ls(all=TRUE) ) # clean up R workspace + +homedir <- Sys.getenv("HOME") +errorlog <- paste(homedir,"/galaxy-dist/tools/ske/errorlog.txt",sep="") +write.table("Replace with a variable.",errorlog,quote=F,row.names=F,col.names=F) + +corefile_name <- "core_tsplot.R" + +suppressMessages(library(argparse)) +parser <- ArgumentParser(description="arg parser") +parser$add_argument("--input1", dest="input1", default="table.csv", required=TRUE) +parser$add_argument("--input2", dest="input2", default="rcmat.txt", required=TRUE) +parser$add_argument("--input3", dest="input3", default="genesymbols.txt", required=TRUE) +parser$add_argument("--output", dest="output", default="tsplot.pdf", required=TRUE) +parser$add_argument("--output2", dest="output2", default="execlog.txt", required=TRUE) + +## Extract a value from each list, and then concatenate to obtain a space delimiated line ## +tmp <- parser$parse_args() +argsvec <- unlist(lapply(tmp,"[[",1)) +args <- paste(argsvec,collapse=" ") +corefile_dir <- paste(homedir,"/galaxy-dist/tools/ske/",sep="") +corefile <- paste(corefile_dir,corefile_name,sep="") +execlog <- paste(homedir,"/galaxy-dist/tools/ske/execlog.txt",sep="") +cmd <- paste("R CMD BATCH --no-save --no-restore \'--args ",args,"\' ",corefile," ",execlog,sep="") +system(cmd) + +## execution log file ## +cmd <- paste("mv ",execlog," ",tmp$output2,sep="") +system(cmd) +## output files ## +outputlocs <- c() +outputlocs <- c(outputlocs,tmp$output) +sfns <- read.table(paste(corefile_dir,"savefiles.txt",sep=""),header=F)$V1 +N <- length(sfns) +for( i in 1:N ) { + cmd <- paste("cp ",sfns[i]," ",corefile_dir,sep="") + system(cmd) + cmd <- paste("mv ",sfns[i]," ",outputlocs[i],sep="") + system(cmd) +} +## check the status of computation ## +write.table("Successfully computed.",errorlog,quote=F,row.names=F,col.names=F) +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/exec_tsplot.xml Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,20 @@ +<tool id="tsplot" name="STEP-06_3: Plot" version="1.0.0"> +<description>Time series data visualization</description> + <command interpreter="Rscript --vanilla"> + exec_tsplot.R + --input1 $input1 + --input2 $input2 + --input3 $input3 + --output $output + --output2 $output2 + </command> + <inputs> + <param name="input1" type="data" format="txt" label="Description table" /> + <param name="input2" type="data" format="txt" label="Normalized read count data" /> + <param name="input3" type="data" format="txt" label="Gene symbol list" /> + </inputs> + <outputs> + <data name="output" format="pdf" label="Time course plot"/> + <data name="output2" format="txt" label="Execution log file"/> + </outputs> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/exec_upload.py Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,65 @@ +#!/usr/bin/env python +#Processes uploads from the user modified from the original script in Galaxy + +import sys,os,shutil,re +from galaxy import eggs +# need to import model before sniff to resolve a circular import dependency +import galaxy.model +from galaxy.datatypes.checkers import * +from galaxy.datatypes import sniff +from galaxy.datatypes.binary import * +from galaxy.datatypes.images import Pdf +from galaxy.datatypes.registry import Registry +from galaxy import util +from galaxy.datatypes.util.image_util import * +from galaxy.util.json import * + +assert sys.version_info[:2] >= ( 2, 4 ) + +def safe_dict(d): + """ + Recursively clone json structure with UTF-8 dictionary keys + http://mellowmachines.com/blog/2009/06/exploding-dictionary-with-unicode-keys-as-python-arguments/ + """ + if isinstance(d, dict): + return dict([(k.encode('utf-8'), safe_dict(v)) for k,v in d.iteritems()]) + elif isinstance(d, list): + return [safe_dict(x) for x in d] + else: + return d +def parse_outputs( args ): + rval = {} + for arg in args: + id, files_path, path = arg.split( ':', 2 ) + rval[int( id )] = ( path, files_path ) + return rval + +def __main__(): + if len( sys.argv ) < 4: + print >>sys.stderr, 'usage: upload.py <root> <datatypes_conf> <json paramfile> <output spec> ...' + sys.exit( 1 ) + + output_paths = parse_outputs( sys.argv[4:] ) + json_file = open( 'galaxy.json', 'w' ) + + registry = Registry() + registry.load_datatypes( root_dir=sys.argv[1], config=sys.argv[2] ) + + for line in open( sys.argv[3], 'r' ): + dataset = from_json_string( line ) + dataset = util.bunch.Bunch( **safe_dict( dataset ) ) + cmd = "cp %s /home/galaxy/galaxy-dist/tools/ske/data.zip" % (dataset.path) + os.system(cmd) + try: + output_path = output_paths[int( dataset.dataset_id )][0] + except: + print >>sys.stderr, 'Output path for dataset %s not found on command line' % dataset.dataset_id + sys.exit( 1 ) + shutil.copy( dataset.path, output_path ) + try: + os.remove( sys.argv[3] ) + except: + pass + +if __name__ == '__main__': + __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/exec_upload.xml Thu Apr 16 12:55:03 2015 +0900 @@ -0,0 +1,53 @@ +<?xml version="1.0"?> + +<tool name="STEP-02: Upload zipped file" id="exec_upload" version="1.0" workflow_compatible="false"> + <description> + from your computer + </description> + <action module="galaxy.tools.actions.upload" class="UploadToolAction"/> + <command interpreter="python"> + exec_upload.py $GALAXY_ROOT_DIR $GALAXY_DATATYPES_CONF_FILE $paramfile + #set $outnum = 0 + #while $varExists('output%i' % $outnum): + #set $output = $getVar('output%i' % $outnum) + #set $outnum += 1 + #set $file_name = $output.file_name + ## FIXME: This is not future-proof for other uses of external_filename (other than for use by the library upload's "link data" feature) + #if $output.dataset.dataset.external_filename: + #set $file_name = "None" + #end if + ${output.dataset.dataset.id}:${output.files_path}:${file_name} + #end while + </command> + <inputs nginx_upload="false"> + <param name="file_type" type="select" label="File Format" help="Only the zip format is currently available."> + <options from_parameter="tool.app.datatypes_registry.upload_file_formats"> + <column name="value" index="1"/> + <column name="name" index="0"/> + <filter type="sort_by" column="0"/> + <filter type="add_value" name="zip" value="auto" index="0"/> + </options> + </param> + <param name="async_datasets" type="hidden" value="None"/> + <upload_dataset name="files" title="Specify Files for Dataset" file_type_name="file_type" metadata_ref="files_metadata"> + <param name="file_data" type="file" size="30" label="File" ajax-upload="true" help="TIP: Due to browser limitations, uploading files larger than 2GB is guaranteed to fail. To upload large files, use FTP (if enabled by the site administrator)."> + <validator type="expression" message="You will need to reselect the file you specified (%s)." substitute_value_in_message="True">not ( ( isinstance( value, unicode ) or isinstance( value, str ) ) and value != "" )</validator> <!-- use validator to post message to user about needing to reselect the file, since most browsers won't accept the value attribute for file inputs --> + </param> + <param name="url_paste" type="hidden" label="URL upload is not currently available."/> + <param name="ftp_files" type="ftpfile" label="Files uploaded via FTP"/> + <!-- Swap the following parameter for the select one that follows to + enable the to_posix_lines option in the Web GUI. See Bitbucket + Pull Request 171 for more information. --> + <param name="to_posix_lines" type="hidden" value="Yes" /> + + <param name="NAME" type="hidden" help="Name for dataset in upload"></param> + </upload_dataset> + </inputs> + <help> + +**Memo** + +Please use this tool to upload your datasets. The default uploader automatically uncompressed your compressed file. The zipped file is accepted in this pipeline. + + </help> +</tool>