Quality Control

# HG changeset patch # User sblanck # Date 1490106527 14400 # Node ID 93451f832736a6bf51fc4762b11bb852ff24c943 # Parent f8a2f1fec8ef06dd0417023e4dfc5563f2b52ad9 Uploaded diff -r f8a2f1fec8ef -r 93451f832736 AffyQCnormalization.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/AffyQCnormalization.R Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,110 @@ +library(Biobase) +library(GEOquery) +library(GEOmetadb) +library(limma) +library(jsonlite) +library(affy) +library(affyPLM) +library(dplyr) + +cargs<-commandArgs() +cargs<-cargs[(which(cargs=="--args")+1):length(cargs)] +nbargs=length(cargs) +celList=vector() +celFileNameList=vector() +for (i in seq(1,nbargs-7,2)) +{ + celList=c(celList,cargs[[i]]) + celFileNameList=c(celFileNameList,cargs[[i+1]]) +} + + +normalization=cargs[[nbargs-6]] +result_export_eset=cargs[[nbargs-5]] +result=cargs[[nbargs-4]] +result.path=cargs[[nbargs-3]] +result.template=cargs[[nbargs-2]] + +dir.create(result.path, showWarnings = TRUE, recursive = TRUE) +for(i in 1:length(celList)) +{ + file.copy(celList[i],paste0("./",celFileNameList[i])) +} + +data <- ReadAffy(filenames=celFileNameList, celfile.path=".") +htmlfile=readChar(result.template, file.info(result.template)$size) + +boxplot="boxplot.png" +png(boxplot,width=800,height = 400) +par(mar=c(7,5,1,1)) +boxplot(data,las=2,outline=FALSE) +dev.off() +htmlfile=gsub(x=htmlfile,pattern = "###BOXPLOT###",replacement = boxplot, fixed = TRUE) +file.copy(boxplot,result.path) + +images="images.png" +nblines=length(celList)%/%4 + as.numeric((length(celList)%%4)!=0) +png(images,width=800,height = 200*nblines) +par(mfrow=c(nblines,4)) +image(data) +dev.off() +htmlfile=gsub(x=htmlfile,pattern = "###IMAGES###",replacement = images, fixed = TRUE) +file.copy(images,result.path) + + +plotMA="plotMA.png" +nblines=length(celList)%/%3 + as.numeric((length(celList)%%3)!=0) +png(plotMA,width=800,height =300*nblines ) +par(mfrow=c(nblines,3)) +MAplot(data) +dev.off() +htmlfile=gsub(x=htmlfile,pattern = "###PLOTMA###",replacement = plotMA, fixed = TRUE) +file.copy(plotMA,result.path) + + +if (normalization == "rma") { + eset <- rma(data) +} else if (normalization == "quantile") { + eset = rma(data,background = FALSE,normalize = TRUE) +} else if (normalization == "background"){ + eset = rma(data,background = TRUE ,normalize = FALSE) +} else if (normalization == "log2") { + eset = rma(data,background = FALSE ,normalize = FALSE) +} + + +boxplotnorm="boxplotnorm.png" +png(boxplotnorm,width=800,height = 400) +par(mar=c(7,5,1,1)) +boxplot(data.frame(exprs(eset)),las=2,outline=FALSE) +dev.off() +htmlfile=gsub(x=htmlfile,pattern = "###BOXPLOTNORM###",replacement = boxplotnorm, fixed = TRUE) +file.copy(boxplotnorm,result.path) + +plotMAnorm="plotMAnorm.png" +nblines=length(celList)%/%3 + as.numeric((length(celList)%%3)!=0) +png(plotMAnorm,width=800,height =300*nblines ) +par(mfrow=c(nblines,3)) +#for (i in 1:length(celList)){ +MAplot(eset) +#} + +dev.off() +htmlfile=gsub(x=htmlfile,pattern = "###PLOTMANORM###",replacement = plotMAnorm, fixed = TRUE) +file.copy(plotMAnorm,result.path) +#write.table(tolower(c(condition1Name,condition2Name)),quote = FALSE,col.names = FALSE, row.names=FALSE,file=result_export_conditions) +#saveConditions=c(condition1Name,condition2Name) +save(eset,file=result_export_eset) +write(htmlfile,result) + +#l=list() +#for(i in 1:length(esets)) +#{ +# l[[paste("study",i,sep="")]]<-res[[i]] +#} +#l[["Meta"]]=res[[length(res)-1]] +#showVenn(res,file.path(temp.files.path,"venn.png")) +#writeLines(c("

Venn diagram

"),file.conn) +#writeLines(c("

"),file.conn) +#writeLines(c(""),file.conn) +#close(file.conn) \ No newline at end of file diff -r f8a2f1fec8ef -r 93451f832736 AffyQCnormalization.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/AffyQCnormalization.xml Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,62 @@ + + Quality control and normalization of affymetrix expression data + + + r-smagexp + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r f8a2f1fec8ef -r 93451f832736 AffyQCnormalization_tpl.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/AffyQCnormalization_tpl.html Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,37 @@ + + + + + + Quality Control + + + + + +

Raw data

+ +

+ + + +

Normalized data

+ +

+ + + diff -r f8a2f1fec8ef -r 93451f832736 Analyse.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Analyse.R Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,145 @@ +library(Biobase) +library(GEOquery) +library(GEOmetadb) +library(limma) +library(jsonlite) +library(affy) +library(dplyr) + +cargs<-commandArgs() +cargs<-cargs[(which(cargs=="--args")+1):length(cargs)] +nbargs=length(cargs) + +load(cargs[[nbargs-13]]) +targetFile=cargs[[nbargs-12]] +condition1Name=cargs[[nbargs-11]] +condition1=cargs[[nbargs-10]] +condition2Name=cargs[[nbargs-9]] +condition2=cargs[[nbargs-8]] +nbresult=cargs[[nbargs-7]] +result_export_eset=cargs[[nbargs-6]] +result=cargs[[nbargs-5]] +result.path=cargs[[nbargs-4]] +result.tabular=cargs[[nbargs-3]] +result.template=cargs[[nbargs-2]] + +#file.copy(targetFile,"./targetFile.txt") + +condition1_tmp <- strsplit(condition1,",") +condition1 <-unlist(condition1_tmp) + +condition2_tmp <- strsplit(condition2,",") +condition2<-unlist(condition2_tmp) + +conditions=c(condition1,condition2) + +#nbresult=1000 +dir.create(result.path, showWarnings = TRUE, recursive = FALSE) + +targets <- read.table(targetFile,sep="\t") + +eset=eset[,which(rownames(eset@phenoData@data) %in% conditions)] +#condition1Name=make.names(condition1Name) +#condition2Name=make.names(condition2Name) +#condition1Name=gsub("_","",condition1Name) +#condition2Name=gsub("_","",condition2Name) +#condition1Name +#condition2Name + + +eset@phenoData@data$source_name_ch1="" +eset@phenoData@data$source_name_ch1[which(rownames(eset@phenoData@data) %in% condition1)]=condition1Name +eset@phenoData@data$source_name_ch1[which(rownames(eset@phenoData@data) %in% condition2)]=condition2Name +#condition1Name +#condition2Name + +condNames=paste0("G",as.numeric(as.character(pData(eset)["source_name_ch1"][,1])!=condition1Name)) +#condNames=make.names(targets[,2]) +#condNames=gsub("_","",condNames) + +f <- as.factor(condNames) +#eset$description <- factors +design <- model.matrix(~ 0+f) +design + +colnames(design) <- levels(f) +colnames(design) +fit <- lmFit(eset, design) +fit +#cont.matrix <- makeContrasts(C1=paste0(condition1Name,"-",condition2Name), levels=design) +cont.matrix <- makeContrasts(G0-G1, levels=design) +cont.matrix +fit2 <- contrasts.fit(fit, cont.matrix) +fit2 <- eBayes(fit2) +fit2 +tT <- topTable(fit2, adjust="fdr", sort.by="B", number=nbresult) + +#head(exprs(eset)) + +gpl <- annotation(eset) +if (substr(x = gpl,1,3)!="GPL"){ + #if the annotation info does not start with "GPL" we retrieve the correspondin GPL annotation + mapping=read.csv("/galaxy-tools/transcriptomics/db/gplToBioc.csv",stringsAsFactors=FALSE) + gpl=mapping[which(mapping$bioc_package==annotation(eset)),]$gpl + gpl=gpl[1] + + annotation(eset)=gpl + + platf <- getGEO(gpl, AnnotGPL=TRUE) + ncbifd <- data.frame(attr(dataTable(platf), "table")) + + fData(eset)["ID"]=row.names(fData(eset)) + fData(eset)=merge(x=fData(eset),y=ncbifd,all.x = TRUE, by = "ID") + colnames(fData(eset))[4]="ENTREZ_GENE_ID" + row.names(fData(eset))=fData(eset)[,"ID"] + + tT <- add_rownames(tT, "ID") + +} else { + + gpl <- annotation(eset) + platf <- getGEO(gpl, AnnotGPL=TRUE) + ncbifd <- data.frame(attr(dataTable(platf), "table")) + + if (!("ID" %in% colnames(tT))){ + tT <- add_rownames(tT, "ID")} + +} + +tT <- merge(tT, ncbifd, by="ID") +tT <- tT[order(tT$P.Value), ] +tT <- subset(tT, select=c("Platform_SPOTID","ID","adj.P.Val","P.Value","t","B","logFC","Gene.symbol","Gene.title","Gene.ID","Chromosome.annotation","GO.Function.ID")) +tT<-format(tT, digits=2, nsmall=2) +head(tT) +colnames(tT)=gsub(pattern = "\\.",replacement = "_",colnames(tT)) +matrixtT=as.matrix(tT) +datajson=toJSON(matrixtT,pretty = TRUE) + +htmlfile=readChar(result.template, file.info(result.template)$size) +htmlfile=gsub(x=htmlfile,pattern = "###DATAJSON###",replacement = datajson, fixed = TRUE) +dir.create(result.path, showWarnings = TRUE, recursive = FALSE) + +boxplot="boxplot.png" +png(boxplot,width=800,height = 400) +par(mar=c(7,5,1,1)) +boxplot(exprs(eset),las=2,outline=FALSE) +dev.off() +htmlfile=gsub(x=htmlfile,pattern = "###BOXPLOT###",replacement = boxplot, fixed = TRUE) +file.copy(boxplot,result.path) + +histopvalue="histopvalue.png" + +png(histopvalue,width=800,height = 400) +par(mfrow=c(1,2)) +hist(fit2$F.p.value,nclass=100,main="Histogram of p-values", xlab="p-values",ylab="frequency") +volcanoplot(fit2,coef=1,highlight=10,main="Volcano plot") +htmlfile=gsub(x=htmlfile,pattern = "###HIST###",replacement = histopvalue, fixed = TRUE) +dev.off() +file.copy(histopvalue,result.path) + +#write.table(tolower(c(condition1Name,condition2Name)),quote = FALSE,col.names = FALSE, row.names=FALSE,file=result_export_conditions) +saveConditions=c(condition1Name,condition2Name) +save(eset,saveConditions,file=result_export_eset) +write.table(x=tT[,-1],file=result.tabular,quote=FALSE,row.names=FALSE,col.names=TRUE,sep="\t") +write(htmlfile,result) + diff -r f8a2f1fec8ef -r 93451f832736 Analyse.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Analyse.xml Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,93 @@ + + Perform gene expression analysis thanks to limma + + + r-smagexp + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r f8a2f1fec8ef -r 93451f832736 Analyse_tpl.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Analyse_tpl.html Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,162 @@ + + + + + + Limma analysis results + + + + + + + + + + + + + + + +

Boxplots

+ + +

P-value histogram and Volcano plot

+ + + + +

+ + diff -r f8a2f1fec8ef -r 93451f832736 GEOQuery.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GEOQuery.R Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,56 @@ + +library(GEOquery) + + +cargs<-commandArgs() +cargs<-cargs[(which(cargs=="--args")+1):length(cargs)] + +GEOQueryID<-cargs[[1]] +GEOQueryData<-cargs[[2]] +GEOQueryRData<-cargs[[3]] +conditionFile<-cargs[[4]] +transformation<-cargs[[5]] + +data1 = getGEO(GEOQueryID) +eset=data1[[1]] + +#check if datas are in log2 space +normalization<-function(data){ + ex <- exprs(data) + qx <- as.numeric(quantile(ex, c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm=T)) + LogC <- (qx[5] > 100) || + (qx[6]-qx[1] > 50 && qx[2] > 0) || + (qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2) + if (LogC) { ex[which(ex <= 0)] <- NaN + return (log2(ex)) } else { + return (ex) + } +} + +if (transformation=="auto"){ + exprs(eset)=normalization(eset) +} else if (transformation=="yes"){ + exprs(eset)=log2(exprs(eset)) +} + +matrixData=exprs(eset) +write.table(matrixData,col.names=NA,row.names=TRUE,sep="\t",file=GEOQueryData) + +#Construcion of condition file +#if there is data in "source_name_ch1" field, we keep this data as a condition +#else we keep the "description" field data. +if (length(unique(tolower(pData(data1[[1]])["source_name_ch1"][,1])))>1) +{ + conditions=pData(data1[[1]])["source_name_ch1"] + description=paste0(as.vector(pData(data1[[1]])["geo_accession"][,1]), " ",as.vector(pData(data1[[1]])["title"][,1]), " ", as.vector(conditions[,1])) +} else +{ + conditions=pData(data1[[1]])["description"] + description=paste0(as.vector(pData(data1[[1]])["geo_accession"][,1]), " ",as.vector(pData(data1[[1]])["title"][,1]), " ", as.vector(conditions[,1])) +} + +conditions[,1]=tolower(conditions[,1]) +pData(eset)["source_name_ch1"]=conditions + +write.table(cbind(conditions,description),quote = FALSE,col.names = FALSE, row.names=TRUE,file=conditionFile,sep="\t") +save(eset,conditions,file=GEOQueryRData) \ No newline at end of file diff -r f8a2f1fec8ef -r 93451f832736 GEOQuery.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GEOQuery.xml Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,66 @@ + + GEOQuery wrapper + + + r-smagexp + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r f8a2f1fec8ef -r 93451f832736 ImportDataFromMatrix.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ImportDataFromMatrix.R Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,74 @@ +library(Biobase) +library(GEOquery) +library(GEOmetadb) +library(limma) +library(jsonlite) +library(affy) +library(dplyr) +library(affyPLM) + +cargs<-commandArgs() +cargs<-cargs[(which(cargs=="--args")+1):length(cargs)] +nbargs=length(cargs) + +dataFile=cargs[[nbargs-9]] +normalization=cargs[[nbargs-8]] +conditionsFile=cargs[[nbargs-7]] +annotation=cargs[[nbargs-6]] +result_export_eset=cargs[[nbargs-5]] +result=cargs[[nbargs-4]] +result.path=cargs[[nbargs-3]] +result.template=cargs[[nbargs-2]] + +dir.create(result.path, showWarnings = TRUE, recursive = FALSE) + +data=as.matrix(read.table(file = dataFile)) +conditions=read.table(file=conditionsFile,sep = "\t",row.names=1) +htmlfile=readChar(result.template, file.info(result.template)$size) + +colnames(conditions)=c("source_name_ch1","description") +phenodata<-new("AnnotatedDataFrame",data=conditions) + +eset=ExpressionSet(assayData=data,phenoData=phenodata,annotation=annotation) + +if (normalization == "quantile") { + eset <- normalize.ExpressionSet.quantiles(eset, transfn="log") +} else if (normalization == "log2") { + exprs(eset) = log2(exprs(eset)) +} + +boxplotnorm="boxplotnorm.png" +png(boxplotnorm,width=800,height = 400) +par(mar=c(7,5,1,1)) +boxplot(data.frame(exprs(eset)),las=2,outline=FALSE) +dev.off() +htmlfile=gsub(x=htmlfile,pattern = "###BOXPLOTNORM###",replacement = boxplotnorm, fixed = TRUE) +file.copy(boxplotnorm,result.path) + +plotMAnorm="plotMAnorm.png" +nblines=length(colnames(data))%/%3 + as.numeric((length(colnames(data))%%3)!=0) +png(plotMAnorm,width=800,height =300*nblines ) +par(mfrow=c(nblines,3)) +#for (i in 1:length(colnames(data))){ + MAplot(eset) +#} + +dev.off() +htmlfile=gsub(x=htmlfile,pattern = "###PLOTMANORM###",replacement = plotMAnorm, fixed = TRUE) +file.copy(plotMAnorm,result.path) +#write.table(tolower(c(condition1Name,condition2Name)),quote = FALSE,col.names = FALSE, row.names=FALSE,file=result_export_conditions) +#saveConditions=c(condition1Name,condition2Name) +save(eset,file=result_export_eset) +write(htmlfile,result) + +#l=list() +#for(i in 1:length(esets)) +#{ +# l[[paste("study",i,sep="")]]<-res[[i]] +#} +#l[["Meta"]]=res[[length(res)-1]] +#showVenn(res,file.path(temp.files.path,"venn.png")) +#writeLines(c("

Venn diagram

"),file.conn) +#writeLines(c("

"),file.conn) +#writeLines(c(""),file.conn) +#close(file.conn) \ No newline at end of file diff -r f8a2f1fec8ef -r 93451f832736 ImportDataFromMatrix.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ImportDataFromMatrix.xml Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,79 @@ + + Quality control and normalization of a custom matrix expression data + + + r-smagexp + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r f8a2f1fec8ef -r 93451f832736 ImportDataFromMatrix_tpl.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ImportDataFromMatrix_tpl.html Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,22 @@ + + + + + + Quality Control + + + + + +

Normalized data

+ +

+ + + diff -r f8a2f1fec8ef -r 93451f832736 MetaMA.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MetaMA.R Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,209 @@ +library(metaMA) +library(affy) +library(annaffy) +library(VennDiagram) +library(GEOquery) + +cargs<-commandArgs() +cargs<-cargs[(which(cargs=="--args")+1):length(cargs)] + +nbargs=length(cargs) +rdataList=list() +condition1List=list() +condition2List=list() + +for (i in 1:(nbargs-5)) +{ + Rdata=cargs[[i]] + #condition1=cargs[[i+1]] + #condition2=cargs[[i+2]] + load(Rdata) + + rdataList=c(rdataList,(eset)) + #condition1List=c(condition1List,condition1) + #condition2List=c(condition2List,condition2) + condition1List=c(condition1List,saveConditions[1]) + condition2List=c(condition2List,saveConditions[2]) + +} + +#tables<-cargs[[1]] +#tech<-cargs[[2]] +result.html<-cargs[[nbargs-4]] +result.path<-cargs[[nbargs-3]] +#result.venn<-cargs[[nbargs-3]] +result.template<-cargs[[nbargs-2]] + +#sink("/dev/null") +#dir.create(temp.files.path,recursive=TRUE) +#file.conn=file(diag.html,open="w") + +#writeLines(c(""),file.conn) + +showVenn<-function(res,file) +{ + venn.plot<-venn.diagram(x = c(res[c(1:(length(res)-3))],meta=list(res$Meta)), + filename = NULL, col = "black", + fill = c(1:(length(res)-2)), + margin=0.05, alpha = 0.6) + jpeg(file) + grid.draw(venn.plot) + dev.off() +} + + +library("org.Hs.eg.db") +x <- org.Hs.egUNIGENE +mapped_genes <- mappedkeys(x) +link <- as.list(x[mapped_genes]) + +probe2unigene<-function(expset){ + #construction of the map probe->unigene + probes=rownames(exprs(expset)) + gene_id=fData(expset)[probes,"ENTREZ_GENE_ID"] + unigene=link[gene_id] + names(unigene)<-probes + probe_unigene=unigene +} + +unigene2probe<-function(map) +{ + suppressWarnings(x <- cbind(unlist(map), names(map))) + unigene_probe=split(x[,2], x[,1]) +} + +convert2metaMA<-function(listStudies,mergemeth=mean) +{ + if (!(class(listStudies) %in% c("list"))) { + stop("listStudies must be a list") + } + conv_unigene=lapply(listStudies, + FUN=function(x) unigene2probe(probe2unigene(x))) + + id=lapply(conv_unigene,names) + inter=Reduce(intersect,id) + if(length(inter)<=0){stop("no common genes")} + print(paste(length(inter),"genes in common")) + esets=lapply(1:length(listStudies),FUN=function(i){ + l=lapply(conv_unigene[[i]][inter], + FUN=function(x) exprs(listStudies[[i]])[x,,drop=TRUE]) + esetsgr=t(sapply(l,FUN=function(ll) if(is.null(dim(ll))){ll} + else{apply(ll,2,mergemeth)})) + esetsgr + }) + return(list(esets=esets,conv.unigene=conv_unigene)) +} + +normalization<-function(data){ + ex <- exprs(data) + qx <- as.numeric(quantile(ex, c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm=T)) + LogC <- (qx[5] > 100) || + (qx[6]-qx[1] > 50 && qx[2] > 0) || + (qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2) + if (LogC) { ex[which(ex <= 0)] <- NaN + return (log2(ex)) } else { + return (ex) + } +} + + + + +filterCondition<-function(gset,condition1, condition2){ + selected=c(which((tolower(as.character(pData(gset)["source_name_ch1"][,1]))==condition1)), + which(tolower(as.character(pData(gset)["source_name_ch1"][,1]))==condition2)) + + return(gset[,selected]) + } + +rdatalist <- lapply(rdataList, FUN=function(datalist) normalization(datalist)) + +classes=list() +filteredRdataList=list() +for (i in 1:length(rdatalist)) +{ + currentData=rdataList[[i]] + currentCondition1=condition1List[[i]] + currentCondition2=condition2List[[i]] + #currentData=filterCondition(currentData,currentCondition1,currentCondition2) + currentClasses=as.numeric(tolower(as.character(pData(currentData)["source_name_ch1"][,1]))==currentCondition1) + filteredRdataList=c(filteredRdataList,currentData) + classes=c(classes,list(currentClasses)) + #write(file="~/galaxy-modal/classes.txt",classes) +} + +#rdataList=filteredRdataList +conv=convert2metaMA(rdataList) +esets=conv$esets +conv_unigene=conv$conv.unigene + +#write(file="~/galaxy-modal/esets.txt",length(esets)) +#write(file="~/galaxy-modal/classes.txt",length(classes)) +res=pvalcombination(esets=esets,classes=classes,moderated="limma") +resIDDIRR=IDDIRR(res$Meta,res$AllIndStudies) +length(res$Meta) +Hs.Meta=rownames(esets[[1]])[res$Meta] +origId.Meta=lapply(conv_unigene,FUN=function(vec) as.vector(unlist(vec[Hs.Meta]))) + +gpllist <- lapply(rdataList, FUN=function(ann) annotation(ann)) +platflist <- lapply(gpllist, FUN=function(gpl) getGEO(gpl, AnnotGPL=TRUE)) +ncbifdlist <- lapply(platflist, FUN=function(data) data.frame(attr(dataTable(data), "table"))) +ncbifdresult=lapply(1:length(origId.Meta), FUN=function(i) ncbifdlist[[i]][which(ncbifdlist[[i]]$ID %in% origId.Meta[[i]]),]) +ncbidfannot=do.call(rbind,ncbifdresult) +ncbidfannot <- subset(ncbidfannot, select=c("Platform_SPOTID","ID","Gene.symbol","Gene.title","Gene.ID","Chromosome.annotation","GO.Function.ID")) + +library(jsonlite) +matrixncbidfannot=as.matrix(ncbidfannot) +datajson=toJSON(matrixncbidfannot,pretty = TRUE) +summaryjson=toJSON(as.matrix(t(resIDDIRR)),pretty = TRUE) + + +#vennsplit=strsplit(result.venn,split="/")[[1]] +#venn=paste0("./",vennsplit[length(vennsplit)]) + + +vennFilename="venn.png" +vennFile=file.path(result.path,vennFilename) +htmlfile=readChar(result.template, file.info(result.template)$size) +htmlfile=gsub(x=htmlfile,pattern = "###DATAJSON###",replacement = datajson, fixed = TRUE) +htmlfile=gsub(x=htmlfile,pattern = "###SUMMARYJSON###",replacement = summaryjson, fixed = TRUE) +htmlfile=gsub(x=htmlfile,pattern = "###VENN###",replacement = vennFilename, fixed = TRUE) +write(htmlfile,result.html) + +library(VennDiagram) +flog.threshold(ERROR) + +#venn.plot<-venn.diagram(x = c(res[c(1:(length(res)-3))],meta=list(res$Meta)),filename = v, col = "black", fill = c(1:(length(res)-2)), margin=0.05, alpha = 0.6,imagetype = "png") +dir.create(result.path, showWarnings = TRUE, recursive = FALSE) + +showVenn<-function(liste,file) +{ + venn.plot<-venn.diagram(x = liste, + filename = vennFilename, col = "black", + fill = 1:length(liste)+1, + margin=0.05, alpha = 0.6,imagetype = "png") +# png(file); +# grid.draw(venn.plot); +# dev.off(); + +} + +l=list() +for(i in 1:length(esets)) +{ + l[[paste("study",i,sep="")]]<-res[[i]] +} +l[["Meta"]]=res[[length(res)-1]] +showVenn(l,vennFile) +file.copy(vennFilename,result.path) +#l=list() +#for(i in 1:length(esets)) +#{ +# l[[paste("study",i,sep="")]]<-res[[i]] +#} +#l[["Meta"]]=res[[length(res)-1]] +#showVenn(res,result.venn) +#writeLines(c("

Venn diagram

"),file.conn) +#writeLines(c("

"),file.conn) +#writeLines(c(""),file.conn) +#close(file.conn) \ No newline at end of file diff -r f8a2f1fec8ef -r 93451f832736 MetaMA.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MetaMA.xml Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,51 @@ + + Perform meta-analysis thanks to metaMA. + + + r-smagexp + + + + + + + + + + + + + + + + + + + + diff -r f8a2f1fec8ef -r 93451f832736 MetaMa_tpl.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MetaMa_tpl.html Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,188 @@ + + + + + + DataTables example - Ajax sourced data + + + + + + + + + + + + + + + + + + + + + +

Venn diagram

+ +

Summary

+
+DE : Number of differentially expressed genes
+IDD (Integration Driven discoveries) : number of genes that are declared DE in the meta-analysis that were not identified in any of the individual studies alone
+Loss : Number of genes that are identified DE in individual studies but not in meta-analysis
+IDR (Integration-driven Discovery Rate) : corresponding proportions of IDD
+IRR (Integration-driven Revision) : corresponding proportions of Loss
+

Details

+ +

+ + diff -r f8a2f1fec8ef -r 93451f832736 MetaRNASeq.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MetaRNASeq.R Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,213 @@ +cargs <- commandArgs() +cargs <- cargs[(which(cargs == "--args")+1):length(cargs)] +nbargs=length(cargs) +listfiles=vector() +listfilenames=vector() +for (i in seq(1,nbargs-6,2)) { + listfiles=c(listfiles,cargs[[i]]) + listfilenames=c(listfilenames,cargs[[i+1]]) +} +#mod<-cargs[[length(cargs) - 6]] +outputfile <- cargs[[length(cargs) - 5]] +result.html = cargs[[length(cargs) - 4]] +html.files.path=cargs[[length(cargs) - 3]] +result.template=cargs[[length(cargs) - 2]] + +alpha=0.05 + +#print(comparison) + +listData=lapply(listfiles,read.table) +orderData=lapply(listData, function(x) x[order(x[1]), ]) +rawpval=lapply(orderData,function(x) x[6]) +rawpval=lapply(rawpval, function(x) as.numeric(unlist(x))) + +DE=list() +DE=lapply(orderData, function(x) ifelse(x[7]<=0.05,1,0)) + +FC=list() +FC=lapply(orderData, function(x) x[3]) + +DE=as.data.frame(DE) +colnames(DE)=listfilenames +FC=as.data.frame(FC) +colnames(FC)=listfilenames +# the comparison must only have two values and the conds must +# be a vector from those values, at least one of each. + +#if (length(comparison) != 2) { +# stop("Comparison type must be a tuple: ", cargs[length(cargs) - 8]) +#} + +sink("/dev/null") +dir.create(html.files.path, recursive=TRUE) +#library(DESeq) +#library(HTSFilter) + +#DE=list() +#FC=list() +#i=1 + +# Open the html output file +#file.conn = file(diag.html, open="w") + +#writeLines( c(""), file.conn) + +# Perform deseq analysis on each study +#for(i in 1:length(listfiles)) +#{ +# f=listfiles[i] +# fname=listfilenames[i] +# study_name=unlist(strsplit(fname,"[.]"))[1] +# print(paste0("study.name ",study_name)) +# d <- read.table(f, sep=" ", header=TRUE, row.names=1) +# conds<-sapply(strsplit(colnames(d),"[.]"),FUN=function(x) x[1]) +# if (length(unique(conds)) != 2) { +# warning(as.data.frame(strsplit(colnames(d),"[.]"))) +# stop("You can only have two columns types: ", paste(conds,collapse=" ")) +# } +# if (!identical(sort(comparison), sort(unique(conds)))) { +# stop("Column types must use the two names from Comparison type, and vice versa. Must have at least one of each in the Column types.\nColumn types: ", cargs[2], "\n", "Comparison type: ", cargs[3]) +# } +# if (length(d) != length(conds)) { +# stop("Number of total sample columns in counts file must correspond to the columns types field. E.g. if column types is 'kidney,kidney,liver,liver' then number of sample columns in counts file must be 4 as well.") +# } +# +# cds <- newCountDataSet(d, conds) +# cds <- estimateSizeFactors(cds) +# +# cdsBlind <- estimateDispersions( cds, method="blind" ) +# +# if (length(conds) != 2) { +# cds <- estimateDispersions( cds ) +# norep = FALSE +# } +# +# if (length(conds) == 2) { +# cds <- estimateDispersions( cds, method=method, sharingMode=mod, fitType="parametric" ) +# norep = TRUE +# } +# +# filter<-HTSFilter(cds, plot=FALSE) +# cds.filter<-filter$filteredData +# on.index<-which(filter$on==1) +# +# res<-as.data.frame(matrix(NA,nrow=nrow(cds),ncol=ncol(cds))) +# nbT <- nbinomTest(cds.filter, comparison[1], comparison[2]) +# colnames(res)<-colnames(nbT) +# res[on.index,]<-nbT +# #write.table(res[order(res$padj), ], file=outputfile, quote=FALSE, row.names=FALSE, sep="\t") +# +# +# temp.pval.plot = file.path( html.files.path, paste("PvalHist",i,".png",sep="")) +# png( temp.pval.plot, width=500, height=500 ) +# hist(res$pval, breaks=100, col="skyblue", border="slateblue", main="") +# dev.off() +# +# writeLines( c("

P-value histogram for ",study_name,"

"), file.conn) +# writeLines( c("

"), file.conn) +# +# #on enregistre la p-value +# rawpval[[study_name]]<-res$pval +# DE[[study_name]]<-ifelse(res$padj<=alpha,1,0) +# FC[[study_name]]<-res$log2FoldChange +# +# i=i+1 +#} + + +# combinations +library(metaRNASeq) +fishcomb<-fishercomb(rawpval, BHth=alpha) +warning(length(rawpval)) +invnormcomb<-invnorm(rawpval, nrep=c(8,8), BHth=alpha) +#DE[["fishercomb"]]<-ifelse(fishcomb$adjpval<=alpha,1,0) +#DE[["invnormcomb"]]<-ifelse(invnormcomb$adjpval<=alpha,1,0) + +signsFC<-mapply(FC,FUN=function(x) sign(x)) +sumsigns<-apply(signsFC,1,sum) +commonsgnFC<-ifelse(abs(sumsigns)==dim(signsFC)[2],sign(sumsigns),0) + +DEresults <- data.frame(DE=DE,"DE.fishercomb"=ifelse(fishcomb$adjpval<=alpha,1,0),"DE.invnorm"=ifelse(invnormcomb$adjpval<=alpha,1,0)) + +unionDE <- unique(c(fishcomb$DEindices,invnormcomb$DEindices)) +FC.selecDE <- data.frame(DEresults[unionDE,],FC[unionDE,],signFC=commonsgnFC[unionDE]) +keepDE <- FC.selecDE[which(abs(FC.selecDE$signFC)==1),] + +fishcomb_de <- rownames(keepDE)[which(keepDE[,"DE.fishercomb"]==1)] +invnorm_de <- rownames(keepDE)[which(keepDE[,"DE.invnorm"]==1)] +indstudy_de = list() +for (i in 1:length(listfiles)) { + currentIndstudy_de = rownames(keepDE)[which(keepDE[,i]==1)] + indstudy_de[[listfilenames[i]]]=currentIndstudy_de +} + +IDDIRRfishcomb=IDD.IRR(fishcomb_de,indstudy_de) +IDDIRRinvnorm=IDD.IRR(invnorm_de,indstudy_de) + +#conflits<-data.frame(ID=listData[[1]][rownames(DEresults),1],Fishercomb=DEresults[["DE.fishercomb"]],Invnormcomb=DEresults[["DE.invnorm"]],sign=commonsgnFC) +conflits<-data.frame(ID=listData[[1]][rownames(DEresults),1],DE=DEresults,FC=FC,signFC=commonsgnFC) +#write DE outputfile +write.table(conflits, outputfile,sep="\t",,row.names=FALSE) +library(VennDiagram) +DE_num=apply(DEresults, 2, FUN=function(x) which(x==1)) +venn.plot<-venn.diagram(x=as.list(DE_num),filename=NULL, col="black", fill=1:length(DE_num)+1,alpha=0.6) +temp.venn.plot = file.path( html.files.path, paste("venn.png")) +png(temp.venn.plot,width=500,height=500) +grid.draw(venn.plot) +dev.off() + +library(jsonlite) +matrixConflits=as.matrix(conflits) +datajson=toJSON(matrixConflits,pretty = TRUE) +summaryFishcombjson=toJSON(as.matrix(t(IDDIRRfishcomb)),pretty = TRUE) +summaryinvnormjson=toJSON(as.matrix(t(IDDIRRinvnorm)),pretty = TRUE) + + +#vennsplit=strsplit(result.venn,split="/")[[1]] +#venn=paste0("./",vennsplit[length(vennsplit)]) + + +vennFilename="venn.png" +vennFile=file.path(html.files.path,vennFilename) +htmlfile=readChar(result.template, file.info(result.template)$size) +htmlfile=gsub(x=htmlfile,pattern = "###DATAJSON###",replacement = datajson, fixed = TRUE) +htmlfile=gsub(x=htmlfile,pattern = "###FISHSUMMARYJSON###",replacement = summaryFishcombjson, fixed = TRUE) +htmlfile=gsub(x=htmlfile,pattern = "###INVSUMMARYJSON###",replacement = summaryinvnormjson, fixed = TRUE) +htmlfile=gsub(x=htmlfile,pattern = "###VENN###",replacement = vennFilename, fixed = TRUE) +write(htmlfile,result.html) + +#library(VennDiagram) +#flog.threshold(ERROR) +# +##venn.plot<-venn.diagram(x = c(res[c(1:(length(res)-3))],meta=list(res$Meta)),filename = v, col = "black", fill = c(1:(length(res)-2)), margin=0.05, alpha = 0.6,imagetype = "png") +#dir.create(result.path, showWarnings = TRUE, recursive = FALSE) +# +#showVenn<-function(liste,file) +#{ +# venn.plot<-venn.diagram(x = liste, +# filename = vennFilename, col = "black", +# fill = 1:length(liste)+1, +# margin=0.05, alpha = 0.6,imagetype = "png") +## png(file); +## grid.draw(venn.plot); +## dev.off(); +# +#} +# +#l=list() +#for(i in 1:length(esets)) +#{ +# l[[paste("study",i,sep="")]]<-res[[i]] +#} +#l[["Meta"]]=res[[length(res)-1]] +#showVenn(l,vennFile) +#file.copy(vennFilename,result.path) + + +#writeLines( c("

Venn Plot

"), file.conn) +#writeLines( c("

"), file.conn) +#writeLines( c(""), file.conn) +#close(file.conn) +#print("passe6") +#sink(NULL) diff -r f8a2f1fec8ef -r 93451f832736 MetaRNASeq_tpl.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MetaRNASeq_tpl.html Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,167 @@ + + + + + + DataTables example - Ajax sourced data + + + + + + + + + + + + + + + + + + + + + +

Venn diagram

+ +

Fisher combination summary

+
+ +

Invnorm combination summary

+
+DE : Number of differentially expressed genes
+IDD (Integration Driven discoveries) : number of genes that are declared DE in the meta-analysis that were not identifed in any of the individual studies alone
+Loss : Number of genes that are identified DE in individual studies but not in meta-analysis
+IDR (Integration-driven Discovery Rate) : corresponding proportions of IDD
+IRR (Integration-driven Revision) : corresponding proportions of Loss
+ + + + + + + diff -r f8a2f1fec8ef -r 93451f832736 MetaRNAseq.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MetaRNAseq.xml Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,35 @@ + + perform meta-analysis thanks to metaRNAseq + + + r-smagexp + + + + + + + + + + + + + + + + + + + diff -r f8a2f1fec8ef -r 93451f832736 repository_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repository_dependencies.xml Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,4 @@ + + + + diff -r f8a2f1fec8ef -r 93451f832736 stderr_wrapper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/stderr_wrapper.py Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,57 @@ +#!/usr/bin/python + +""" +Wrapper that executes a program with its arguments but reports standard error +messages only if the program exit status was not 0. This is useful to prevent +Galaxy to interpret that there was an error if something was printed on stderr, +e.g. if this was simply a warning. +Example: ./stderr_wrapper.py myprog arg1 -f arg2 +Author: Florent Angly +""" + +import sys, subprocess + +assert sys.version_info[:2] >= ( 2, 4 ) + +def stop_err( msg ): + sys.stderr.write( "%s\n" % msg ) + sys.exit() + +def __main__(): + # Get command-line arguments + args = sys.argv + # Remove name of calling program, i.e. ./stderr_wrapper.py + args.pop(0) + # If there are no arguments left, we're done + if len(args) == 0: + return + + # If one needs to silence stdout + args.append( ">" ) + args.append( "/dev/null" ) + + #cmdline = " ".join(args) + #print cmdline + try: + # Run program + proc = subprocess.Popen( args=args, shell=False, stderr=subprocess.PIPE ) + returncode = proc.wait() + # Capture stderr, allowing for case where it's very large + stderr = '' + buffsize = 1048576 + try: + while True: + stderr += proc.stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + except OverflowError: + pass + # Running Grinder failed: write error message to stderr + if returncode != 0: + raise Exception, stderr + except Exception, e: + # Running Grinder failed: write error message to stderr + stop_err( 'Error: ' + str( e ) ) + + +if __name__ == "__main__": __main__()