Mercurial > repos > sblanck > smagexp
changeset 2:93451f832736 draft
Uploaded
author | sblanck |
---|---|
date | Tue, 21 Mar 2017 10:28:47 -0400 |
parents | f8a2f1fec8ef |
children | b01c4f260085 |
files | AffyQCnormalization.R AffyQCnormalization.xml AffyQCnormalization_tpl.html Analyse.R Analyse.xml Analyse_tpl.html GEOQuery.R GEOQuery.xml ImportDataFromMatrix.R ImportDataFromMatrix.xml ImportDataFromMatrix_tpl.html MetaMA.R MetaMA.xml MetaMa_tpl.html MetaRNASeq.R MetaRNASeq_tpl.html MetaRNAseq.xml repository_dependencies.xml stderr_wrapper.py |
diffstat | 19 files changed, 1830 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/AffyQCnormalization.R Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,110 @@ +library(Biobase) +library(GEOquery) +library(GEOmetadb) +library(limma) +library(jsonlite) +library(affy) +library(affyPLM) +library(dplyr) + +cargs<-commandArgs() +cargs<-cargs[(which(cargs=="--args")+1):length(cargs)] +nbargs=length(cargs) +celList=vector() +celFileNameList=vector() +for (i in seq(1,nbargs-7,2)) +{ + celList=c(celList,cargs[[i]]) + celFileNameList=c(celFileNameList,cargs[[i+1]]) +} + + +normalization=cargs[[nbargs-6]] +result_export_eset=cargs[[nbargs-5]] +result=cargs[[nbargs-4]] +result.path=cargs[[nbargs-3]] +result.template=cargs[[nbargs-2]] + +dir.create(result.path, showWarnings = TRUE, recursive = TRUE) +for(i in 1:length(celList)) +{ + file.copy(celList[i],paste0("./",celFileNameList[i])) +} + +data <- ReadAffy(filenames=celFileNameList, celfile.path=".") +htmlfile=readChar(result.template, file.info(result.template)$size) + +boxplot="boxplot.png" +png(boxplot,width=800,height = 400) +par(mar=c(7,5,1,1)) +boxplot(data,las=2,outline=FALSE) +dev.off() +htmlfile=gsub(x=htmlfile,pattern = "###BOXPLOT###",replacement = boxplot, fixed = TRUE) +file.copy(boxplot,result.path) + +images="images.png" +nblines=length(celList)%/%4 + as.numeric((length(celList)%%4)!=0) +png(images,width=800,height = 200*nblines) +par(mfrow=c(nblines,4)) +image(data) +dev.off() +htmlfile=gsub(x=htmlfile,pattern = "###IMAGES###",replacement = images, fixed = TRUE) +file.copy(images,result.path) + + +plotMA="plotMA.png" +nblines=length(celList)%/%3 + as.numeric((length(celList)%%3)!=0) +png(plotMA,width=800,height =300*nblines ) +par(mfrow=c(nblines,3)) +MAplot(data) +dev.off() +htmlfile=gsub(x=htmlfile,pattern = "###PLOTMA###",replacement = plotMA, fixed = TRUE) +file.copy(plotMA,result.path) + + +if (normalization == "rma") { + eset <- rma(data) +} else if (normalization == "quantile") { + eset = rma(data,background = FALSE,normalize = TRUE) +} else if (normalization == "background"){ + eset = rma(data,background = TRUE ,normalize = FALSE) +} else if (normalization == "log2") { + eset = rma(data,background = FALSE ,normalize = FALSE) +} + + +boxplotnorm="boxplotnorm.png" +png(boxplotnorm,width=800,height = 400) +par(mar=c(7,5,1,1)) +boxplot(data.frame(exprs(eset)),las=2,outline=FALSE) +dev.off() +htmlfile=gsub(x=htmlfile,pattern = "###BOXPLOTNORM###",replacement = boxplotnorm, fixed = TRUE) +file.copy(boxplotnorm,result.path) + +plotMAnorm="plotMAnorm.png" +nblines=length(celList)%/%3 + as.numeric((length(celList)%%3)!=0) +png(plotMAnorm,width=800,height =300*nblines ) +par(mfrow=c(nblines,3)) +#for (i in 1:length(celList)){ +MAplot(eset) +#} + +dev.off() +htmlfile=gsub(x=htmlfile,pattern = "###PLOTMANORM###",replacement = plotMAnorm, fixed = TRUE) +file.copy(plotMAnorm,result.path) +#write.table(tolower(c(condition1Name,condition2Name)),quote = FALSE,col.names = FALSE, row.names=FALSE,file=result_export_conditions) +#saveConditions=c(condition1Name,condition2Name) +save(eset,file=result_export_eset) +write(htmlfile,result) + +#l=list() +#for(i in 1:length(esets)) +#{ +# l[[paste("study",i,sep="")]]<-res[[i]] +#} +#l[["Meta"]]=res[[length(res)-1]] +#showVenn(res,file.path(temp.files.path,"venn.png")) +#writeLines(c("<h2>Venn diagram</h2>"),file.conn) +#writeLines(c("<img src='venn.png'><br/><br/>"),file.conn) +#writeLines(c("</body></html>"),file.conn) +#close(file.conn) \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/AffyQCnormalization.xml Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,62 @@ +<tool id="QCnormalization" name="QCnormalization" version="0.1.0"> + <description>Quality control and normalization of affymetrix expression data</description> + <requirements> + <!--container type="docker">sblanck/smat</container--> + <requirement type="package" version="0.1.0">r-smagexp</requirement> + </requirements> + <command interpreter="python"> + <![CDATA[ + stderr_wrapper.py Rscript + ${__tool_directory__}/AffyQCnormalization.R + #for $input in $inputs + "${input}" + "${input.name}" + #end for + "${normalization}" + $result_export_eset + $result_html + $result_html.files_path + /${__tool_directory__}/AffyQCnormalization_tpl.html + ]]> + </command> + + <inputs> + + <param name="inputs" type="data" format="cel" multiple="true" label=".CEL files" help=".CEL files to be used"/> + + <param name="normalization" type="select" label="Preprocessing/normalization"> + + <option value="rma">rma (backgroung correction + quantile normalization + log2)</option> + <option value="quantile">quantile normalization + log2</option> + <option value="background">background correction + log2</option> + <option value="log2">log2 only</option> + + + </param> + + </inputs> + <outputs> + <data format="rdata" name="result_export_eset" label="export normalized expressionSet"/> + <data format="html" name="result_html" label="QC result"/> + </outputs> + + <help> +<![CDATA[ + **What it does** + +The QCnormalization tool offers to ensure the quality of the data and to normalize them. Several normalization methods are available : + +* rma normalization +* quantile normalization + log2 +* background correction + log2 +* log2 only + +**Results** + +- Several quality figures : microarray images, boxplots and MA plots +- Rdata object containing the normalized data for further analysis + +]]> +</help> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/AffyQCnormalization_tpl.html Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,37 @@ +<!DOCTYPE html> +<html> +<head> + <meta http-equiv="Content-type" content="text/html; charset=utf-8"> + <meta name="viewport" content="width=device-width,initial-scale=1"> + <title>Quality Control</title> + +</head> + +<body> + +<h2>Raw data</h2> +<table><tr><td> +<img src='###IMAGES###' width="800"><br/> +</td></tr></table> + +<table><tr><td> +<img src='###BOXPLOT###' width="800"><br/> +</td></tr></table> + +<table><tr><td> +<img src='###PLOTMA###' width="800"><br/> +</td></tr></table> + + + +<h2>Normalized data</h2> +<table><tr><td> +<img src='###BOXPLOTNORM###' width="800"><br/> +</td></tr></table> + +<table><tr><td> +<img src='###PLOTMANORM###' width="800"><br/> +</td></tr></table> + +</body> +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Analyse.R Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,145 @@ +library(Biobase) +library(GEOquery) +library(GEOmetadb) +library(limma) +library(jsonlite) +library(affy) +library(dplyr) + +cargs<-commandArgs() +cargs<-cargs[(which(cargs=="--args")+1):length(cargs)] +nbargs=length(cargs) + +load(cargs[[nbargs-13]]) +targetFile=cargs[[nbargs-12]] +condition1Name=cargs[[nbargs-11]] +condition1=cargs[[nbargs-10]] +condition2Name=cargs[[nbargs-9]] +condition2=cargs[[nbargs-8]] +nbresult=cargs[[nbargs-7]] +result_export_eset=cargs[[nbargs-6]] +result=cargs[[nbargs-5]] +result.path=cargs[[nbargs-4]] +result.tabular=cargs[[nbargs-3]] +result.template=cargs[[nbargs-2]] + +#file.copy(targetFile,"./targetFile.txt") + +condition1_tmp <- strsplit(condition1,",") +condition1 <-unlist(condition1_tmp) + +condition2_tmp <- strsplit(condition2,",") +condition2<-unlist(condition2_tmp) + +conditions=c(condition1,condition2) + +#nbresult=1000 +dir.create(result.path, showWarnings = TRUE, recursive = FALSE) + +targets <- read.table(targetFile,sep="\t") + +eset=eset[,which(rownames(eset@phenoData@data) %in% conditions)] +#condition1Name=make.names(condition1Name) +#condition2Name=make.names(condition2Name) +#condition1Name=gsub("_","",condition1Name) +#condition2Name=gsub("_","",condition2Name) +#condition1Name +#condition2Name + + +eset@phenoData@data$source_name_ch1="" +eset@phenoData@data$source_name_ch1[which(rownames(eset@phenoData@data) %in% condition1)]=condition1Name +eset@phenoData@data$source_name_ch1[which(rownames(eset@phenoData@data) %in% condition2)]=condition2Name +#condition1Name +#condition2Name + +condNames=paste0("G",as.numeric(as.character(pData(eset)["source_name_ch1"][,1])!=condition1Name)) +#condNames=make.names(targets[,2]) +#condNames=gsub("_","",condNames) + +f <- as.factor(condNames) +#eset$description <- factors +design <- model.matrix(~ 0+f) +design + +colnames(design) <- levels(f) +colnames(design) +fit <- lmFit(eset, design) +fit +#cont.matrix <- makeContrasts(C1=paste0(condition1Name,"-",condition2Name), levels=design) +cont.matrix <- makeContrasts(G0-G1, levels=design) +cont.matrix +fit2 <- contrasts.fit(fit, cont.matrix) +fit2 <- eBayes(fit2) +fit2 +tT <- topTable(fit2, adjust="fdr", sort.by="B", number=nbresult) + +#head(exprs(eset)) + +gpl <- annotation(eset) +if (substr(x = gpl,1,3)!="GPL"){ + #if the annotation info does not start with "GPL" we retrieve the correspondin GPL annotation + mapping=read.csv("/galaxy-tools/transcriptomics/db/gplToBioc.csv",stringsAsFactors=FALSE) + gpl=mapping[which(mapping$bioc_package==annotation(eset)),]$gpl + gpl=gpl[1] + + annotation(eset)=gpl + + platf <- getGEO(gpl, AnnotGPL=TRUE) + ncbifd <- data.frame(attr(dataTable(platf), "table")) + + fData(eset)["ID"]=row.names(fData(eset)) + fData(eset)=merge(x=fData(eset),y=ncbifd,all.x = TRUE, by = "ID") + colnames(fData(eset))[4]="ENTREZ_GENE_ID" + row.names(fData(eset))=fData(eset)[,"ID"] + + tT <- add_rownames(tT, "ID") + +} else { + + gpl <- annotation(eset) + platf <- getGEO(gpl, AnnotGPL=TRUE) + ncbifd <- data.frame(attr(dataTable(platf), "table")) + + if (!("ID" %in% colnames(tT))){ + tT <- add_rownames(tT, "ID")} + +} + +tT <- merge(tT, ncbifd, by="ID") +tT <- tT[order(tT$P.Value), ] +tT <- subset(tT, select=c("Platform_SPOTID","ID","adj.P.Val","P.Value","t","B","logFC","Gene.symbol","Gene.title","Gene.ID","Chromosome.annotation","GO.Function.ID")) +tT<-format(tT, digits=2, nsmall=2) +head(tT) +colnames(tT)=gsub(pattern = "\\.",replacement = "_",colnames(tT)) +matrixtT=as.matrix(tT) +datajson=toJSON(matrixtT,pretty = TRUE) + +htmlfile=readChar(result.template, file.info(result.template)$size) +htmlfile=gsub(x=htmlfile,pattern = "###DATAJSON###",replacement = datajson, fixed = TRUE) +dir.create(result.path, showWarnings = TRUE, recursive = FALSE) + +boxplot="boxplot.png" +png(boxplot,width=800,height = 400) +par(mar=c(7,5,1,1)) +boxplot(exprs(eset),las=2,outline=FALSE) +dev.off() +htmlfile=gsub(x=htmlfile,pattern = "###BOXPLOT###",replacement = boxplot, fixed = TRUE) +file.copy(boxplot,result.path) + +histopvalue="histopvalue.png" + +png(histopvalue,width=800,height = 400) +par(mfrow=c(1,2)) +hist(fit2$F.p.value,nclass=100,main="Histogram of p-values", xlab="p-values",ylab="frequency") +volcanoplot(fit2,coef=1,highlight=10,main="Volcano plot") +htmlfile=gsub(x=htmlfile,pattern = "###HIST###",replacement = histopvalue, fixed = TRUE) +dev.off() +file.copy(histopvalue,result.path) + +#write.table(tolower(c(condition1Name,condition2Name)),quote = FALSE,col.names = FALSE, row.names=FALSE,file=result_export_conditions) +saveConditions=c(condition1Name,condition2Name) +save(eset,saveConditions,file=result_export_eset) +write.table(x=tT[,-1],file=result.tabular,quote=FALSE,row.names=FALSE,col.names=TRUE,sep="\t") +write(htmlfile,result) +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Analyse.xml Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,93 @@ +<tool id="LimmaAnalyse" name="Limma analysis" version="0.3.0"> + <description>Perform gene expression analysis thanks to limma</description> + <requirements> + <!--container type="docker">sblanck/smagexp</container--> + <requirement type="package" version="0.1.0">r-smagexp</requirement> + </requirements> + <command interpreter="python"> + <![CDATA[ + stderr_wrapper.py Rscript + ${__tool_directory__}/Analyse.R + "${rdataset}" + "${conditions}" + "${selectCondition1}" + "${condition1}" + "${selectCondition2}" + "${condition2}" + "${nbresult}" + $result_export_eset + $result_html + $result_html.files_path + $result_tabular + ${__tool_directory__}/Analyse_tpl.html + ]]> + </command> + + <inputs> + <param name="rdataset" type="data" format="rdata" label="RData" help="RData to be used"/> + <param name="conditions" type="data" format="cond" label="Conditions" help="conditions associated with the rData file"/> + <param name="selectCondition1" type="select" label="condition 1"> + <options from_dataset="conditions"> + <column name="name" index="1"/> + <column name="value" index="1"/> + <filter type="unique_value" column="1"/> + </options> + </param> + <param name="condition1" type="select" display="checkboxes" label="Condition 1" multiple="true"> + <options from_dataset="conditions"> + <column name="name" index="2"/> + <column name="value" index="0"/> + <filter type="param_value" ref="selectCondition1" column="1"/> + </options> + </param> + <param name="selectCondition2" type="select" label="condition 2"> + <options from_dataset="conditions"> + <column name="name" index="1"/> + <column name="value" index="1"/> + <filter type="unique_value" column="1"/> + </options> + </param> + <param name="condition2" type="select" display="checkboxes" label="Condition 2" multiple="true"> + <options from_dataset="conditions"> + <column name="name" index="2"/> + <column name="value" index="0"/> + <filter type="param_value" ref="selectCondition2" column="1"/> + </options> + </param> + <param name="nbresult" type="integer" value="1000" min="1" label="number of top genes" help="Number of genes to be displayed in result datatable"/> + </inputs> + <outputs> + <data format="html" name="result_html" label="Results of analysis of ${rdataset.name}"/> + <data format="rdata" name="result_export_eset" label="Export of expression set of ${rdataset.name}"/> + <data format="tabular" name="result_tabular" label="Text Results of analysis of ${rdataset.name}"/> + + </outputs> + <help> +<![CDATA[ +**What it does** + +The Limma analysis tool performs single analysis either of data previously retrieved from GEO database or normalized affymetrix .CEL files data. +Given a .cond file, it runs a standard limma differential expression analysis. + +**Example** of .cond file + +The .cond file should look like this +:: + + Sample ID Condition Description + GSM80460 series of 16 tumors GSM80460 OSCE-2T SERIES OF 16 TUMORS + GSM80461 series of 16 tumors GSM80461 OSCE-4T Series of 16 Tumors + GSM80462 series of 16 tumors GSM80462 OSCE-6T Series of 16 Tumors + GSM80476 series of 4 normals GSM80476 OSCE-2N Series of 4 Normals + GSM80477 series of 4 normals GSM80477 OSCE-9N Series of 4 Normals + + +**Results** + +- Boxplots, p-value histograms and a volcano plot +- Table summarizing the differentially expressed genes and their annotations. This table is sortable and requestable. +- Rdata object to perform further meta-analysis. +]]> + </help> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Analyse_tpl.html Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,162 @@ +<!DOCTYPE html> +<html> +<head> + <meta http-equiv="Content-type" content="text/html; charset=utf-8"> + <meta name="viewport" content="width=device-width,initial-scale=1"> + <title>Limma analysis results</title> + <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.10.10/css/jquery.dataTables.min.css"> + <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/buttons/1.1.0/css/buttons.dataTables.min.css"> + <style type="text/css" class="init"> + +td.details-control { + background: url('../../../static/images/fugue/toggle-expand.png') no-repeat center center; + cursor: pointer; +} +tr.shown td.details-control { + background: url('../../../static/images/fugue/toggle.png') no-repeat center center; +} + + .dataTable th,.dataTable td { + + text-overflow: ellipsis; + max-width: 200px; + min-width: 40px; + -o-text-overflow: ellipsis; + -ms-text-overflow: ellipsis; + -moz-text-overflow: ellipsis; + width: 100px; + white-space:nowrap; + overflow: hidden; + + + padding: 10px; + +} + .details th,.details td { + + max-width: 1500px; + min-width: 1000px; + padding: 10px; + +} + + + </style> + <style> + + + </style> + + <script type="text/javascript" language="javascript" src="https://code.jquery.com/jquery-1.11.3.min.js"> + </script> + <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/1.10.10/js/jquery.dataTables.min.js"> + </script> + <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/buttons/1.1.0/js/dataTables.buttons.min.js"> + </script> + <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/buttons/1.1.0/js/buttons.flash.min.js"> + </script> + <script type="text/javascript" language="javascript" src="https://cdnjs.cloudflare.com/ajax/libs/jszip/2.5.0/jszip.min.js"> + </script> + <script type="text/javascript" language="javascript" src="https://cdn.rawgit.com/bpampuch/pdfmake/0.1.18/build/pdfmake.min.js"> + </script> + <script type="text/javascript" language="javascript" src="https://cdn.rawgit.com/bpampuch/pdfmake/0.1.18/build/vfs_fonts.js"> + </script> + <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/buttons/1.1.0/js/buttons.html5.min.js"> + </script> + <script type="text/javascript" class="init"> + + + +function format ( d ) { + // `d` is the original data object for the row + var test=d[11].split("///"); + var strLength=test.length; + var result=""; + for (var i=0;i<strLength;i++) { + result=result+'<a href="http://amigo.geneontology.org/amigo/term/'+test[i]+'">'+test[i]+'</a>'+', '; + } + return '<table cellpadding="5" cellspacing="0" border="0" style="padding-left:50px;">'+ + '<tr>'+ + '<td>Gene Symbol:</td>'+ + '<td style="width:100%;overflow: hidden;white-space: normal;">'+'<a href="http://www.ncbi.nlm.nih.gov/gene/'+d[9]+'">'+d[7]+'</a>'+'</td>'+ + '</tr>'+ + '<tr>'+ + '<td>Gene Title:</td>'+ + '<td style="width:100%;overflow: hidden;white-space: normal;">'+d[8]+'</td>'+ + '</tr>'+ + '<tr>'+ + '<td>GO Function ID:</td>'+ + '<td style="width:100%;overflow: hidden;white-space: normal;">'+result+'</td>'+ + '</tr>'+ + '</table>'; +} + +var dataSet=###DATAJSON###; + +$(document).ready(function() { + var table = $('#example').DataTable( { + "scrollX": true, + data: dataSet, + "columns": [ + { + "className": 'details-control', + "orderable": false, + "data": null, + "defaultContent": '' + }, + {title : "ID" }, + {title : "adj_P_Val" }, + {title : "P_Value" }, + {title : "t" }, + {title : "B" }, + {title : "logFC" }, + {title : "Gene_symbol"}, + {title : "Gene_title"}, + {title : "Gene_ID"}, + {title : "Chromosome_annotation"}, + {title : "GO_Function_ID"} + + ], + "order": [[2, 'asc']], + dom: 'Bfrtlip', + buttons: [ + 'copy', 'csv', 'excel' + ] + + } ); + + // Add event listener for opening and closing details + $('#example tbody').on('click', 'td.details-control', function () { + var tr = $(this).closest('tr'); + var row = table.row( tr ); + + if ( row.child.isShown() ) { + // This row is already open - close it + row.child.hide(); + tr.removeClass('shown'); + } + else { + // Open this row + row.child( format(row.data()) ).show(); + tr.addClass('shown'); + } + } ); +} ); + + + + </script> +</head> +<h2>Boxplots</h2> +<img src='###BOXPLOT###'><br/> +</td><td> +<body> +<h2>P-value histogram and Volcano plot</h2> +<img src='###HIST###'><br/> +</td><td> +<body> +<table id="example" class="compact stripe row-border" cellspacing="0" width="100%"> + + </table> +</body> +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GEOQuery.R Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,56 @@ + +library(GEOquery) + + +cargs<-commandArgs() +cargs<-cargs[(which(cargs=="--args")+1):length(cargs)] + +GEOQueryID<-cargs[[1]] +GEOQueryData<-cargs[[2]] +GEOQueryRData<-cargs[[3]] +conditionFile<-cargs[[4]] +transformation<-cargs[[5]] + +data1 = getGEO(GEOQueryID) +eset=data1[[1]] + +#check if datas are in log2 space +normalization<-function(data){ + ex <- exprs(data) + qx <- as.numeric(quantile(ex, c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm=T)) + LogC <- (qx[5] > 100) || + (qx[6]-qx[1] > 50 && qx[2] > 0) || + (qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2) + if (LogC) { ex[which(ex <= 0)] <- NaN + return (log2(ex)) } else { + return (ex) + } +} + +if (transformation=="auto"){ + exprs(eset)=normalization(eset) +} else if (transformation=="yes"){ + exprs(eset)=log2(exprs(eset)) +} + +matrixData=exprs(eset) +write.table(matrixData,col.names=NA,row.names=TRUE,sep="\t",file=GEOQueryData) + +#Construcion of condition file +#if there is data in "source_name_ch1" field, we keep this data as a condition +#else we keep the "description" field data. +if (length(unique(tolower(pData(data1[[1]])["source_name_ch1"][,1])))>1) +{ + conditions=pData(data1[[1]])["source_name_ch1"] + description=paste0(as.vector(pData(data1[[1]])["geo_accession"][,1]), " ",as.vector(pData(data1[[1]])["title"][,1]), " ", as.vector(conditions[,1])) +} else +{ + conditions=pData(data1[[1]])["description"] + description=paste0(as.vector(pData(data1[[1]])["geo_accession"][,1]), " ",as.vector(pData(data1[[1]])["title"][,1]), " ", as.vector(conditions[,1])) +} + +conditions[,1]=tolower(conditions[,1]) +pData(eset)["source_name_ch1"]=conditions + +write.table(cbind(conditions,description),quote = FALSE,col.names = FALSE, row.names=TRUE,file=conditionFile,sep="\t") +save(eset,conditions,file=GEOQueryRData) \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GEOQuery.xml Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,66 @@ +<tool id="GEOQuery" name="GEOQuery"> + <description>GEOQuery wrapper</description> + <requirements> + <!--container type="docker">sblanck/smagexp</container--> + <requirement type="package" version="0.1.0">r-smagexp</requirement> + </requirements> + <command interpreter="python"> + <![CDATA[ + stderr_wrapper.py Rscript + ${__tool_directory__}/GEOQuery.R + $GEOQueryID + $GEOQueryData + $GEOQueryRData + $conditions + $transformation + ]]> + </command> + + <inputs> + <param name="GEOQueryID" type="text" size="12" optional="false" label="GEOQuery ID" help=""> + <validator type="empty_field"/> + </param> + <param name="transformation" type="select" label="log2 transformation"> + <option value="auto">auto</option> + <option value="yes">yes</option> + <option value="no">no</option> + </param> + </inputs> + + <outputs> + <data format="tabular" name="GEOQueryData" label="GEOQuery Data of ${GEOQueryID}"/> + <data format="cond" name="conditions" label="conditions of ${GEOQueryID}"/> + <data format="rdata" name="GEOQueryRData" label="GEOQuery RData of ${GEOQueryID}"/> + </outputs> + <help> +<![CDATA[ +**What it does** + +This tool fetches microarray data directly from GEO database, based on the GEOQuery R package. Given a GSE accession ID, it returns an Rdata object containing the data and a text file (.cond file) summarizing the conditions of the experiment. +The .cond file is a text file containing one line per sample in the experiment. Each line is made of 3 columns: + + - Sample ID + - Condition of the biological sample + - Description of the biological sample + +**Example** of .cond file +:: + + Sample ID Condition Description + GSM80460 series of 16 tumors GSM80460 OSCE-2T SERIES OF 16 TUMORS + GSM80461 series of 16 tumors GSM80461 OSCE-4T Series of 16 Tumors + GSM80462 series of 16 tumors GSM80462 OSCE-6T Series of 16 Tumors + GSM80476 series of 4 normals GSM80476 OSCE-2N Series of 4 Normals + GSM80477 series of 4 normals GSM80477 OSCE-9N Series of 4 Normals + +**Results** + +- Rdata object containing data for further analysis +- Condition (.cond) file summarizing conditions of the experiment +- Tabular (.txt) file containing expression data for each sample +]]> + + </help> + + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ImportDataFromMatrix.R Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,74 @@ +library(Biobase) +library(GEOquery) +library(GEOmetadb) +library(limma) +library(jsonlite) +library(affy) +library(dplyr) +library(affyPLM) + +cargs<-commandArgs() +cargs<-cargs[(which(cargs=="--args")+1):length(cargs)] +nbargs=length(cargs) + +dataFile=cargs[[nbargs-9]] +normalization=cargs[[nbargs-8]] +conditionsFile=cargs[[nbargs-7]] +annotation=cargs[[nbargs-6]] +result_export_eset=cargs[[nbargs-5]] +result=cargs[[nbargs-4]] +result.path=cargs[[nbargs-3]] +result.template=cargs[[nbargs-2]] + +dir.create(result.path, showWarnings = TRUE, recursive = FALSE) + +data=as.matrix(read.table(file = dataFile)) +conditions=read.table(file=conditionsFile,sep = "\t",row.names=1) +htmlfile=readChar(result.template, file.info(result.template)$size) + +colnames(conditions)=c("source_name_ch1","description") +phenodata<-new("AnnotatedDataFrame",data=conditions) + +eset=ExpressionSet(assayData=data,phenoData=phenodata,annotation=annotation) + +if (normalization == "quantile") { + eset <- normalize.ExpressionSet.quantiles(eset, transfn="log") +} else if (normalization == "log2") { + exprs(eset) = log2(exprs(eset)) +} + +boxplotnorm="boxplotnorm.png" +png(boxplotnorm,width=800,height = 400) +par(mar=c(7,5,1,1)) +boxplot(data.frame(exprs(eset)),las=2,outline=FALSE) +dev.off() +htmlfile=gsub(x=htmlfile,pattern = "###BOXPLOTNORM###",replacement = boxplotnorm, fixed = TRUE) +file.copy(boxplotnorm,result.path) + +plotMAnorm="plotMAnorm.png" +nblines=length(colnames(data))%/%3 + as.numeric((length(colnames(data))%%3)!=0) +png(plotMAnorm,width=800,height =300*nblines ) +par(mfrow=c(nblines,3)) +#for (i in 1:length(colnames(data))){ + MAplot(eset) +#} + +dev.off() +htmlfile=gsub(x=htmlfile,pattern = "###PLOTMANORM###",replacement = plotMAnorm, fixed = TRUE) +file.copy(plotMAnorm,result.path) +#write.table(tolower(c(condition1Name,condition2Name)),quote = FALSE,col.names = FALSE, row.names=FALSE,file=result_export_conditions) +#saveConditions=c(condition1Name,condition2Name) +save(eset,file=result_export_eset) +write(htmlfile,result) + +#l=list() +#for(i in 1:length(esets)) +#{ +# l[[paste("study",i,sep="")]]<-res[[i]] +#} +#l[["Meta"]]=res[[length(res)-1]] +#showVenn(res,file.path(temp.files.path,"venn.png")) +#writeLines(c("<h2>Venn diagram</h2>"),file.conn) +#writeLines(c("<img src='venn.png'><br/><br/>"),file.conn) +#writeLines(c("</body></html>"),file.conn) +#close(file.conn) \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ImportDataFromMatrix.xml Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,79 @@ +<tool id="importMatrixData" name="Import custom data"> + <description>Quality control and normalization of a custom matrix expression data</description> + <requirements> + <!--container type="docker">sblanck/smagexp</container--> + <requirement type="package" version="0.1.0">r-smagexp</requirement> + </requirements> + <command interpreter="python"> + <![CDATA[ + stderr_wrapper.py Rscript + ${__tool_directory__}/ImportDataFromMatrix.R + $input + $normalization + $conditions + $annotations + $result_export_eset + $result_html + $result_html.files_path + ${__tool_directory__}/ImportDataFromMatrix_tpl.html + ]]> + </command> + + + <inputs> + <param name="input" type="data" format="tabular" label="Input data" help="Input data"/> + <param name="normalization" type="select" label="Preprocessing/normalization"> + <option value="quantile">quantile normalization + log2</option> + <option value="log2">log2 only</option> + <option value="none">none</option> + </param> + <param name="conditions" type="data" format="cond" label="Conditions" help="conditions associated with the input file"/> + <param name="annotations" type="text" label="Annotation GPL code" help="GPL code for annotations"/> + </inputs> + + <outputs> + <data format="rdata" name="result_export_eset" label="export normalized expressionSet"/> + <data format="html" name="result_html" label="QC result"/> + </outputs> + + <help> +<![CDATA[ +**What it does** + +This tool imports data stored in a tabular text file. +Column titles (chip IDs) must match the IDs of the .cond file. +GPL annotation code is also required to fetch annotations from GEO. + +**Exemple** + +Header of input tabular text file +:: + +"" "GSM80460" "GSM80461" "GSM80462" "GSM80463" "GSM80464" +"1007_s_at" -0.0513991525066443 0.306845500314283 0.0854246562526777 -0.142417044615852 0.0854246562526777 +"1053_at" -0.187707155126729 -0.488026018218199 -0.282789700980404 0.160920188181103 0.989865622866287 +"117_at" 0.814755482809874 -2.15842936260448 -0.125006361067033 -0.256700472111743 0.0114956388378294 +"121_at" -0.0558912008920451 -0.0649174766813385 0.49467161164755 -0.0892673380970874 0.113700499164728 +"1294_at" 0.961993677420255 -0.0320936297098533 -0.169744675832317 -0.0969617298870879 -0.181149439104566 +"1316_at" 0.0454429707611671 0.43616183931445 -0.766111939825723 -0.182786075741673 0.599317793698226 +"1405_i_at" 2.23450132056221 0.369606070031838 -1.06190243892591 -0.190997225060914 0.595503660502742 + + +The .cond file should look like this +:: + + Sample ID Condition Description + GSM80460 series of 16 tumors GSM80460 OSCE-2T SERIES OF 16 TUMORS + GSM80461 series of 16 tumors GSM80461 OSCE-4T Series of 16 Tumors + GSM80462 series of 16 tumors GSM80462 OSCE-6T Series of 16 Tumors + GSM80476 series of 4 normals GSM80476 OSCE-2N Series of 4 Normals + GSM80477 series of 4 normals GSM80477 OSCE-9N Series of 4 Normals + + +**Results** + - Bboxplots and MA plots + - Rdata object containing the data for further analysis. +]]> + </help> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ImportDataFromMatrix_tpl.html Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,22 @@ +<!DOCTYPE html> +<html> +<head> + <meta http-equiv="Content-type" content="text/html; charset=utf-8"> + <meta name="viewport" content="width=device-width,initial-scale=1"> + <title>Quality Control</title> + +</head> + +<body> + +<h2>Normalized data</h2> +<table><tr><td> +<img src='###BOXPLOTNORM###' width="800"><br/> +</td></tr></table> + +<table><tr><td> +<img src='###PLOTMANORM###' width="800"><br/> +</td></tr></table> + +</body> +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MetaMA.R Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,209 @@ +library(metaMA) +library(affy) +library(annaffy) +library(VennDiagram) +library(GEOquery) + +cargs<-commandArgs() +cargs<-cargs[(which(cargs=="--args")+1):length(cargs)] + +nbargs=length(cargs) +rdataList=list() +condition1List=list() +condition2List=list() + +for (i in 1:(nbargs-5)) +{ + Rdata=cargs[[i]] + #condition1=cargs[[i+1]] + #condition2=cargs[[i+2]] + load(Rdata) + + rdataList=c(rdataList,(eset)) + #condition1List=c(condition1List,condition1) + #condition2List=c(condition2List,condition2) + condition1List=c(condition1List,saveConditions[1]) + condition2List=c(condition2List,saveConditions[2]) + +} + +#tables<-cargs[[1]] +#tech<-cargs[[2]] +result.html<-cargs[[nbargs-4]] +result.path<-cargs[[nbargs-3]] +#result.venn<-cargs[[nbargs-3]] +result.template<-cargs[[nbargs-2]] + +#sink("/dev/null") +#dir.create(temp.files.path,recursive=TRUE) +#file.conn=file(diag.html,open="w") + +#writeLines(c("<html><body bgcolor='lightgray'>"),file.conn) + +showVenn<-function(res,file) +{ + venn.plot<-venn.diagram(x = c(res[c(1:(length(res)-3))],meta=list(res$Meta)), + filename = NULL, col = "black", + fill = c(1:(length(res)-2)), + margin=0.05, alpha = 0.6) + jpeg(file) + grid.draw(venn.plot) + dev.off() +} + + +library("org.Hs.eg.db") +x <- org.Hs.egUNIGENE +mapped_genes <- mappedkeys(x) +link <- as.list(x[mapped_genes]) + +probe2unigene<-function(expset){ + #construction of the map probe->unigene + probes=rownames(exprs(expset)) + gene_id=fData(expset)[probes,"ENTREZ_GENE_ID"] + unigene=link[gene_id] + names(unigene)<-probes + probe_unigene=unigene +} + +unigene2probe<-function(map) +{ + suppressWarnings(x <- cbind(unlist(map), names(map))) + unigene_probe=split(x[,2], x[,1]) +} + +convert2metaMA<-function(listStudies,mergemeth=mean) +{ + if (!(class(listStudies) %in% c("list"))) { + stop("listStudies must be a list") + } + conv_unigene=lapply(listStudies, + FUN=function(x) unigene2probe(probe2unigene(x))) + + id=lapply(conv_unigene,names) + inter=Reduce(intersect,id) + if(length(inter)<=0){stop("no common genes")} + print(paste(length(inter),"genes in common")) + esets=lapply(1:length(listStudies),FUN=function(i){ + l=lapply(conv_unigene[[i]][inter], + FUN=function(x) exprs(listStudies[[i]])[x,,drop=TRUE]) + esetsgr=t(sapply(l,FUN=function(ll) if(is.null(dim(ll))){ll} + else{apply(ll,2,mergemeth)})) + esetsgr + }) + return(list(esets=esets,conv.unigene=conv_unigene)) +} + +normalization<-function(data){ + ex <- exprs(data) + qx <- as.numeric(quantile(ex, c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm=T)) + LogC <- (qx[5] > 100) || + (qx[6]-qx[1] > 50 && qx[2] > 0) || + (qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2) + if (LogC) { ex[which(ex <= 0)] <- NaN + return (log2(ex)) } else { + return (ex) + } +} + + + + +filterCondition<-function(gset,condition1, condition2){ + selected=c(which((tolower(as.character(pData(gset)["source_name_ch1"][,1]))==condition1)), + which(tolower(as.character(pData(gset)["source_name_ch1"][,1]))==condition2)) + + return(gset[,selected]) + } + +rdatalist <- lapply(rdataList, FUN=function(datalist) normalization(datalist)) + +classes=list() +filteredRdataList=list() +for (i in 1:length(rdatalist)) +{ + currentData=rdataList[[i]] + currentCondition1=condition1List[[i]] + currentCondition2=condition2List[[i]] + #currentData=filterCondition(currentData,currentCondition1,currentCondition2) + currentClasses=as.numeric(tolower(as.character(pData(currentData)["source_name_ch1"][,1]))==currentCondition1) + filteredRdataList=c(filteredRdataList,currentData) + classes=c(classes,list(currentClasses)) + #write(file="~/galaxy-modal/classes.txt",classes) +} + +#rdataList=filteredRdataList +conv=convert2metaMA(rdataList) +esets=conv$esets +conv_unigene=conv$conv.unigene + +#write(file="~/galaxy-modal/esets.txt",length(esets)) +#write(file="~/galaxy-modal/classes.txt",length(classes)) +res=pvalcombination(esets=esets,classes=classes,moderated="limma") +resIDDIRR=IDDIRR(res$Meta,res$AllIndStudies) +length(res$Meta) +Hs.Meta=rownames(esets[[1]])[res$Meta] +origId.Meta=lapply(conv_unigene,FUN=function(vec) as.vector(unlist(vec[Hs.Meta]))) + +gpllist <- lapply(rdataList, FUN=function(ann) annotation(ann)) +platflist <- lapply(gpllist, FUN=function(gpl) getGEO(gpl, AnnotGPL=TRUE)) +ncbifdlist <- lapply(platflist, FUN=function(data) data.frame(attr(dataTable(data), "table"))) +ncbifdresult=lapply(1:length(origId.Meta), FUN=function(i) ncbifdlist[[i]][which(ncbifdlist[[i]]$ID %in% origId.Meta[[i]]),]) +ncbidfannot=do.call(rbind,ncbifdresult) +ncbidfannot <- subset(ncbidfannot, select=c("Platform_SPOTID","ID","Gene.symbol","Gene.title","Gene.ID","Chromosome.annotation","GO.Function.ID")) + +library(jsonlite) +matrixncbidfannot=as.matrix(ncbidfannot) +datajson=toJSON(matrixncbidfannot,pretty = TRUE) +summaryjson=toJSON(as.matrix(t(resIDDIRR)),pretty = TRUE) + + +#vennsplit=strsplit(result.venn,split="/")[[1]] +#venn=paste0("./",vennsplit[length(vennsplit)]) + + +vennFilename="venn.png" +vennFile=file.path(result.path,vennFilename) +htmlfile=readChar(result.template, file.info(result.template)$size) +htmlfile=gsub(x=htmlfile,pattern = "###DATAJSON###",replacement = datajson, fixed = TRUE) +htmlfile=gsub(x=htmlfile,pattern = "###SUMMARYJSON###",replacement = summaryjson, fixed = TRUE) +htmlfile=gsub(x=htmlfile,pattern = "###VENN###",replacement = vennFilename, fixed = TRUE) +write(htmlfile,result.html) + +library(VennDiagram) +flog.threshold(ERROR) + +#venn.plot<-venn.diagram(x = c(res[c(1:(length(res)-3))],meta=list(res$Meta)),filename = v, col = "black", fill = c(1:(length(res)-2)), margin=0.05, alpha = 0.6,imagetype = "png") +dir.create(result.path, showWarnings = TRUE, recursive = FALSE) + +showVenn<-function(liste,file) +{ + venn.plot<-venn.diagram(x = liste, + filename = vennFilename, col = "black", + fill = 1:length(liste)+1, + margin=0.05, alpha = 0.6,imagetype = "png") +# png(file); +# grid.draw(venn.plot); +# dev.off(); + +} + +l=list() +for(i in 1:length(esets)) +{ + l[[paste("study",i,sep="")]]<-res[[i]] +} +l[["Meta"]]=res[[length(res)-1]] +showVenn(l,vennFile) +file.copy(vennFilename,result.path) +#l=list() +#for(i in 1:length(esets)) +#{ +# l[[paste("study",i,sep="")]]<-res[[i]] +#} +#l[["Meta"]]=res[[length(res)-1]] +#showVenn(res,result.venn) +#writeLines(c("<h2>Venn diagram</h2>"),file.conn) +#writeLines(c("<img src='venn.png'><br/><br/>"),file.conn) +#writeLines(c("</body></html>"),file.conn) +#close(file.conn) \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MetaMA.xml Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,51 @@ +<tool id="metaMA" name="Microarray data meta-analysis" version="0.1.0"> + <description>Perform meta-analysis thanks to metaMA.</description> + <requirements> + <!--container type="docker">sblanck/smagexp</container--> + <requirement type="package" version="0.1.0">r-smagexp</requirement> + </requirements> + <command interpreter="python"> + <![CDATA[ + stderr_wrapper.py Rscript + ${__tool_directory__}/MetaMA.R + #for $currentInput in $inputList + "${currentInput.rdataset}" + #end for + $result_html + $result_html.extra_files_path + ${__tool_directory__}/MetaMa_tpl.html + ]]> + </command> + + <inputs> + <repeat name="inputList" title="Datasets"> + <param name="rdataset" type="data" format="rdata" label="RData" help="RData to be used"/> + </repeat> + </inputs> + + <outputs> + <data format="html" name="result_html" label="Meta-analysis results"/> + </outputs> + + <help> +<![CDATA[ +**What it does** + +Given several Rdata object this tool run a meta-analysis using the metaMA R package. + +**Results** + +- Venn Diagram summarizing the results of the meta-analysis +- A list of indicators to evaluate the quality of the performance of the meta-analysis + + - DE : Number of differentially expressed genes + - IDD (Integration Driven discoveries) : number of genes that are declared differentially expressed in the meta-analysis that were not identified in any of the single studies alone + - Loss : Number of genes that are identified differentially expressed in single studies but not in meta-analysis + - DR (Integration-driven Discovery Rate) : corresponding proportion of IDD + - IRR (Integration-driven Revision) : corresponding proportion of Loss + +- Fully sortable and requestable table, with gene annotations and hypertext links to NCBI gene database. +]]> + </help> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MetaMa_tpl.html Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,188 @@ +<!DOCTYPE html> +<html> +<head> + <meta http-equiv="Content-type" content="text/html; charset=utf-8"> + <meta name="viewport" content="width=device-width,initial-scale=1"> + <title>DataTables example - Ajax sourced data</title> + <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.10.10/css/jquery.dataTables.min.css"> + <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/buttons/1.1.0/css/buttons.dataTables.min.css"> + + <!--link rel="stylesheet" type="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css"> + <link rel="stylesheet" type="https://cdn.datatables.net/1.10.11/css/dataTables.bootstrap.min.css"--> + + <style type="text/css" class="init"> + +td.details-control { + background: url('../../../static/images/fugue/toggle-expand.png') no-repeat center center; + cursor: pointer; +} +tr.shown td.details-control { + background: url('../../../static/images/fugue/toggle.png') no-repeat center center; +} + + .dataTable th,.dataTable td { + + text-overflow: ellipsis; + max-width: 200px; + min-width: 40px; + -o-text-overflow: ellipsis; + -ms-text-overflow: ellipsis; + -moz-text-overflow: ellipsis; + width: 100px; + white-space:nowrap; + overflow: hidden; + + + padding: 10px; + +} + + + </style> + + <script type="text/javascript" language="javascript" src="https://code.jquery.com/jquery-1.12.0.min.js"> + </script> + <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/1.10.11/js/jquery.dataTables.min.js"> + </script> + <!--script type="text/javascript" language="javascript" src="https://cdn.datatables.net/1.10.11/js/dataTables.bootstrap.min.js"> + </script--> + <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/buttons/1.1.0/js/dataTables.buttons.min.js"> + </script> + <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/buttons/1.1.0/js/buttons.flash.min.js"> + </script> + <script type="text/javascript" language="javascript" src="https://cdnjs.cloudflare.com/ajax/libs/jszip/2.5.0/jszip.min.js"> + </script> + <script type="text/javascript" language="javascript" src="https://cdn.rawgit.com/bpampuch/pdfmake/0.1.18/build/pdfmake.min.js"> + </script> + <script type="text/javascript" language="javascript" src="https://cdn.rawgit.com/bpampuch/pdfmake/0.1.18/build/vfs_fonts.js"> + </script> + <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/buttons/1.1.0/js/buttons.html5.min.js"> + </script> + <script type="text/javascript" class="init"> + + + +function format ( d ) { + // `d` is the original data object for the row + var test=d[6].split("///"); + var strLength=test.length; + var result=""; + for (var i=0;i<strLength;i++) { + result=result+'<a href="http://amigo.geneontology.org/amigo/term/'+test[i]+'">'+test[i]+'</a>'+', '; + } + return '<table cellpadding="5" cellspacing="0" border="0" style="padding-left:50px;">'+ + '<tr>'+ + '<td>Gene Symbol:</td>'+ + '<td style="width:100%;overflow: hidden;white-space: normal;">'+'<a href="http://www.ncbi.nlm.nih.gov/gene/'+d[4]+'">'+d[2]+'</a>'+'</td>'+ + '</tr>'+ + '<tr>'+ + '<td>Gene Title:</td>'+ + '<td style="width:100%;overflow: hidden;white-space: normal;">'+d[3]+'</td>'+ + '</tr>'+ + '<tr>'+ + '<td>GO Function ID:</td>'+ + '<td style="width:100%;overflow: hidden;white-space: normal;">'+result+'</td>'+ + '</tr>'+ + '</table>'; +} + +var dataSet=###DATAJSON###; +var summarySet=###SUMMARYJSON###; + +$(document).ready(function() { + var table = $('#example').DataTable( { + "scrollX": true, + data: dataSet, + "columns": [ + { + "className": 'details-control', + "orderable": false, + "data": null, + "defaultContent": '' + }, + {title : "ID" }, + {title : "Gene_symbol"}, + {title : "Gene_title"}, + {title : "Gene_ID"}, + {title : "Chromosome_annotation"}, + {title : "GO_Function_ID"} + + ], + + dom: 'Bfrtlip', + buttons: [ + 'copy', 'csv', 'excel' + ] + + } ); + + // Add event listener for opening and closing details + $('#example tbody').on('click', 'td.details-control', function () { + var tr = $(this).closest('tr'); + var row = table.row( tr ); + + if ( row.child.isShown() ) { + // This row is already open - close it + row.child.hide(); + tr.removeClass('shown'); + } + else { + // Open this row + row.child( format(row.data()) ).show(); + tr.addClass('shown'); + } + } ); +} ); + + + +$(document).ready(function() { + $('#summary').DataTable( { + "columnDefs": [ + { className: "dt-center", targets: '_all' } + ], + "paging": false, + "ordering": false, + "info": false, + "searching": false, + data: summarySet, + "columns": [ + {title : "DE" }, + {title : "IDD" }, + {title : "Loss" }, + {title : "IDR" }, + {title : "IRR" }, + + + ], + + } ); +} ); + + + + </script> +</head> + +<body> + +<table><tr><td> +<h2>Venn diagram</h2> +<img src='###VENN###' height="400" width="400"><br/> +</td><td> + +</td></tr></table> + +<h2>Summary</h2> +<table id="summary" class="compact stripe row-border" cellspacing="0" width="100%"> </table> +</br> +DE : Number of differentially expressed genes </br> +IDD (Integration Driven discoveries) : number of genes that are declared DE in the meta-analysis that were not identified in any of the individual studies alone</br> +Loss : Number of genes that are identified DE in individual studies but not in meta-analysis </br> +IDR (Integration-driven Discovery Rate) : corresponding proportions of IDD </br> +IRR (Integration-driven Revision) : corresponding proportions of Loss</br> +<h2>Details</h2> +<table id="example" class="compact stripe row-border" cellspacing="0" width="100%"> +</table> +</body> +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MetaRNASeq.R Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,213 @@ +cargs <- commandArgs() +cargs <- cargs[(which(cargs == "--args")+1):length(cargs)] +nbargs=length(cargs) +listfiles=vector() +listfilenames=vector() +for (i in seq(1,nbargs-6,2)) { + listfiles=c(listfiles,cargs[[i]]) + listfilenames=c(listfilenames,cargs[[i+1]]) +} +#mod<-cargs[[length(cargs) - 6]] +outputfile <- cargs[[length(cargs) - 5]] +result.html = cargs[[length(cargs) - 4]] +html.files.path=cargs[[length(cargs) - 3]] +result.template=cargs[[length(cargs) - 2]] + +alpha=0.05 + +#print(comparison) + +listData=lapply(listfiles,read.table) +orderData=lapply(listData, function(x) x[order(x[1]), ]) +rawpval=lapply(orderData,function(x) x[6]) +rawpval=lapply(rawpval, function(x) as.numeric(unlist(x))) + +DE=list() +DE=lapply(orderData, function(x) ifelse(x[7]<=0.05,1,0)) + +FC=list() +FC=lapply(orderData, function(x) x[3]) + +DE=as.data.frame(DE) +colnames(DE)=listfilenames +FC=as.data.frame(FC) +colnames(FC)=listfilenames +# the comparison must only have two values and the conds must +# be a vector from those values, at least one of each. + +#if (length(comparison) != 2) { +# stop("Comparison type must be a tuple: ", cargs[length(cargs) - 8]) +#} + +sink("/dev/null") +dir.create(html.files.path, recursive=TRUE) +#library(DESeq) +#library(HTSFilter) + +#DE=list() +#FC=list() +#i=1 + +# Open the html output file +#file.conn = file(diag.html, open="w") + +#writeLines( c("<html><body>"), file.conn) + +# Perform deseq analysis on each study +#for(i in 1:length(listfiles)) +#{ +# f=listfiles[i] +# fname=listfilenames[i] +# study_name=unlist(strsplit(fname,"[.]"))[1] +# print(paste0("study.name ",study_name)) +# d <- read.table(f, sep=" ", header=TRUE, row.names=1) +# conds<-sapply(strsplit(colnames(d),"[.]"),FUN=function(x) x[1]) +# if (length(unique(conds)) != 2) { +# warning(as.data.frame(strsplit(colnames(d),"[.]"))) +# stop("You can only have two columns types: ", paste(conds,collapse=" ")) +# } +# if (!identical(sort(comparison), sort(unique(conds)))) { +# stop("Column types must use the two names from Comparison type, and vice versa. Must have at least one of each in the Column types.\nColumn types: ", cargs[2], "\n", "Comparison type: ", cargs[3]) +# } +# if (length(d) != length(conds)) { +# stop("Number of total sample columns in counts file must correspond to the columns types field. E.g. if column types is 'kidney,kidney,liver,liver' then number of sample columns in counts file must be 4 as well.") +# } +# +# cds <- newCountDataSet(d, conds) +# cds <- estimateSizeFactors(cds) +# +# cdsBlind <- estimateDispersions( cds, method="blind" ) +# +# if (length(conds) != 2) { +# cds <- estimateDispersions( cds ) +# norep = FALSE +# } +# +# if (length(conds) == 2) { +# cds <- estimateDispersions( cds, method=method, sharingMode=mod, fitType="parametric" ) +# norep = TRUE +# } +# +# filter<-HTSFilter(cds, plot=FALSE) +# cds.filter<-filter$filteredData +# on.index<-which(filter$on==1) +# +# res<-as.data.frame(matrix(NA,nrow=nrow(cds),ncol=ncol(cds))) +# nbT <- nbinomTest(cds.filter, comparison[1], comparison[2]) +# colnames(res)<-colnames(nbT) +# res[on.index,]<-nbT +# #write.table(res[order(res$padj), ], file=outputfile, quote=FALSE, row.names=FALSE, sep="\t") +# +# +# temp.pval.plot = file.path( html.files.path, paste("PvalHist",i,".png",sep="")) +# png( temp.pval.plot, width=500, height=500 ) +# hist(res$pval, breaks=100, col="skyblue", border="slateblue", main="") +# dev.off() +# +# writeLines( c("<h2>P-value histogram for ",study_name,"</h2>"), file.conn) +# writeLines( c("<img src='PvalHist",i,".png'><br/><br/>"), file.conn) +# +# #on enregistre la p-value +# rawpval[[study_name]]<-res$pval +# DE[[study_name]]<-ifelse(res$padj<=alpha,1,0) +# FC[[study_name]]<-res$log2FoldChange +# +# i=i+1 +#} + + +# combinations +library(metaRNASeq) +fishcomb<-fishercomb(rawpval, BHth=alpha) +warning(length(rawpval)) +invnormcomb<-invnorm(rawpval, nrep=c(8,8), BHth=alpha) +#DE[["fishercomb"]]<-ifelse(fishcomb$adjpval<=alpha,1,0) +#DE[["invnormcomb"]]<-ifelse(invnormcomb$adjpval<=alpha,1,0) + +signsFC<-mapply(FC,FUN=function(x) sign(x)) +sumsigns<-apply(signsFC,1,sum) +commonsgnFC<-ifelse(abs(sumsigns)==dim(signsFC)[2],sign(sumsigns),0) + +DEresults <- data.frame(DE=DE,"DE.fishercomb"=ifelse(fishcomb$adjpval<=alpha,1,0),"DE.invnorm"=ifelse(invnormcomb$adjpval<=alpha,1,0)) + +unionDE <- unique(c(fishcomb$DEindices,invnormcomb$DEindices)) +FC.selecDE <- data.frame(DEresults[unionDE,],FC[unionDE,],signFC=commonsgnFC[unionDE]) +keepDE <- FC.selecDE[which(abs(FC.selecDE$signFC)==1),] + +fishcomb_de <- rownames(keepDE)[which(keepDE[,"DE.fishercomb"]==1)] +invnorm_de <- rownames(keepDE)[which(keepDE[,"DE.invnorm"]==1)] +indstudy_de = list() +for (i in 1:length(listfiles)) { + currentIndstudy_de = rownames(keepDE)[which(keepDE[,i]==1)] + indstudy_de[[listfilenames[i]]]=currentIndstudy_de +} + +IDDIRRfishcomb=IDD.IRR(fishcomb_de,indstudy_de) +IDDIRRinvnorm=IDD.IRR(invnorm_de,indstudy_de) + +#conflits<-data.frame(ID=listData[[1]][rownames(DEresults),1],Fishercomb=DEresults[["DE.fishercomb"]],Invnormcomb=DEresults[["DE.invnorm"]],sign=commonsgnFC) +conflits<-data.frame(ID=listData[[1]][rownames(DEresults),1],DE=DEresults,FC=FC,signFC=commonsgnFC) +#write DE outputfile +write.table(conflits, outputfile,sep="\t",,row.names=FALSE) +library(VennDiagram) +DE_num=apply(DEresults, 2, FUN=function(x) which(x==1)) +venn.plot<-venn.diagram(x=as.list(DE_num),filename=NULL, col="black", fill=1:length(DE_num)+1,alpha=0.6) +temp.venn.plot = file.path( html.files.path, paste("venn.png")) +png(temp.venn.plot,width=500,height=500) +grid.draw(venn.plot) +dev.off() + +library(jsonlite) +matrixConflits=as.matrix(conflits) +datajson=toJSON(matrixConflits,pretty = TRUE) +summaryFishcombjson=toJSON(as.matrix(t(IDDIRRfishcomb)),pretty = TRUE) +summaryinvnormjson=toJSON(as.matrix(t(IDDIRRinvnorm)),pretty = TRUE) + + +#vennsplit=strsplit(result.venn,split="/")[[1]] +#venn=paste0("./",vennsplit[length(vennsplit)]) + + +vennFilename="venn.png" +vennFile=file.path(html.files.path,vennFilename) +htmlfile=readChar(result.template, file.info(result.template)$size) +htmlfile=gsub(x=htmlfile,pattern = "###DATAJSON###",replacement = datajson, fixed = TRUE) +htmlfile=gsub(x=htmlfile,pattern = "###FISHSUMMARYJSON###",replacement = summaryFishcombjson, fixed = TRUE) +htmlfile=gsub(x=htmlfile,pattern = "###INVSUMMARYJSON###",replacement = summaryinvnormjson, fixed = TRUE) +htmlfile=gsub(x=htmlfile,pattern = "###VENN###",replacement = vennFilename, fixed = TRUE) +write(htmlfile,result.html) + +#library(VennDiagram) +#flog.threshold(ERROR) +# +##venn.plot<-venn.diagram(x = c(res[c(1:(length(res)-3))],meta=list(res$Meta)),filename = v, col = "black", fill = c(1:(length(res)-2)), margin=0.05, alpha = 0.6,imagetype = "png") +#dir.create(result.path, showWarnings = TRUE, recursive = FALSE) +# +#showVenn<-function(liste,file) +#{ +# venn.plot<-venn.diagram(x = liste, +# filename = vennFilename, col = "black", +# fill = 1:length(liste)+1, +# margin=0.05, alpha = 0.6,imagetype = "png") +## png(file); +## grid.draw(venn.plot); +## dev.off(); +# +#} +# +#l=list() +#for(i in 1:length(esets)) +#{ +# l[[paste("study",i,sep="")]]<-res[[i]] +#} +#l[["Meta"]]=res[[length(res)-1]] +#showVenn(l,vennFile) +#file.copy(vennFilename,result.path) + + +#writeLines( c("<h2>Venn Plot</h2>"), file.conn) +#writeLines( c("<img src='venn.png'><br/><br/>"), file.conn) +#writeLines( c("</body></html>"), file.conn) +#close(file.conn) +#print("passe6") +#sink(NULL)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MetaRNASeq_tpl.html Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,167 @@ +<!DOCTYPE html> +<html> +<head> + <meta http-equiv="Content-type" content="text/html; charset=utf-8"> + <meta name="viewport" content="width=device-width,initial-scale=1"> + <title>DataTables example - Ajax sourced data</title> + <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.10.10/css/jquery.dataTables.min.css"> + <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/buttons/1.1.0/css/buttons.dataTables.min.css"> + + <!--link rel="stylesheet" type="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css"> + <link rel="stylesheet" type="https://cdn.datatables.net/1.10.11/css/dataTables.bootstrap.min.css"--> + + <style type="text/css" class="init"> + +td.details-control { + background: url('../../../static/images/fugue/toggle-expand.png') no-repeat center center; + cursor: pointer; +} +tr.shown td.details-control { + background: url('../../../static/images/fugue/toggle.png') no-repeat center center; +} + + .dataTable th,.dataTable td { + + text-overflow: ellipsis; + max-width: 200px; + min-width: 40px; + -o-text-overflow: ellipsis; + -ms-text-overflow: ellipsis; + -moz-text-overflow: ellipsis; + width: 100px; + white-space:nowrap; + overflow: hidden; + + + padding: 10px; + +} + + + </style> + + <script type="text/javascript" language="javascript" src="https://code.jquery.com/jquery-1.12.0.min.js"> + </script> + <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/1.10.11/js/jquery.dataTables.min.js"> + </script> + <!--script type="text/javascript" language="javascript" src="https://cdn.datatables.net/1.10.11/js/dataTables.bootstrap.min.js"> + </script--> + <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/buttons/1.1.0/js/dataTables.buttons.min.js"> + </script> + <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/buttons/1.1.0/js/buttons.flash.min.js"> + </script> + <script type="text/javascript" language="javascript" src="https://cdnjs.cloudflare.com/ajax/libs/jszip/2.5.0/jszip.min.js"> + </script> + <script type="text/javascript" language="javascript" src="https://cdn.rawgit.com/bpampuch/pdfmake/0.1.18/build/pdfmake.min.js"> + </script> + <script type="text/javascript" language="javascript" src="https://cdn.rawgit.com/bpampuch/pdfmake/0.1.18/build/vfs_fonts.js"> + </script> + <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/buttons/1.1.0/js/buttons.html5.min.js"> + </script> + <script type="text/javascript" class="init"> + +var dataSet=###DATAJSON###; +var fishsummarySet=###FISHSUMMARYJSON###; +var invsummarySet=###INVSUMMARYJSON###; + +$(document).ready(function() { + var table = $('#example').DataTable( { + "scrollX": true, + data: dataSet, + "columns": [ + {title : "ID" }, + {title : "Fishercomb" }, + {title : "Invnormcomb" }, + {title : "Sign" }, + ], + "order": [[0, 'asc']], + dom: 'Bfrtlip', + buttons: [ + 'copy', 'csv', 'excel' + ] + + } ); + +} ); + + + +$(document).ready(function() { + $('#fishsummary').DataTable( { + "columnDefs": [ + { className: "dt-center", targets: '_all' }], + "paging": false, + "ordering": false, + "info": false, + "searching": false, + data: fishsummarySet, + "columns": [ + {title : "DE" }, + {title : "IDD" }, + {title : "Loss" }, + {title : "IDR" }, + {title : "IRR" } + + + ] + + } ); +} ); + +$(document).ready(function() { + $('#invsummary').DataTable( { + "columnDefs": [ + { className: "dt-center", targets: '_all' } + ], + "paging": false, + "ordering": false, + "info": false, + "searching": false, + data: invsummarySet, + "columns": [ + {title : "DE" }, + {title : "IDD" }, + {title : "Loss" }, + {title : "IDR" }, + {title : "IRR" } + + + ] + + } ); +} ); + + + + </script> +</head> + +<body> + +<table><tr><td> +<h2>Venn diagram</h2> +<img src='###VENN###' height="400" width="400"><br/> +</td><td> + +</td></tr></table> + +<h2>Fisher combination summary</h2> +<table id="fishsummary" class="compact stripe row-border" cellspacing="0" width="100%"> </table> +</br> + +<h2>Invnorm combination summary</h2> +<table id="invsummary" class="compact stripe row-border" cellspacing="0" width="100%"> </table> +</br> +DE : Number of differentially expressed genes </br> +IDD (Integration Driven discoveries) : number of genes that are declared DE in the meta-analysis that were not identifed in any of the individual studies alone</br> +Loss : Number of genes that are identified DE in individual studies but not in meta-analysis </br> +IDR (Integration-driven Discovery Rate) : corresponding proportions of IDD </br> +IRR (Integration-driven Revision) : corresponding proportions of Loss</br> +<!--h2>Details</h2--> + + + +<!--table id="example" class="compact stripe row-border" cellspacing="0" width="100%"> +</table--> +</body> +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MetaRNAseq.xml Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,35 @@ +<tool id="metarnaseq" name="RNA-seq data meta-analysis"> + <description>perform meta-analysis thanks to metaRNAseq</description> + <requirements> + <!--container type="docker">sblanck/smagexp</container--> + <requirement type="package" version="0.1.0">r-smagexp</requirement> + </requirements> + <command interpreter="python"> + <![CDATA[ + stderr_wrapper.py Rscript + ${__tool_directory__}/MetaRNASeq.R + #for $currentInput in $inputList + "${currentInput}" + "${currentInput.name}" + #end for + $top_table + $diagnostic_html + "$diagnostic_html.files_path" + ${__tool_directory__}/MetaRNASeq_tpl.html + ]]> + </command> + + <inputs> + <param format="tabular" name="inputList" multiple="true" type="data" optional="false" label="Counts file" help="Must have the same number of row in each study. The experimental conditions must be specified in the header of each file"/> + </inputs> + + <outputs> + <data format="tabular" name="top_table" label="Summary of meta-analysis and single studie analysis from ${tool.name} on ${on_string}"/> + <data format="html" name="diagnostic_html" label="Charts for ${tool.name} on ${on_string}"/> + </outputs> + + <help> + + </help> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repository_dependencies.xml Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,4 @@ +<?xml version="1.0"?> +<repositories description="smagexp datatypes"> + <repository changeset_revision="f66f8566fd75" name="smagexp_datatypes" owner="sblanck" toolshed="https://testtoolshed.g2.bx.psu.edu" /> +</repositories>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/stderr_wrapper.py Tue Mar 21 10:28:47 2017 -0400 @@ -0,0 +1,57 @@ +#!/usr/bin/python + +""" +Wrapper that executes a program with its arguments but reports standard error +messages only if the program exit status was not 0. This is useful to prevent +Galaxy to interpret that there was an error if something was printed on stderr, +e.g. if this was simply a warning. +Example: ./stderr_wrapper.py myprog arg1 -f arg2 +Author: Florent Angly +""" + +import sys, subprocess + +assert sys.version_info[:2] >= ( 2, 4 ) + +def stop_err( msg ): + sys.stderr.write( "%s\n" % msg ) + sys.exit() + +def __main__(): + # Get command-line arguments + args = sys.argv + # Remove name of calling program, i.e. ./stderr_wrapper.py + args.pop(0) + # If there are no arguments left, we're done + if len(args) == 0: + return + + # If one needs to silence stdout + args.append( ">" ) + args.append( "/dev/null" ) + + #cmdline = " ".join(args) + #print cmdline + try: + # Run program + proc = subprocess.Popen( args=args, shell=False, stderr=subprocess.PIPE ) + returncode = proc.wait() + # Capture stderr, allowing for case where it's very large + stderr = '' + buffsize = 1048576 + try: + while True: + stderr += proc.stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + except OverflowError: + pass + # Running Grinder failed: write error message to stderr + if returncode != 0: + raise Exception, stderr + except Exception, e: + # Running Grinder failed: write error message to stderr + stop_err( 'Error: ' + str( e ) ) + + +if __name__ == "__main__": __main__()