changeset 120:5c94a732bd62 draft

planemo upload for repository https://bitbucket.org/EMCbioinf/galaxy-tool-shed-tools/raw/master/edger_with_design_matrix commit 608485ef08f6221f3247dafe3f8b0ab451871795
author yhoogstrate
date Sat, 12 Dec 2015 05:21:15 -0500
parents 0a05f6a91d71
children 6c90a67a13fa
files edgeR_Differential_Gene_Expression.xml test-data/Differential_Gene_Expression/differentially_expressed_genes.significant.tabular.txt
diffstat 2 files changed, 68 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/edgeR_Differential_Gene_Expression.xml	Thu Dec 10 10:34:10 2015 -0500
+++ b/edgeR_Differential_Gene_Expression.xml	Sat Dec 12 05:21:15 2015 -0500
@@ -81,6 +81,7 @@
             $design_matrix
             $contrast
             
+            $analysis_report_genes
             $fdr
             
             $output_count_edgeR 
@@ -174,28 +175,29 @@
 design_matrix_file                  <- args[2]
 contrast                            <- args[3]
 
-fdr                                 <- args[4]
+truncate_table_by_fdr               <- args[4]
+fdr                                 <- as.double(args[5])
 
-output_count_edgeR                  <- args[5]
-output_cpm                          <- args[6]
+output_count_edgeR                  <- args[6]
+output_cpm                          <- args[7]
 
-output_xpkm                         <- args[7]        ##FPKM file - to be implemented
+output_xpkm                         <- args[8]        ##FPKM file - to be implemented
 
-output_raw_counts                   <- args[8]
+output_raw_counts                   <- args[9]
 
-output_MDSplot_logFC                <- args[9]
-output_MDSplot_logFC_coordinates    <- args[10]
+output_MDSplot_logFC                <- args[10]
+output_MDSplot_logFC_coordinates    <- args[11]
 
-output_MDSplot_bcv                  <- args[11]
-output_MDSplot_bcv_coordinates      <- args[12]
+output_MDSplot_bcv                  <- args[12]
+output_MDSplot_bcv_coordinates      <- args[13]
 
-output_BCVplot                      <- args[13]
-output_MAplot                       <- args[14]
-output_PValue_distribution_plot     <- args[15]
-output_hierarchical_clustering_plot <- args[16]
-output_heatmap_plot                 <- args[17]
-output_RData_obj                    <- args[18]
-output_format_images                <- args[19]
+output_BCVplot                      <- args[14]
+output_MAplot                       <- args[15]
+output_PValue_distribution_plot     <- args[16]
+output_hierarchical_clustering_plot <- args[17]
+output_heatmap_plot                 <- args[18]
+output_RData_obj                    <- args[19]
+output_format_images                <- args[20]
 
 
 ## Obtain read-counts
@@ -388,7 +390,13 @@
 
   lrt <- glmLRT(fit, contrast=cont[,1])
   write(paste("Exporting DGE results to file...",output_count_edgeR,sep=""),stdout())
-  write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA)
+  
+  if(truncate_table_by_fdr =="all") {
+    write.table(file=output_count_edgeR,topTags(lrt,n=nrow(read_counts))\$table,sep="\t",row.names=TRUE,col.names=NA)
+  }
+  else {
+    write.table(file=output_count_edgeR,subset(topTags(lrt,n=nrow(read_counts))\$table, FDR < fdr),sep="\t",row.names=TRUE,col.names=NA)
+  }
   write.table(file=output_cpm,cpm(dge,normalized.lib.sizes=TRUE),sep="\t",row.names=TRUE,col.names=NA)
 
   ## todo EXPORT FPKM
@@ -530,7 +538,12 @@
             </when>
         </conditional>
         
-        <param name="fdr" type="float" min="0" max="1" value="0.05" label="False Discovery Rate (FDR)" />
+        <param name="analysis_report_genes" type="select" label="Report differentially expressed genes">
+            <option value="all" selected="true">All genes</option>
+            <option value="significant">Only significant (defined by FDR cutoff)</option>
+        </param>
+        
+        <param name="fdr" type="float" min="0" max="1" value="0.01" label="False Discovery Rate (FDR) cutoff" help="Used to highlight significant genes in figures and, when only significant genes are reported, to select the genes written to the output table" />
         
         <param name="outputs" type="select" label="Optional desired outputs" multiple="true" display="checkboxes">
             <option value="make_output_raw_counts">Raw counts table</option>
@@ -661,8 +674,36 @@
             
             <param name="contrast" value="E-C"/>
         
+            <param name="analysis_report_genes" value="all"/>
+            <param name="fdr" value="0.01" />
+            
+            <output name="output_count_edgeR" file="Differential_Gene_Expression/differentially_expressed_genes.tabular.txt" />
+        </test>
+        <test>
+            <param name="analysis_select" value="multi_factor" />
+            
+            <param name="expression_matrix" value="Differential_Gene_Expression/expression_matrix.tabular.txt" />
+            <param name="design_matrix" value="Differential_Gene_Expression/design_matrix.tabular.txt" />
+            
+            <param name="contrast" value="E-C"/>
+        
+            <param name="analysis_report_genes" value="significant"/>
             <param name="fdr" value="0.05" />
             
+            <output name="output_count_edgeR" file="Differential_Gene_Expression/differentially_expressed_genes.significant.tabular.txt" />
+        </test>
+        <test>
+            <param name="analysis_select" value="2_factor" />
+            
+            <param name="factorLevel_control" value="C" />
+            <param name="countsFile_control" value="Differential_Gene_Expression/C1,Differential_Gene_Expression/C2,Differential_Gene_Expression/C3,Differential_Gene_Expression/C4" ftype="tabular" />
+            
+            <param name="factorLevel_condition" value="E" />
+            <param name="countsFile_condition" value="Differential_Gene_Expression/E1,Differential_Gene_Expression/E2,Differential_Gene_Expression/E3,Differential_Gene_Expression/E4" ftype="tabular" />
+        
+            <param name="analysis_report_genes" value="all"/>
+            <param name="fdr" value="0.01" />
+            
             <output name="output_count_edgeR" file="Differential_Gene_Expression/differentially_expressed_genes.tabular.txt" />
         </test>
         <test>
@@ -674,9 +715,10 @@
             <param name="factorLevel_condition" value="E" />
             <param name="countsFile_condition" value="Differential_Gene_Expression/E1,Differential_Gene_Expression/E2,Differential_Gene_Expression/E3,Differential_Gene_Expression/E4" ftype="tabular" />
         
+            <param name="analysis_report_genes" value="significant"/>
             <param name="fdr" value="0.05" />
             
-            <output name="output_count_edgeR" file="Differential_Gene_Expression/differentially_expressed_genes.tabular.txt" />
+            <output name="output_count_edgeR" file="Differential_Gene_Expression/differentially_expressed_genes.significant.tabular.txt" />
         </test>
     </tests>
     
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/differentially_expressed_genes.significant.tabular.txt	Sat Dec 12 05:21:15 2015 -0500
@@ -0,0 +1,7 @@
+""	"genes"	"logFC"	"logCPM"	"LR"	"PValue"	"FDR"
+"15"	"RET"	1.94897640107286	13.2940435307943	77.6545995415986	1.22730171935022e-18	3.55917498611563e-17
+"24"	"HSPA8"	0.607138087178614	18.9380827005326	16.8408380186893	4.06490891119454e-05	0.000589411792123208
+"12"	"IFI44L"	-0.665544707287881	11.7020333673755	13.7144720195324	0.000212808308075529	0.00205714697806344
+"10"	"MYO18A"	-0.608389235629078	14.1586814058554	10.0030349277278	0.00156282461006963	0.0113304784230048
+"4"	"DDX11"	0.719283453206409	12.4597575302041	9.1203698809081	0.00252778847312638	0.014661173144133
+"26"	"NR2C2AP"	0.538719097450497	12.9331552590697	7.88314604309164	0.00498976028708414	0.0241171747209067