Mercurial > repos > yhoogstrate > edger_with_design_matrix
changeset 119:0a05f6a91d71 draft
planemo upload for repository https://bitbucket.org/EMCbioinf/galaxy-tool-shed-tools/raw/master/edger_with_design_matrix commit b3dcbc6b4e0510909aef9969da9941bed18599e6
author | yhoogstrate |
---|---|
date | Thu, 10 Dec 2015 10:34:10 -0500 |
parents | 7e98e8bcfbf7 |
children | 5c94a732bd62 |
files | README.rst edgeR_Differential_Gene_Expression.xml test-data/Differential_Gene_Expression/C1 test-data/Differential_Gene_Expression/C2 test-data/Differential_Gene_Expression/C3 test-data/Differential_Gene_Expression/C4 test-data/Differential_Gene_Expression/E1 test-data/Differential_Gene_Expression/E2 test-data/Differential_Gene_Expression/E3 test-data/Differential_Gene_Expression/E4 |
diffstat | 10 files changed, 354 insertions(+), 54 deletions(-) [+] |
line wrap: on
line diff
--- a/README.rst Wed Dec 09 10:42:33 2015 -0500 +++ b/README.rst Thu Dec 10 10:34:10 2015 -0500 @@ -1,6 +1,13 @@ EdgeR wrapper for Galaxy ======================== +This is a wrapper for the RNA-Seq differential gene expression analysis tool EdgeR. +This wrapper contains 2 flavours of tests, a classical 2 group analysis and a more +sophisticated multi-factor analysis. + +Input data can be generated using so-called count tools. The wrapper has been written +to be compatible with at least featureCounts (by yhoogstrate) and HTSeq-count (by iuc). + http://www.bioconductor.org/packages/release/bioc/html/edgeR.html Implementation of EdgeR supporting quite advanced experimental @@ -42,17 +49,4 @@ **This wrapper**: - Copyright (C) 2013-2015 Youri Hoogstrate - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. +GPL (>=2)
--- a/edgeR_Differential_Gene_Expression.xml Wed Dec 09 10:42:33 2015 -0500 +++ b/edgeR_Differential_Gene_Expression.xml Thu Dec 10 10:34:10 2015 -0500 @@ -36,7 +36,46 @@ <version_command>echo $(R --version | grep version | grep -v GNU)", EdgeR version" $(R --vanilla --slave -e "library(edgeR) ; cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ")</version_command> - <command> + <command><![CDATA[ + #if $analysis_type.analysis_select == "multi_factor" + #set $expression_matrix = $analysis_type.expression_matrix + #set $design_matrix = $analysis_type.design_matrix + #set $contrast = $analysis_type.contrast + #else + ## Design and Expression matrices do not exist - create them + #set $expression_matrix = "expression_matrix.txt" + #set $design_matrix = "design_matrix.txt" + #set $contrast = str($analysis_type.factorLevel_condition)+"-"+str($analysis_type.factorLevel_control) + + ## -- Create expression matrix + cut -f 1 "$analysis_type.countsFile_control[1]" > gene_ids.column.txt && + #for $file in $analysis_type.countsFile_control: + cut -f 2 "${file}" > "${file}.expression_column.txt" && + #end for + #for $file in $analysis_type.countsFile_condition: + cut -f 2 "${file}" > "${file}.expression_column.txt" && + #end for + + paste + gene_ids.column.txt + #for $file in $analysis_type.countsFile_control: + "${file}.expression_column.txt" + #end for + #for $file in $analysis_type.countsFile_condition: + "${file}.expression_column.txt" + #end for + > "${expression_matrix}" && + + ## -- Create design matrix matrix + echo "sample-name Condition" >> ${design_matrix} && + #for $file in $analysis_type.countsFile_control: + echo "${file.name} ${analysis_type.factorLevel_control}" >> ${design_matrix} && + #end for + #for $file in $analysis_type.countsFile_condition: + echo "${file.name} ${analysis_type.factorLevel_condition}" >> ${design_matrix} && + #end for + #end if + R --vanilla --slave -f $R_script '--args $expression_matrix $design_matrix @@ 
-47,7 +86,7 @@ $output_count_edgeR $output_cpm - /dev/null <!-- Calculation of FPKM/RPKM should come here --> + /dev/null ### Calculation of FPKM/RPKM should come here #if $output_raw_counts: $output_raw_counts @@ -117,6 +156,7 @@ $output_format_images ' + ]]> </command> <configfiles> @@ -458,10 +498,37 @@ </configfiles> <inputs> - <param name="expression_matrix" type="data" format="tabular" label="Expression (read count) matrix" /> - <param name="design_matrix" type="data" format="tabular" label="Design matrix" help="Ensure your samplenames are identical to those in the expression matrix. Preferentially, create the contrast matrix using 'edgeR: Design- from Expression matrix'." /> - - <param name="contrast" type="text" label="Contrast (biological question)" help="e.g. 'tumor-normal' or '(G1+G2)/2-G3' using the factors chosen in the design matrix. Read the 'makeContrasts' manual from Limma package for more info: http://www.bioconductor.org/packages/release/bioc/html/limma.html and http://www.bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf." /> + <conditional name="analysis_type"> + <param name="analysis_select" type="select" label="Analysis type"> + <option value="2_factor" selected="true">2-Group test</option> + <option value="multi_factor">Multigroup test and/or complex designs with e.g. 
blocking</option> + </param> + <when value="2_factor"> + <param name="factorLevel_control" type="text" value="Control" + label="Specify a factor level" help="Only letters, numbers and underscores will be retained in this field"> + <sanitizer> + <valid initial="string.letters,string.digits"><add value="_" /></valid> + </sanitizer> + </param> + <param name="countsFile_control" type="data" format="tabular,csv" multiple="true" label="Counts file(s)"/> + + <param name="factorLevel_condition" type="text" value="Condition" + label="Specify a factor level" help="Only letters, numbers and underscores will be retained in this field"> + <sanitizer> + <valid initial="string.letters,string.digits"><add value="_" /></valid> + </sanitizer> + </param> + <param name="countsFile_condition" type="data" format="tabular,csv" multiple="true" label="Counts file(s)"/> + </when> + <when value="multi_factor"> + <param name="expression_matrix" type="data" format="tabular,csv" label="Expression (read count) matrix" /> + <param name="design_matrix" type="data" format="tabular,csv" label="Design matrix" + help="Ensure your samplenames are identical to those in the expression matrix. Preferentially, create the contrast matrix using 'edgeR: Design- from Expression matrix'." /> + + <param name="contrast" type="text" label="Contrast (biological question)" + help="e.g. 'tumor-normal' or '(G1+G2)/2-G3' using the factors chosen in the design matrix. Read the 'makeContrasts' manual from Limma package for more info: http://www.bioconductor.org/packages/release/bioc/html/limma.html and http://www.bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf." 
/> + </when> + </conditional> <param name="fdr" type="float" min="0" max="1" value="0.05" label="False Discovery Rate (FDR)" /> @@ -482,19 +549,23 @@ <param name="output_format_images" type="select" label="Output format of images" display="radio"> <option value="png">Portable network graphics (.png)</option> <option value="pdf">Portable document format (.pdf)</option> - <option value="svg">Scalable vector graphics (.svg)</option> + <option value="svg" selected="true">Scalable vector graphics (.svg)</option> </param> </inputs> <outputs> - <data format="tabular" name="output_count_edgeR" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - differentially expressed genes" /> - <data format="tabular" name="output_cpm" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - CPM" /> + <data format="tabular" name="output_count_edgeR" label="edgeR DGE on ${on_string}: differentially expressed genes" > + <actions> + <action name="column_names" type="metadata" default="original_gene_position,genes,logFC,logCPM,LR,PValue,FDR" /> + </actions> + </data> + <data format="tabular" name="output_cpm" label="edgeR DGE on ${on_string}: CPM" /> - <data format="tabular" name="output_raw_counts" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - raw counts"> + <data format="tabular" name="output_raw_counts" label="edgeR DGE on ${on_string}: raw counts"> <filter>outputs and ("make_output_raw_counts" in outputs)</filter> </data> - <data format="png" name="output_MDSplot_logFC" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MDS-plot (logFC method)"> + <data format="png" name="output_MDSplot_logFC" label="edgeR DGE on ${on_string}: MDS-plot (logFC method)"> <filter>outputs and ("make_output_MDSplot_logFC" in outputs)</filter> <change_format> @@ -504,11 +575,11 @@ </change_format> </data> - <data format="tabular" name="output_MDSplot_logFC_coordinates" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MDS-plot 
coordinates table (logFC method)"> + <data format="tabular" name="output_MDSplot_logFC_coordinates" label="edgeR DGE on ${on_string}: MDS-plot coordinates table (logFC method)"> <filter>outputs and ("make_output_MDSplot_logFC_coordinates" in outputs)</filter> </data> - <data format="png" name="output_MDSplot_bcv" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MDS-plot (bcv method)"> + <data format="png" name="output_MDSplot_bcv" label="edgeR DGE on ${on_string}: MDS-plot (bcv method)"> <filter>outputs and ("make_output_MDSplot_bcv" in outputs)</filter> <change_format> @@ -518,11 +589,11 @@ </change_format> </data> - <data format="tabular" name="output_MDSplot_bcv_coordinates" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MDS-plot coordinates table (BCV method)"> + <data format="tabular" name="output_MDSplot_bcv_coordinates" label="edgeR DGE on ${on_string}: MDS-plot coordinates table (BCV method)"> <filter>outputs and ("make_output_MDSplot_bcv_coordinates" in outputs)</filter> </data> - <data format="png" name="output_BCVplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - BCV-plot"> + <data format="png" name="output_BCVplot" label="edgeR DGE on ${on_string}: BCV-plot"> <filter>outputs and ("make_output_BCVplot" in outputs)</filter> <change_format> @@ -532,7 +603,7 @@ </change_format> </data> - <data format="png" name="output_MAplot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - MA-plot"> + <data format="png" name="output_MAplot" label="edgeR DGE on ${on_string}: MA-plot"> <filter>outputs and ("make_output_MAplot" in outputs)</filter> <change_format> @@ -542,7 +613,7 @@ </change_format> </data> - <data format="png" name="output_PValue_distribution_plot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - P-Value distribution"> + <data format="png" name="output_PValue_distribution_plot" label="edgeR DGE on ${on_string}: P-Value distribution"> <filter>outputs and 
("make_output_PValue_distribution_plot" in outputs)</filter> <change_format> @@ -552,7 +623,7 @@ </change_format> </data> - <data format="png" name="output_hierarchical_clustering_plot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - Hierarchical custering"> + <data format="png" name="output_hierarchical_clustering_plot" label="edgeR DGE on ${on_string}: Hierarchical custering"> <filter>outputs and ("make_output_hierarchical_clustering_plot" in outputs)</filter> <change_format> @@ -562,7 +633,7 @@ </change_format> </data> - <data format="png" name="output_heatmap_plot" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - Heatmap"> + <data format="png" name="output_heatmap_plot" label="edgeR DGE on ${on_string}: Heatmap"> <filter>outputs and ("make_output_heatmap_plot" in outputs)</filter> <change_format> @@ -572,17 +643,19 @@ </change_format> </data> - <data format="RData" name="output_RData_obj" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - R data object"> + <data format="RData" name="output_RData_obj" label="edgeR DGE on ${on_string}: R data object"> <filter>outputs and ("make_output_RData_obj" in outputs)</filter> </data> - <data format="txt" name="output_R" label="edgeR DGE on ${design_matrix.hid}: ${design_matrix.name} - R output (debug)" > + <data format="txt" name="output_R" label="edgeR DGE on ${on_string}: R output (debug)" > <filter>outputs and ("make_output_R_stdout" in outputs)</filter> </data> </outputs> <tests> <test> + <param name="analysis_select" value="multi_factor" /> + <param name="expression_matrix" value="Differential_Gene_Expression/expression_matrix.tabular.txt" /> <param name="design_matrix" value="Differential_Gene_Expression/design_matrix.tabular.txt" /> @@ -590,7 +663,18 @@ <param name="fdr" value="0.05" /> - <param name="output_format_images" value="png" /> + <output name="output_count_edgeR" file="Differential_Gene_Expression/differentially_expressed_genes.tabular.txt" /> + </test> + 
<test> + <param name="analysis_select" value="2_factor" /> + + <param name="factorLevel_control" value="C" /> + <param name="countsFile_control" value="Differential_Gene_Expression/C1,Differential_Gene_Expression/C2,Differential_Gene_Expression/C3,Differential_Gene_Expression/C4" ftype="tabular" /> + + <param name="factorLevel_condition" value="E" /> + <param name="countsFile_condition" value="Differential_Gene_Expression/E1,Differential_Gene_Expression/E2,Differential_Gene_Expression/E3,Differential_Gene_Expression/E4" ftype="tabular" /> + + <param name="fdr" value="0.05" /> <output name="output_count_edgeR" file="Differential_Gene_Expression/differentially_expressed_genes.tabular.txt" /> </test> @@ -661,24 +745,6 @@ - African-European - 0.5*(Control+Placebo) / Treated -Installation ------------- - -This tool requires no specific configuration. The following dependencies will installed automatically: - -- R -- limma -- edgeR - -License -------- -- R - - GPL 2 & GPL 3 -- limma - - GPL (>=2) -- edgeR - - GPL (>=2) - @CONTACT@ </help>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Differential_Gene_Expression/C1 Thu Dec 10 10:34:10 2015 -0500 @@ -0,0 +1,30 @@ +Geneid C1 +COMMD10 966 +USP26 1 +DDX17 8544 +DDX11 329 +PTPN20B 0 +SLC35D3 1 +GLOD4 1614 +GIMAP7 0 +TXLNB 15 +MYO18A 1775 +ATG4B 936 +IFI44L 347 +KHSRP 2557 +KCNAB3 20 +RET 331 +IQCG 125 +C20orf118 9 +GPIHBP1 0 +RASSF3 658 +FUT8 4834 +LYSMD3 1333 +LMOD3 12 +HIPK1 24218 +HSPA8 44244 +TAS2R39 0 +NR2C2AP 606 +INADL 4315 +TMEM31 5 +GC 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Differential_Gene_Expression/C2 Thu Dec 10 10:34:10 2015 -0500 @@ -0,0 +1,30 @@ +Geneid C2 +COMMD10 1067 +USP26 0 +DDX17 13335 +DDX11 970 +PTPN20B 0 +SLC35D3 0 +GLOD4 2596 +GIMAP7 1 +TXLNB 29 +MYO18A 4666 +ATG4B 2602 +IFI44L 678 +KHSRP 5001 +KCNAB3 42 +RET 695 +IQCG 193 +C20orf118 20 +GPIHBP1 0 +RASSF3 1060 +FUT8 6459 +LYSMD3 1679 +LMOD3 31 +HIPK1 35223 +HSPA8 58864 +TAS2R39 0 +NR2C2AP 1162 +INADL 6418 +TMEM31 10 +GC 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Differential_Gene_Expression/C3 Thu Dec 10 10:34:10 2015 -0500 @@ -0,0 +1,30 @@ +Geneid C3 +COMMD10 438 +USP26 1 +DDX17 4579 +DDX11 221 +PTPN20B 0 +SLC35D3 0 +GLOD4 965 +GIMAP7 0 +TXLNB 9 +MYO18A 1193 +ATG4B 638 +IFI44L 307 +KHSRP 1593 +KCNAB3 10 +RET 361 +IQCG 84 +C20orf118 3 +GPIHBP1 0 +RASSF3 405 +FUT8 2599 +LYSMD3 666 +LMOD3 7 +HIPK1 14147 +HSPA8 26628 +TAS2R39 0 +NR2C2AP 403 +INADL 2421 +TMEM31 3 +GC 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Differential_Gene_Expression/C4 Thu Dec 10 10:34:10 2015 -0500 @@ -0,0 +1,30 @@ +Geneid C4 +COMMD10 1231 +USP26 0 +DDX17 16358 +DDX11 867 +PTPN20B 0 +SLC35D3 2 +GLOD4 2912 +GIMAP7 0 +TXLNB 25 +MYO18A 4741 +ATG4B 2394 +IFI44L 784 +KHSRP 5513 +KCNAB3 34 +RET 669 +IQCG 229 +C20orf118 14 +GPIHBP1 0 +RASSF3 1277 +FUT8 7977 +LYSMD3 2029 +LMOD3 48 +HIPK1 47991 +HSPA8 76924 +TAS2R39 0 +NR2C2AP 1223 +INADL 8507 +TMEM31 14 +GC 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Differential_Gene_Expression/E1 Thu Dec 10 10:34:10 2015 -0500 @@ -0,0 +1,30 @@ +Geneid E1 +COMMD10 964 +USP26 0 +DDX17 6995 +DDX11 916 +PTPN20B 0 +SLC35D3 1 +GLOD4 1807 +GIMAP7 1 +TXLNB 14 +MYO18A 1669 +ATG4B 1605 +IFI44L 268 +KHSRP 3162 +KCNAB3 28 +RET 2077 +IQCG 118 +C20orf118 6 +GPIHBP1 0 +RASSF3 507 +FUT8 4291 +LYSMD3 868 +LMOD3 19 +HIPK1 19201 +HSPA8 72195 +TAS2R39 0 +NR2C2AP 1293 +INADL 3443 +TMEM31 6 +GC 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Differential_Gene_Expression/E2 Thu Dec 10 10:34:10 2015 -0500 @@ -0,0 +1,30 @@ +Geneid E2 +COMMD10 812 +USP26 0 +DDX17 8079 +DDX11 632 +PTPN20B 0 +SLC35D3 0 +GLOD4 1448 +GIMAP7 0 +TXLNB 15 +MYO18A 1457 +ATG4B 953 +IFI44L 302 +KHSRP 2624 +KCNAB3 34 +RET 1431 +IQCG 116 +C20orf118 13 +GPIHBP1 0 +RASSF3 575 +FUT8 4187 +LYSMD3 1141 +LMOD3 26 +HIPK1 28435 +HSPA8 61132 +TAS2R39 0 +NR2C2AP 761 +INADL 4415 +TMEM31 5 +GC 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Differential_Gene_Expression/E3 Thu Dec 10 10:34:10 2015 -0500 @@ -0,0 +1,30 @@ +Geneid E3 +COMMD10 528 +USP26 0 +DDX17 5994 +DDX11 706 +PTPN20B 0 +SLC35D3 2 +GLOD4 1039 +GIMAP7 0 +TXLNB 6 +MYO18A 1497 +ATG4B 1185 +IFI44L 191 +KHSRP 2434 +KCNAB3 22 +RET 1490 +IQCG 79 +C20orf118 10 +GPIHBP1 0 +RASSF3 401 +FUT8 2974 +LYSMD3 749 +LMOD3 9 +HIPK1 20715 +HSPA8 42728 +TAS2R39 0 +NR2C2AP 726 +INADL 3094 +TMEM31 6 +GC 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Differential_Gene_Expression/E4 Thu Dec 10 10:34:10 2015 -0500 @@ -0,0 +1,30 @@ +Geneid E4 +COMMD10 860 +USP26 0 +DDX17 6596 +DDX11 518 +PTPN20B 0 +SLC35D3 1 +GLOD4 1564 +GIMAP7 0 +TXLNB 17 +MYO18A 1121 +ATG4B 911 +IFI44L 269 +KHSRP 2509 +KCNAB3 10 +RET 1327 +IQCG 107 +C20orf118 9 +GPIHBP1 0 +RASSF3 568 +FUT8 4154 +LYSMD3 1076 +LMOD3 20 +HIPK1 22614 +HSPA8 67106 +TAS2R39 0 +NR2C2AP 902 +INADL 3441 +TMEM31 3 +GC 0