Mercurial > repos > yhoogstrate > edger_with_design_matrix
changeset 108:a02794bb9073 draft
planemo upload for repository https://bitbucket.org/EMCbioinf/galaxy-tool-shed-tools/raw/master/edger_with_design_matrix commit 2700e500a4fb135a20ede7d52221a9d31f1aaa5e-dirty
author | yhoogstrate |
---|---|
date | Tue, 01 Sep 2015 04:45:24 -0400 |
parents | 049d8bc2214e |
children | dfebd3787276 |
files | edgeR_Concatenate_Expression_Matrices.xml edgeR_Convert_DGE_Table_to_Bedgraph.xml edgeR_Design_from_Expression_Matrix.xml edgeR_Differential_Gene_Expression.xml edgeR_macros.xml |
diffstat | 5 files changed, 430 insertions(+), 450 deletions(-) [+] |
line wrap: on
line diff
--- a/edgeR_Concatenate_Expression_Matrices.xml Tue Sep 01 04:32:16 2015 -0400 +++ b/edgeR_Concatenate_Expression_Matrices.xml Tue Sep 01 04:45:24 2015 -0400 @@ -1,168 +1,172 @@ <?xml version="1.0" encoding="UTF-8"?> <tool id="concatenate_expression_matrices" name="edgeR: Concatenate Expression Matrices" version="1.0.0"> - <description>Create a full expression matrix by selecting the desired columns from specific count tables</description> - - <command> - #set $j = 0 - #set $paste = [] - - #if $add_geneids.choice == "true": - #set $filename = str($j)+".txt" - #set paste = paste + [$filename] - - cut -f $add_geneids.column_geneids.value "$add_geneids.sample_geneids" > $filename ; - #set $j += 1 - #end if - - #for $sample in $samples: - ##echo "$sample.column_index" - ##echo "$sample.column_index.value" - - #set $column_str = ",".join([str(x).strip() for x in $sample.column_index.value]) - #set $filename = str($j)+".txt" - #set paste = paste + [$filename] - - cut -f "$column_str" "$sample.sample" > $filename ; - #set $j += 1 - #end for - - #if $add_lengths.choice == "true": - #set $filename = str($j)+".txt" - #set paste = paste + [$filename] - - cut -f $add_lengths.column_lengths.value "$add_lengths.sample_lengths" > $filename ; - #set $j += 1 - #end if - - #set $paste_str = " ".join([str(x).strip() for x in $paste]) - - paste $paste_str > $expression_matrix - </command> - - <inputs> - <conditional name="add_geneids"> - <param name="choice" type="select" label="Add a gene-IDs column at the end of the file" help="Highly recommended to select!" > - <option value="false">No</option> - <option value="true" selected="true">Yes</option> - </param> - <when value="false" /> - <when value="true"> - <param name="sample_geneids" type="data" format="tabular" label="Select Read-count dataset that contains a column for GeneIDs" help="from featureCounts/DEXSeq-count/HTSeq-count, etc." /> - <param name="column_geneids" numerical="false" use_header_names="true" label="Select GeneID column" type="data_column" data_ref="sample_geneids" multiple="false"> - <validator type="no_options" message="Please select at least one column." /> - </param> - </when> - </conditional> - - <repeat name="samples" title="Expression Table" help="E.g. a earlier concatenated table, or a HT-seq, featureCounts or DESeq-count result."> - <param name="sample" type="data" format="tabular" label="Read-count dataset that belongs to a pair" help="from featureCounts/DEXSeq-count/HTSeq-count, etc." /> - <!-- select columns --> - <param name="column_index" numerical="false" use_header_names="true" label="Select columns that are associated with this factor level" type="data_column" data_ref="sample" multiple="true" size="120" display="checkboxes"> - <validator type="no_options" message="Please select at least one column." /> - </param> - </repeat> - - <conditional name="add_lengths"> - <param name="choice" type="select" label="Add a gene-lengths column at the end of the file" help="Optional, only usefull if RPKM/FPKM calculation is desired." > - <option value="false">No</option> - <option value="true">Yes</option> - </param> - <when value="false" /> - <when value="true"> - <param name="sample_lengths" type="data" format="tabular" label="Read-count dataset that belongs to a pair" help="from featureCounts/DEXSeq-count/HTSeq-count, etc." /> - <param name="column_lengths" numerical="false" use_header_names="true" label="Select columns that are associated with this factor level" type="data_column" data_ref="sample_lengths" multiple="false" size="120"> - <validator type="no_options" message="Please select at least one column." /> - </param> - </when> - </conditional> - - <param name="remove_comment_lines" type="boolean" label="Automatically remove 'comment' lines starting with a '#'" truevale="true" falsevalue="false" selected="true" help="Some tools (incl. featureCounts) include comment lines that are not neccesairy for downstream analyis. By enabling this function, these lines will be removed." /> - - </inputs> - - <outputs> - <data format="tabular" name="expression_matrix" label="Expression matrix" /> - </outputs> - - <tests> - <test> - <conditional name="add_geneids"> - <param name="choice" value="true" /> - <param name="sample_geneids" value="GSE51403/GSE51403_expression_matrix_GeneLengths.txt" /> - <param name="column_geneids" value="1" /> - </conditional> - - <param name="samples_0|sample" value="GSE51403/GSE51403_expression_matrix_Control_1.txt" /> - <param name="samples_0|column_index" value="2" /> - - <conditional name="add_lengths"> - <param name="choice" value="false" /> - </conditional> - - <param name="remove_comment_lines" value="false" /> - - <output name="expression_matrix" file="GSE51403/GSE51403_expression_matrix_Control_1.txt" /> - </test> - <test> - <conditional name="add_geneids"> - <param name="choice" value="true" /> - <param name="sample_geneids" value="GSE51403/GSE51403_expression_matrix_GeneLengths.txt" /> - <param name="column_geneids" value="1" /> - </conditional> - - <!-- <repeat name="samples"> --> - <param name="samples_0|sample" value="GSE51403/GSE51403_expression_matrix_Control_1.txt" /> - <param name="samples_0|column_index" value="2" /> - <!-- </repeat> --> - - <!-- <repeat name="samples"> --> - <param name="samples_1|sample" value="GSE51403/GSE51403_expression_matrix_Control_2.txt" /> - <param name="samples_1|column_index" value="2" /> - <!-- </repeat> --> - - <!-- <repeat name="samples"> --> - <param name="samples_2|sample" value="GSE51403/GSE51403_expression_matrix_Control_3.txt" /> - <param name="samples_2|column_index" value="2" /> - <!-- </repeat> --> - - <!-- <repeat name="samples"> --> - <param name="samples_3|sample" value="GSE51403/GSE51403_expression_matrix_Control_4.txt" /> - <param name="samples_3|column_index" value="2" /> - <!-- </repeat> --> - - <!-- <repeat name="samples"> --> - <param name="samples_4|sample" value="GSE51403/GSE51403_expression_matrix_Control_5.txt" /> - <param name="samples_4|column_index" value="2" /> - <!-- </repeat> --> - - <!-- <repeat name="samples"> --> - <param name="samples_5|sample" value="GSE51403/GSE51403_expression_matrix_Control_6.txt" /> - <param name="samples_5|column_index" value="2" /> - <!-- </repeat> --> - - <!-- <repeat name="samples"> --> - <param name="samples_6|sample" value="GSE51403/GSE51403_expression_matrix_Control_7.txt" /> - <param name="samples_6|column_index" value="2" /> - <!-- </repeat> --> - - <!-- <repeat name="samples"> --> - <param name="samples_7|sample" value="GSE51403/GSE51403_expression_matrix_E2.txt" /> - <param name="samples_7|column_index" value="2,3,4,5,6,7,8" /> - <!-- </repeat> --> - - <conditional name="add_lengths"> - <param name="choice" value="true" /> - <param name="sample_lengths" value="GSE51403/GSE51403_expression_matrix_GeneLengths.txt" /> - <param name="column_lengths" value="2" /> - </conditional> - - <param name="remove_comment_lines" value="false" /> - - <output name="expression_matrix" file="GSE51403/GSE51403_expression_matrix_full.txt" /> - </test> - </tests> - - <help> + <description>Create a full expression matrix by selecting the desired columns from specific count tables</description> + + <macros> + <import>edgeR_macros.xml</import> + </macros> + + <command> + #set $j = 0 + #set $paste = [] + + #if $add_geneids.choice == "true": + #set $filename = str($j)+".txt" + #set paste = paste + [$filename] + + cut -f $add_geneids.column_geneids.value "$add_geneids.sample_geneids" > $filename ; + #set $j += 1 + #end if + + #for $sample in $samples: + ##echo "$sample.column_index" + ##echo "$sample.column_index.value" + + #set $column_str = ",".join([str(x).strip() for x in $sample.column_index.value]) + #set $filename = str($j)+".txt" + #set paste = paste + [$filename] + + cut -f "$column_str" "$sample.sample" > $filename ; + #set $j += 1 + #end for + + #if $add_lengths.choice == "true": + #set $filename = str($j)+".txt" + #set paste = paste + [$filename] + + cut -f $add_lengths.column_lengths.value "$add_lengths.sample_lengths" > $filename ; + #set $j += 1 + #end if + + #set $paste_str = " ".join([str(x).strip() for x in $paste]) + + paste $paste_str > $expression_matrix + </command> + + <inputs> + <conditional name="add_geneids"> + <param name="choice" type="select" label="Add a gene-IDs column at the end of the file" help="Highly recommended to select!" > + <option value="false">No</option> + <option value="true" selected="true">Yes</option> + </param> + <when value="false" /> + <when value="true"> + <param name="sample_geneids" type="data" format="tabular" label="Select Read-count dataset that contains a column for GeneIDs" help="from featureCounts/DEXSeq-count/HTSeq-count, etc." /> + <param name="column_geneids" numerical="false" use_header_names="true" label="Select GeneID column" type="data_column" data_ref="sample_geneids" multiple="false"> + <validator type="no_options" message="Please select at least one column." /> + </param> + </when> + </conditional> + + <repeat name="samples" title="Expression Table" help="E.g. a earlier concatenated table, or a HT-seq, featureCounts or DESeq-count result."> + <param name="sample" type="data" format="tabular" label="Read-count dataset that belongs to a pair" help="from featureCounts/DEXSeq-count/HTSeq-count, etc." /> + <!-- select columns --> + <param name="column_index" numerical="false" use_header_names="true" label="Select columns that are associated with this factor level" type="data_column" data_ref="sample" multiple="true" size="120" display="checkboxes"> + <validator type="no_options" message="Please select at least one column." /> + </param> + </repeat> + + <conditional name="add_lengths"> + <param name="choice" type="select" label="Add a gene-lengths column at the end of the file" help="Optional, only usefull if RPKM/FPKM calculation is desired." > + <option value="false">No</option> + <option value="true">Yes</option> + </param> + <when value="false" /> + <when value="true"> + <param name="sample_lengths" type="data" format="tabular" label="Read-count dataset that belongs to a pair" help="from featureCounts/DEXSeq-count/HTSeq-count, etc." /> + <param name="column_lengths" numerical="false" use_header_names="true" label="Select columns that are associated with this factor level" type="data_column" data_ref="sample_lengths" multiple="false" size="120"> + <validator type="no_options" message="Please select at least one column." /> + </param> + </when> + </conditional> + + <param name="remove_comment_lines" type="boolean" label="Automatically remove 'comment' lines starting with a '#'" truevale="true" falsevalue="false" selected="true" help="Some tools (incl. featureCounts) include comment lines that are not neccesairy for downstream analyis. By enabling this function, these lines will be removed." /> + + </inputs> + + <outputs> + <data format="tabular" name="expression_matrix" label="Expression matrix" /> + </outputs> + + <tests> + <test> + <conditional name="add_geneids"> + <param name="choice" value="true" /> + <param name="sample_geneids" value="GSE51403/GSE51403_expression_matrix_GeneLengths.txt" /> + <param name="column_geneids" value="1" /> + </conditional> + + <param name="samples_0|sample" value="GSE51403/GSE51403_expression_matrix_Control_1.txt" /> + <param name="samples_0|column_index" value="2" /> + + <conditional name="add_lengths"> + <param name="choice" value="false" /> + </conditional> + + <param name="remove_comment_lines" value="false" /> + + <output name="expression_matrix" file="GSE51403/GSE51403_expression_matrix_Control_1.txt" /> + </test> + <test> + <conditional name="add_geneids"> + <param name="choice" value="true" /> + <param name="sample_geneids" value="GSE51403/GSE51403_expression_matrix_GeneLengths.txt" /> + <param name="column_geneids" value="1" /> + </conditional> + + <!-- <repeat name="samples"> --> + <param name="samples_0|sample" value="GSE51403/GSE51403_expression_matrix_Control_1.txt" /> + <param name="samples_0|column_index" value="2" /> + <!-- </repeat> --> + + <!-- <repeat name="samples"> --> + <param name="samples_1|sample" value="GSE51403/GSE51403_expression_matrix_Control_2.txt" /> + <param name="samples_1|column_index" value="2" /> + <!-- </repeat> --> + + <!-- <repeat name="samples"> --> + <param name="samples_2|sample" value="GSE51403/GSE51403_expression_matrix_Control_3.txt" /> + <param name="samples_2|column_index" value="2" /> + <!-- </repeat> --> + + <!-- <repeat name="samples"> --> + <param name="samples_3|sample" value="GSE51403/GSE51403_expression_matrix_Control_4.txt" /> + <param name="samples_3|column_index" value="2" /> + <!-- </repeat> --> + + <!-- <repeat name="samples"> --> + <param name="samples_4|sample" value="GSE51403/GSE51403_expression_matrix_Control_5.txt" /> + <param name="samples_4|column_index" value="2" /> + <!-- </repeat> --> + + <!-- <repeat name="samples"> --> + <param name="samples_5|sample" value="GSE51403/GSE51403_expression_matrix_Control_6.txt" /> + <param name="samples_5|column_index" value="2" /> + <!-- </repeat> --> + + <!-- <repeat name="samples"> --> + <param name="samples_6|sample" value="GSE51403/GSE51403_expression_matrix_Control_7.txt" /> + <param name="samples_6|column_index" value="2" /> + <!-- </repeat> --> + + <!-- <repeat name="samples"> --> + <param name="samples_7|sample" value="GSE51403/GSE51403_expression_matrix_E2.txt" /> + <param name="samples_7|column_index" value="2,3,4,5,6,7,8" /> + <!-- </repeat> --> + + <conditional name="add_lengths"> + <param name="choice" value="true" /> + <param name="sample_lengths" value="GSE51403/GSE51403_expression_matrix_GeneLengths.txt" /> + <param name="column_lengths" value="2" /> + </conditional> + + <param name="remove_comment_lines" value="false" /> + + <output name="expression_matrix" file="GSE51403/GSE51403_expression_matrix_full.txt" /> + </test> + </tests> + + <help> edgeR: Concatenate Expression Matrices ####################################### @@ -179,28 +183,8 @@ Input ----- - -Contact -------- - -The tool wrapper has been written by Youri Hoogstrate from the Erasmus -Medical Center (Rotterdam, Netherlands) on behalf of the Translational -Research IT (TraIT) project: - -http://www.ctmm.nl/en/programmas/infrastructuren/traitprojecttranslationeleresearch - -More tools by the Translational Research IT (TraIT) project can be found -in the following toolsheds: - -http://toolshed.dtls.nl/ - -http://toolshed.g2.bx.psu.edu - -http://testtoolshed.g2.bx.psu.edu/ - </help> - - <citations> - <citation type="doi">10.1093/bioinformatics/btp616</citation> - <citation type="doi">10.1093/bioinformatics/btt688</citation> - </citations> +@CONTACT@ + </help> + + <expand macro="citations" /> </tool>
--- a/edgeR_Convert_DGE_Table_to_Bedgraph.xml Tue Sep 01 04:32:16 2015 -0400 +++ b/edgeR_Convert_DGE_Table_to_Bedgraph.xml Tue Sep 01 04:45:24 2015 -0400 @@ -1,90 +1,93 @@ <?xml version="1.0" encoding="UTF-8"?> - <tool id="edger_dge_table_to_bedgraph" name="edgeR: Convert 'differentially expressed genes'-table to bedgraph(s)" version="1.0.0"> - <description>EdgeR's "differentially expressed genes" table to bedgraph(s)</description> - - <requirements> - <requirement type="package" version="1.0.0">edger_dge_table_to_bedgraph</requirement> - </requirements> - - <command interpreter="python"> - edger_dge_table_to_bedgraph - -t $cpm_table - -g $geneset - - #if $logfc: - -c3 $logfc - #end if - - #if $logcpm: - -c4 $logcpm - #end if - - #if $lr: - -c5 $lr - #end if - - #if $pvalue: - -c6 $pvalue - #end if - - #if $fdr: - -c7 $fdr - #end if - </command> - - <inputs> - <param format="tabular" name="cpm_table" type="data" label="'differentially expressed genes'-table as result from EdgeR" help="must have 7 columns of which the 2nd are gene names matching the GTF file" /> - <param format="gtf,gff,gff3" name="geneset" type="data" label="Geneset used for estimating expression levels prior to expression analysis" /> - - <param name="columns" type="select" label="Desired columns" multiple="true" display="checkboxes"> - <option value="c3" selected="true">logFC</option> - <option value="c4">logCPM</option> - <option value="c5">LR</option> - <option value="c6">PValue</option> - <option value="c7" selected="true">FDR</option> - </param> - </inputs> - - <outputs> - <data format="bedgraph" name="logfc" label="logFC from ${cpm_table.name}"> - <filter>"c3" in columns</filter> - </data> - - <data format="bedgraph" name="logcpm" label="logCPM from ${cpm_table.name}"> - <filter>"c4" in columns</filter> - </data> - - <data format="bedgraph" name="lr" label="LR from ${cpm_table.name}"> - <filter>"c5" in columns</filter> - </data> - - <data format="bedgraph" name="pvalue" label="PValue from ${cpm_table.name}"> - <filter>"c6" in columns</filter> - </data> - - <data format="bedgraph" name="fdr" label="FDR from ${cpm_table.name}"> - <filter>"c7" in columns</filter> - </data> - </outputs> - - <tests> - <test> - <param name="cpm_table" value="Convert_DGE_Table_to_Bedgraph/table_01.tabular.txt" /> - <param name="geneset" value="Convert_DGE_Table_to_Bedgraph/genes_01.gtf" /> - - <param name="columns" value="c3,c7" /> - - <output name="logfc" file="Convert_DGE_Table_to_Bedgraph/logFC.output.bedgraph" /> - <output name="fdr" file="Convert_DGE_Table_to_Bedgraph/FDR.output.bedgraph" /> - </test> - </tests> - - <help> - P-values and FDRs are swapped from 1 to 0, and 0 to 1, because this way the most siginificant genes will obtain the highest values which is convenient for visualisation. - </help> - - <citations> - <citation type="doi">10.1093/bioinformatics/btp616</citation> - <citation type="doi">10.1093/bioinformatics/btt688</citation> - </citations> + <tool id="edger_dge_table_to_bedgraph" name="edgeR: Convert 'differentially expressed genes'-table to bedgraph(s)" version="1.0.0"> + <description>EdgeR's "differentially expressed genes" table to bedgraph(s)</description> + + <macros> + <import>edgeR_macros.xml</import> + </macros> + + <requirements> + <requirement type="package" version="1.0.0">edger_dge_table_to_bedgraph</requirement> + </requirements> + + <command interpreter="python"> + edger_dge_table_to_bedgraph + -t $cpm_table + -g $geneset + + #if $logfc: + -c3 $logfc + #end if + + #if $logcpm: + -c4 $logcpm + #end if + + #if $lr: + -c5 $lr + #end if + + #if $pvalue: + -c6 $pvalue + #end if + + #if $fdr: + -c7 $fdr + #end if + </command> + + <inputs> + <param format="tabular" name="cpm_table" type="data" label="'differentially expressed genes'-table as result from EdgeR" help="must have 7 columns of which the 2nd are gene names matching the GTF file" /> + <param format="gtf,gff,gff3" name="geneset" type="data" label="Geneset used for estimating expression levels prior to expression analysis" /> + + <param name="columns" type="select" label="Desired columns" multiple="true" display="checkboxes"> + <option value="c3" selected="true">logFC</option> + <option value="c4">logCPM</option> + <option value="c5">LR</option> + <option value="c6">PValue</option> + <option value="c7" selected="true">FDR</option> + </param> + </inputs> + + <outputs> + <data format="bedgraph" name="logfc" label="logFC from ${cpm_table.name}"> + <filter>"c3" in columns</filter> + </data> + + <data format="bedgraph" name="logcpm" label="logCPM from ${cpm_table.name}"> + <filter>"c4" in columns</filter> + </data> + + <data format="bedgraph" name="lr" label="LR from ${cpm_table.name}"> + <filter>"c5" in columns</filter> + </data> + + <data format="bedgraph" name="pvalue" label="PValue from ${cpm_table.name}"> + <filter>"c6" in columns</filter> + </data> + + <data format="bedgraph" name="fdr" label="FDR from ${cpm_table.name}"> + <filter>"c7" in columns</filter> + </data> + </outputs> + + <tests> + <test> + <param name="cpm_table" value="Convert_DGE_Table_to_Bedgraph/table_01.tabular.txt" /> + <param name="geneset" value="Convert_DGE_Table_to_Bedgraph/genes_01.gtf" /> + + <param name="columns" value="c3,c7" /> + + <output name="logfc" file="Convert_DGE_Table_to_Bedgraph/logFC.output.bedgraph" /> + <output name="fdr" file="Convert_DGE_Table_to_Bedgraph/FDR.output.bedgraph" /> + </test> + </tests> + + <help> + P-values and FDRs are swapped from 1 to 0, and 0 to 1, because this way the most siginificant genes will obtain the highest values which is convenient for visualisation. + + @CONTACT@ + </help> + + <expand macro="citations" /> </tool>
--- a/edgeR_Design_from_Expression_Matrix.xml Tue Sep 01 04:32:16 2015 -0400 +++ b/edgeR_Design_from_Expression_Matrix.xml Tue Sep 01 04:45:24 2015 -0400 @@ -1,119 +1,123 @@ <?xml version="1.0" encoding="UTF-8"?> <tool id="design_from_expression_matrix" name="edgeR: Design- from Expression matrix" version="1.0.0.a"> - <description>Create design- from an expression matrix</description> - - <requirements> - <requirement type="package" version="1.0.0">design_matrix_creator</requirement> - </requirements> - - <command interpreter="python"> - design_matrix_creator - -c $expression_matrix - - #set $unique_columns = [] - #for $factor in $treatments: - #for $level in $factor.rep_factorLevel: - #for $val in $level.factorIndex.value: - #if str($val) not in $unique_columns: - #set $unique_columns = $unique_columns + [str($val)] - #end if - #end for - #end for - #end for - - #set $unique_columns = " ".join(unique_columns) - -s $unique_columns - - -t - #for $factor in $treatments: - #set $factor_str = str($factor.name).encode('base64').replace('\n','') - - #for $level in $factor.rep_factorLevel: - #set $factor_str = $factor_str+":"+str($level.factorLevel).encode('base64').replace('\n','')+":"+",".join([str(x).strip() for x in $level.factorIndex.value]) - #end for - - $factor_str - #end for - - #if $choose_blocking.choice == "true" - -b - #for blocking in $choose_blocking.blocking_repeat: - #set $blocking_str = str($blocking.blocking_name).encode('base64').replace('\n','') - - #for $block_iter in $blocking.block: - #set $block_str = ",".join([str(x).strip() for x in $block_iter.block_index.value]) - #set $blocking_str = $blocking_str+":"+$block_str - #end for - - $blocking_str - #end for - #end if - - -o $design_matrix - </command> - - <inputs> - <param format="tabular" name="expression_matrix" type="data" label="Expression matrix (read counts)" help="You can create a count matrix with the tool" /> - - <repeat name="treatments" title="Factor/Condition" min="1"> - <param name="name" type="text" value="FactorName" label="Specify a name for the factor / condition" help="e.g. 'Tumor vs. Normal', 'Timepoint' or 'DiseaseState'. Field must be non-numerical, preferably only letters!!" /> - <repeat name="rep_factorLevel" title="Factor level" min="1"> - <param name="factorLevel" type="text" value="FactorLevel" label="Specify a condition" help="e.g. Tumor or Normal; Treated, untreaded or placebo. Field must be non-numerical, preferably only letters!!" /> - <param name="factorIndex" numerical="false" use_header_names="true" label="Select columns that are associated with this factor level" type="data_column" data_ref="expression_matrix" multiple="true" size="120" display="checkboxes"> - <validator type="no_options" message="Please select at least one column." /> - </param> - </repeat> - </repeat> - - <conditional name="choose_blocking"> - <param name="choice" type="select" label="Define blocking (paired or grouped samples)" help="e.g. to remove batch effects" > - <option value="false">No</option> - <option value="true">Yes</option> - </param> - <when value="false" /> - <when value="true"> - <repeat name="blocking_repeat" title="Blocking condition" min="1"> - <param name="blocking_name" type="text" value="" label="Specify a name for a blocking condition" help="e.g.'Patients' or 'Batches'" /> - <repeat name="block" title="Block" min="2"> - <param name="block_index" label="Select columns that are associated with this factor level" type="data_column" data_ref="expression_matrix" numerical="True" multiple="true" use_header_names="true" size="120" display="checkboxes"> - <validator type="no_options" message="Please select at least one column." /> - </param> - </repeat> - </repeat> - </when> - </conditional> - </inputs> - - <outputs> - <data format="tabular" name="design_matrix" label="Design matrix" /> - </outputs> - - - <tests> - <test> - <param name="expression_matrix" value="GSE51403/GSE51403_expression_matrix_full.txt" /> - - <!-- <repeat name="treatments"> --> - <param name="treatments_0|name" value="Treatment" /> - <!-- <repeat name="rep_factorLevel"> --> - <param name="treatments_0|rep_factorLevel_0|factorLevel" value="Control" /> - <param name="treatments_0|rep_factorLevel_0|factorIndex" value="2,3,4,5,6,7,8" /> - <!-- </repeat> --> - <!-- <repeat name="rep_factorLevel"> --> - <param name="treatments_0|rep_factorLevel_1|factorLevel" value="E2" /> - <param name="treatments_0|rep_factorLevel_1|factorIndex" value="9,10,11,12,13,14,15" /> - <!-- </repeat> --> - <!-- </repeat> --> - - <conditional name="choose_blocking"> - <param value="false" /> - </conditional> - - <output name="design_matrix" file="GSE51403/GSE51403_design_matrix_full_depth.txt" /> - </test> - </tests> - - <help> + <description>Create design- from an expression matrix</description> + + <macros> + <import>edgeR_macros.xml</import> + </macros> + + <requirements> + <requirement type="package" version="1.0.0">design_matrix_creator</requirement> + </requirements> + + <command interpreter="python"> + design_matrix_creator + -c $expression_matrix + + #set $unique_columns = [] + #for $factor in $treatments: + #for $level in $factor.rep_factorLevel: + #for $val in $level.factorIndex.value: + #if str($val) not in $unique_columns: + #set $unique_columns = $unique_columns + [str($val)] + #end if + #end for + #end for + #end for + + #set $unique_columns = " ".join(unique_columns) + -s $unique_columns + + -t + #for $factor in $treatments: + #set $factor_str = str($factor.name).encode('base64').replace('\n','') + + #for $level in $factor.rep_factorLevel: + #set $factor_str = $factor_str+":"+str($level.factorLevel).encode('base64').replace('\n','')+":"+",".join([str(x).strip() for x in $level.factorIndex.value]) + #end for + + $factor_str + #end for + + #if $choose_blocking.choice == "true" + -b + #for blocking in $choose_blocking.blocking_repeat: + #set $blocking_str = str($blocking.blocking_name).encode('base64').replace('\n','') + + #for $block_iter in $blocking.block: + #set $block_str = ",".join([str(x).strip() for x in $block_iter.block_index.value]) + #set $blocking_str = $blocking_str+":"+$block_str + #end for + + $blocking_str + #end for + #end if + + -o $design_matrix + </command> + + <inputs> + <param format="tabular" name="expression_matrix" type="data" label="Expression matrix (read counts)" help="You can create a count matrix with the tool" /> + + <repeat name="treatments" title="Factor/Condition" min="1"> + <param name="name" type="text" value="FactorName" label="Specify a name for the factor / condition" help="e.g. 'Tumor vs. Normal', 'Timepoint' or 'DiseaseState'. Field must be non-numerical, preferably only letters!!" /> + <repeat name="rep_factorLevel" title="Factor level" min="1"> + <param name="factorLevel" type="text" value="FactorLevel" label="Specify a condition" help="e.g. Tumor or Normal; Treated, untreaded or placebo. Field must be non-numerical, preferably only letters!!" /> + <param name="factorIndex" numerical="false" use_header_names="true" label="Select columns that are associated with this factor level" type="data_column" data_ref="expression_matrix" multiple="true" size="120" display="checkboxes"> + <validator type="no_options" message="Please select at least one column." /> + </param> + </repeat> + </repeat> + + <conditional name="choose_blocking"> + <param name="choice" type="select" label="Define blocking (paired or grouped samples)" help="e.g. to remove batch effects" > + <option value="false">No</option> + <option value="true">Yes</option> + </param> + <when value="false" /> + <when value="true"> + <repeat name="blocking_repeat" title="Blocking condition" min="1"> + <param name="blocking_name" type="text" value="" label="Specify a name for a blocking condition" help="e.g.'Patients' or 'Batches'" /> + <repeat name="block" title="Block" min="2"> + <param name="block_index" label="Select columns that are associated with this factor level" type="data_column" data_ref="expression_matrix" numerical="True" multiple="true" use_header_names="true" size="120" display="checkboxes"> + <validator type="no_options" message="Please select at least one column." /> + </param> + </repeat> + </repeat> + </when> + </conditional> + </inputs> + + <outputs> + <data format="tabular" name="design_matrix" label="Design matrix" /> + </outputs> + + + <tests> + <test> + <param name="expression_matrix" value="GSE51403/GSE51403_expression_matrix_full.txt" /> + + <!-- <repeat name="treatments"> --> + <param name="treatments_0|name" value="Treatment" /> + <!-- <repeat name="rep_factorLevel"> --> + <param name="treatments_0|rep_factorLevel_0|factorLevel" value="Control" /> + <param name="treatments_0|rep_factorLevel_0|factorIndex" value="2,3,4,5,6,7,8" /> + <!-- </repeat> --> + <!-- <repeat name="rep_factorLevel"> --> + <param name="treatments_0|rep_factorLevel_1|factorLevel" value="E2" /> + <param name="treatments_0|rep_factorLevel_1|factorIndex" value="9,10,11,12,13,14,15" /> + <!-- </repeat> --> + <!-- </repeat> --> + + <conditional name="choose_blocking"> + <param value="false" /> + </conditional> + + <output name="design_matrix" file="GSE51403/GSE51403_design_matrix_full_depth.txt" /> + </test> + </tests> + + <help> edgeR: Design- from Expression matrix ##################################### @@ -131,27 +135,8 @@ The test data is coming from: doi: 10.1093/bioinformatics/btt688. http://www.ncbi.nlm.nih.gov/pubmed/24319002 -Contact -------- - -The tool wrapper has been written by Youri Hoogstrate from the Erasmus -Medical Center (Rotterdam, Netherlands) on behalf of the Translational -Research IT (TraIT) project: - -http://www.ctmm.nl/en/programmas/infrastructuren/traitprojecttranslationeleresearch - -More tools by the Translational Research IT (TraIT) project can be found -in the following toolsheds: - -http://toolshed.dtls.nl/ - -http://toolshed.g2.bx.psu.edu - -http://testtoolshed.g2.bx.psu.edu/ - </help> - - <citations> - <citation type="doi">10.1093/bioinformatics/btp616</citation> - <citation type="doi">10.1093/bioinformatics/btt688</citation> - </citations> +@CONTACT@ + </help> + + <expand macro="citations" /> </tool>
--- a/edgeR_Differential_Gene_Expression.xml Tue Sep 01 04:32:16 2015 -0400 +++ b/edgeR_Differential_Gene_Expression.xml Tue Sep 01 04:45:24 2015 -0400 @@ -2,28 +2,32 @@ <tool id="edger_dge" name="edgeR: Differential Gene(Expression) Analysis" version="3.11.0.a"> <description>RNA-Seq gene expression analysis using edgeR (R package)</description> + <macros> + <import>edgeR_macros.xml</import> + </macros> + <requirements> <requirement type="package" version="3.11.0">edger</requirement> </requirements> <stdio> <regex match="Error in[^a-z]+contrasts" - source="both" - level="fatal" - description="Have the design- and expression-matrix been swapped?" /> + source="both" + level="fatal" + description="Have the design- and expression-matrix been swapped?" /> <regex match="Execution halted" - source="both" - level="fatal" /> + source="both" + level="fatal" /> <regex match="Calculating library sizes from column" - source="stderr" - level="log" /> + source="stderr" + level="log" /> <regex match="During startup - Warning messages" - source="stderr" - level="log" /> + source="stderr" + level="log" /> <regex match="Setting LC_[^ ]+ failed" - source="stderr" - level="warning" - description="LOCALE has not been set correctly" /> + source="stderr" + level="warning" + description="LOCALE has not been set correctly" /> </stdio> <version_command>echo $(R --version | grep version | grep -v GNU) " , EdgeR version" $(R --vanilla --slave -e "library(edgeR) ; cat(sessionInfo()\$otherPkgs\$edgeR\$Version)" 2> /dev/null | grep -v -i "WARNING: ")</version_command> @@ -594,9 +598,8 @@ This tool requires no specific configurations. The following dependencies are installed automatically: - R -- Bioconductor - - limma - - edgeR +- limma +- edgeR License ------- @@ -629,29 +632,8 @@ - http://www.ncbi.nlm.nih.gov/pubmed/24319002 - http://dx.doi.org/10.1093/bioinformatics/btt688 -Contact -------- - -The tool wrapper has been written by Youri Hoogstrate from the Erasmus -Medical Center (Rotterdam, Netherlands) on behalf of the Translational -Research IT (TraIT) project: - -http://www.ctmm.nl/en/programmas/infrastructuren/traitprojecttranslationeleresearch - -More tools by the Translational Research IT (TraIT) project can be found -in the following toolsheds: - -http://toolshed.dtls.nl/ - -http://toolshed.g2.bx.psu.edu - -http://testtoolshed.g2.bx.psu.edu/ - -I would like to thank Hina Riaz - Naz Khan for her helpful contribution. +@CONTACT@ </help> - <citations> - <citation type="doi">10.1093/bioinformatics/btp616</citation> - <citation type="doi">10.1093/bioinformatics/btt688</citation> - </citations> + <expand macro="citations" /> </tool>
--- a/edgeR_macros.xml Tue Sep 01 04:32:16 2015 -0400 +++ b/edgeR_macros.xml Tue Sep 01 04:45:24 2015 -0400 @@ -0,0 +1,26 @@ +<macros> + <token name="@CONTACT@">Contact +------- + +The tool wrapper has been written by Youri Hoogstrate from the Erasmus +Medical Center (Rotterdam, Netherlands) on behalf of the Translational +Research IT (TraIT) project: + +http://www.ctmm.nl/en/programmas/infrastructuren/traitprojecttranslationeleresearch + +More tools by the Translational Research IT (TraIT) project can be found +in the following toolsheds: + +http://toolshed.g2.bx.psu.edu + +http://testtoolshed.g2.bx.psu.edu/ + +I would like to thank Hina Riaz - Naz Khan for her helpful contribution.</token> + + <xml name="citations"> + <citations> + <citation type="doi">10.1093/bioinformatics/btp616</citation> + <citation type="doi">10.1093/bioinformatics/btt688</citation> + </citations> + </xml> +</macros>