Mercurial > repos > yhoogstrate > edger_with_design_matrix
diff edgeR_Concatenate_Expression_Matrices.xml @ 27:c6463da87893 draft
Uploaded
author | yhoogstrate |
---|---|
date | Tue, 20 May 2014 05:27:02 -0400 |
parents | |
children | c86f1f86b3c1 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/edgeR_Concatenate_Expression_Matrices.xml Tue May 20 05:27:02 2014 -0400
@@ -0,0 +1,154 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    Galaxy tool wrapper: cuts the selected columns out of several tabular count
+    tables and pastes them side by side into one expression matrix.
+    Output column order: optional gene-ID column, then the columns of every
+    "Expression Table" block in order, then the optional gene-length column.
+-->
+<tool id="concatenate_expression_matrices" name="edgeR: Concatenate Expression Matrices">
+    <description>Create a full expression matrix by selecting the desired columns from specific count tables</description>
+
+    <command><![CDATA[
+        ## Collect every (dataset path, column list) pair in output order first, so
+        ## that a single loop below can generate the shell commands.
+        #set $strip_comments = str($remove_comment_lines) == "true"
+        #set $extractions = []
+
+        #if str($add_geneids.choice) == "true"
+            #silent $extractions.append((str($add_geneids.sample_geneids), str($add_geneids.column_geneids.value)))
+        #end if
+
+        #for $sample in $samples
+            #set $column_str = ",".join([str(x).strip() for x in $sample.column_index.value])
+            #silent $extractions.append((str($sample.sample), $column_str))
+        #end for
+
+        #if str($add_lengths.choice) == "true"
+            #silent $extractions.append((str($add_lengths.sample_lengths), str($add_lengths.column_lengths.value)))
+        #end if
+
+        ## Every pair is cut into its own numbered temporary file (0.txt, 1.txt, ...).
+        ## The commands are chained so that a failing step stops the job instead of
+        ## letting paste build a matrix from incomplete parts.
+        #set $parts = []
+        #for $index, $extraction in enumerate($extractions)
+            #set $source = $extraction[0]
+            #set $columns = $extraction[1]
+            #set $part = str($index) + ".txt"
+            #silent $parts.append($part)
+            #if $strip_comments
+                ## sed rather than grep -v: sed still exits 0 when every line is dropped.
+                sed '/^\#/d' "$source" | cut -f "$columns" > "$part" &&
+            #else
+                cut -f "$columns" "$source" > "$part" &&
+            #end if
+        #end for
+
+        #set $paste_str = " ".join($parts)
+        paste $paste_str > "$expression_matrix"
+    ]]></command>
+
+    <inputs>
+        <conditional name="add_geneids">
+            <param name="choice" type="select" label="Add a gene-IDs column at the start of the file" help="Highly recommended to select!">
+                <option value="false">No</option>
+                <option value="true" selected="true">Yes</option>
+            </param>
+            <when value="false" />
+            <when value="true">
+                <param name="sample_geneids" type="data" format="tabular" label="Select Read-count dataset that contains a column for GeneIDs" help="from featureCounts/DEXSeq-count/HTSeq-count, etc." />
+                <param name="column_geneids" numerical="false" use_header_names="true" label="Select GeneID column" type="data_column" data_ref="sample_geneids" multiple="false">
+                    <validator type="no_options" message="Please select at least one column." />
+                </param>
+            </when>
+        </conditional>
+
+        <repeat name="samples" title="Expression Table" help="E.g. an earlier concatenated table, or a HT-seq, featureCounts or DESeq-count result.">
+            <param name="sample" type="data" format="tabular" label="Read-count dataset that belongs to a pair" help="from featureCounts/DEXSeq-count/HTSeq-count, etc." />
+            <!-- select columns -->
+            <param name="column_index" numerical="false" use_header_names="true" label="Select columns that are associated with this factor level" type="data_column" data_ref="sample" multiple="true" size="120" display="checkboxes">
+                <validator type="no_options" message="Please select at least one column." />
+            </param>
+        </repeat>
+
+        <conditional name="add_lengths">
+            <param name="choice" type="select" label="Add a gene-lengths column at the end of the file" help="Optional, only useful if RPKM/FPKM calculation is desired.">
+                <option value="false">No</option>
+                <option value="true">Yes</option>
+            </param>
+            <when value="false" />
+            <when value="true">
+                <param name="sample_lengths" type="data" format="tabular" label="Select dataset that contains a column with gene lengths" help="from featureCounts/DEXSeq-count/HTSeq-count, etc." />
+                <param name="column_lengths" numerical="false" use_header_names="true" label="Select gene-length column" type="data_column" data_ref="sample_lengths" multiple="false" size="120">
+                    <validator type="no_options" message="Please select at least one column." />
+                </param>
+            </when>
+        </conditional>
+
+        <!--
+            Unchecked by default so that runs which leave this option alone keep
+            producing the same output. When checked, comment lines are dropped from
+            every input before its columns are cut.
+        -->
+        <param name="remove_comment_lines" type="boolean" label="Automatically remove 'comment' lines starting with a '#'" truevalue="true" falsevalue="false" checked="false" help="Some tools (incl. featureCounts) include comment lines that are not necessary for downstream analysis. By enabling this function, these lines will be removed." />
+    </inputs>
+
+    <outputs>
+        <data format="tabular" name="expression_matrix" label="Expression matrix" />
+    </outputs>
+
+    <tests>
+        <!--
+            NOTE(review): this test sets no input datasets, and its three params all
+            look like attempts to address the add_geneids select; it presumably
+            cannot run as written. TODO: complete it from the recorded run below.
+        -->
+        <test>
+            <param name="add_geneids.choice" value="true" />
+            <param name="choice" value="true" />
+            <param name="add_geneids" value="true" />
+
+            <!--
+Add a gene-IDs column at the start of the file true
+Select Read-count dataset that contains a column for GeneIDs 21: MCF7_featureCounts_E2.txt
+Select GeneID column 1
+Read-count dataset that belongs to a pair 14: MCF7_featureCounts_Control_1.txt
+Select columns that are associated with this factor level 2
+Read-count dataset that belongs to a pair 15: MCF7_featureCounts_Control_2.txt
+Select columns that are associated with this factor level 2
+Read-count dataset that belongs to a pair 16: MCF7_featureCounts_Control_3.txt
+Select columns that are associated with this factor level 2
+Read-count dataset that belongs to a pair 17: MCF7_featureCounts_Control_4.txt
+Select columns that are associated with this factor level 2
+Read-count dataset that belongs to a pair 18: MCF7_featureCounts_Control_5.txt
+Select columns that are associated with this factor level 2
+Read-count dataset that belongs to a pair 22: MCF7_featureCounts_Control_6.txt
+Select columns that are associated with this factor level 2
+Read-count dataset that belongs to a pair 20: MCF7_featureCounts_Control_7.txt
+Select columns that are associated with this factor level 2
+Read-count dataset that belongs to a pair 21: MCF7_featureCounts_E2.txt
+Select columns that are associated with this factor level 2 3 4 5 6 7 8
+Add a gene-lengths column at the end of the file true
+Select dataset that contains a column with gene lengths 26: MCF7_featureCounts_GeneLengths.txt
+Select gene-length column 2
+Automatically remove 'comment' lines starting with a '#' False
+            -->
+
+            <output name="expression_matrix" file="MCF7_featureCounts_concatenated.txt" />
+        </test>
+    </tests>
+
+    <help>
+edgeR: Concatenate Expression Matrices
+
+**Notes**
+
+Make sure the tables have an identical number of columns compared to the number of headers.
+If you export tables using R, make sure you set: col.names=NA. Otherwise columns may be swapped during concatenation.
+
+**References**
+
+The test data is coming from: doi: 10.1093/bioinformatics/btt688.
+http://www.ncbi.nlm.nih.gov/pubmed/24319002
+
+    </help>
+</tool>