changeset 27:c6463da87893 draft

Uploaded
author yhoogstrate
date Tue, 20 May 2014 05:27:02 -0400
parents 8b7bd6e290c4
children f55cd338fcdd
files edgeR_Concatenate_Expression_Matrices.xml
diffstat 1 files changed, 138 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/edgeR_Concatenate_Expression_Matrices.xml	Tue May 20 05:27:02 2014 -0400
@@ -0,0 +1,138 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<tool id="concatenate_expression_matrices" name="edgeR: Concatenate Expression Matrices">
+	<description>Create a full expression matrix by selecting the desired columns from specific count tables</description>
+	
+	<command>
+		## Every selected column set is cut into a numbered temp file (0.txt, 1.txt, ...)
+		## in the job working directory; the pieces are pasted side by side at the end.
+		#set $j = 0
+		#set $paste = []
+		## "cat" passes a dataset through unchanged; grep drops lines starting with a hash.
+		#set $reader = "cat"
+		#if str($remove_comment_lines) == "true":
+			#set $reader = "grep -v '^#'"
+		#end if
+		
+		#if $add_geneids.choice == "true":
+			#set $filename = str($j)+".txt"
+			#set $paste = $paste + [$filename]
+			$reader "$add_geneids.sample_geneids" | cut -f $add_geneids.column_geneids.value > $filename ;
+			#set $j += 1
+		#end if
+		
+		#for $sample in $samples:
+			## One repeat entry may select several columns; cut takes them comma-separated.
+			#set $column_str = ",".join([str(x).strip() for x in $sample.column_index.value])
+			#set $filename = str($j)+".txt"
+			#set $paste = $paste + [$filename]
+			$reader "$sample.sample" | cut -f "$column_str" > $filename ;
+			#set $j += 1
+		#end for
+		
+		#if $add_lengths.choice == "true":
+			#set $filename = str($j)+".txt"
+			#set $paste = $paste + [$filename]
+			$reader "$add_lengths.sample_lengths" | cut -f $add_lengths.column_lengths.value > $filename ;
+			#set $j += 1
+		#end if
+		#set $paste_str = " ".join([str(x).strip() for x in $paste])
+		paste $paste_str > "$expression_matrix"
+	</command>
+	
+	<inputs>
+		<conditional name="add_geneids">
+			<param name="choice" type="select" label="Add a gene-IDs column at the start of the file" help="Highly recommended to select!" >
+				<option value="false">No</option>
+				<option value="true" selected="true">Yes</option>
+			</param>
+			<when value="false" />
+			<when value="true">
+				<param name="sample_geneids" type="data" format="tabular" label="Select Read-count dataset that contains a column for GeneIDs" help="from featureCounts/DEXSeq-count/HTSeq-count, etc." />
+				<param name="column_geneids" numerical="false" use_header_names="true" label="Select GeneID column" type="data_column" data_ref="sample_geneids" multiple="false">
+					<validator type="no_options" message="Please select a column." />
+				</param>
+			</when>
+		</conditional>
+		
+		<repeat name="samples" title="Expression Table" help="E.g. an earlier concatenated table, or an HTSeq-count, featureCounts or DEXSeq-count result.">
+			<param name="sample" type="data" format="tabular" label="Read-count dataset" help="from featureCounts/DEXSeq-count/HTSeq-count, etc." />
+			<!-- Columns of the dataset above that are copied into the expression matrix. -->
+			<param name="column_index" numerical="false" use_header_names="true" label="Select the columns to add to the expression matrix" type="data_column" data_ref="sample" multiple="true" size="120" display="checkboxes">
+				<validator type="no_options" message="Please select at least one column." />
+			</param>
+		</repeat>
+		
+		<conditional name="add_lengths">
+			<param name="choice" type="select" label="Add a gene-lengths column at the end of the file" help="Optional, only useful if RPKM/FPKM calculation is desired." >
+				<option value="false">No</option>
+				<option value="true">Yes</option>
+			</param>
+			<when value="false" />
+			<when value="true">
+				<param name="sample_lengths" type="data" format="tabular" label="Select dataset that contains a column with gene lengths" help="from featureCounts/DEXSeq-count/HTSeq-count, etc." />
+				<param name="column_lengths" numerical="false" use_header_names="true" label="Select gene-length column" type="data_column" data_ref="sample_lengths" multiple="false" size="120">
+					<validator type="no_options" message="Please select a column." />
+				</param>
+			</when>
+		</conditional>
+		
+		<!-- Galaxy reads "checked" (not "selected") for boolean parameters; "false" keeps the default that was effectively in use. -->
+		<param name="remove_comment_lines" type="boolean" label="Automatically remove 'comment' lines starting with a '#'" truevalue="true" falsevalue="false" checked="false" help="Some tools (incl. featureCounts) include comment lines that are not necessary for downstream analysis. By enabling this function, these lines will be removed." />
+	</inputs>
+	
+	<outputs>
+		<data name="expression_matrix" label="Expression matrix" format="tabular" /> <!-- receives the result of the final paste in the command -->
+	</outputs>
+	
+	<tests>
+		<test>
+			<param name="add_geneids.choice" value="true" />
+			<param name="choice" value="true" />
+			<param name="add_geneids" value="true" />
+			
+			
+			<!-- NOTE(review): the lines below look like the label/value listing of the job that produced the expected output; none of these datasets or columns are wired into the param elements above. TODO confirm and complete the test.
+Add a gene-IDs column at the end of the file	true	
+Select Read-count dataset that contains a column for GeneIDs	21: MCF7_featureCounts_E2.txt	
+Select GeneID column	1	
+Read-count dataset that belongs to a pair	14: MCF7_featureCounts_Control_1.txt	
+Select columns that are associated with this factor level	2	
+Read-count dataset that belongs to a pair	15: MCF7_featureCounts_Control_2.txt	
+Select columns that are associated with this factor level	2	
+Read-count dataset that belongs to a pair	16: MCF7_featureCounts_Control_3.txt	
+Select columns that are associated with this factor level	2	
+Read-count dataset that belongs to a pair	17: MCF7_featureCounts_Control_4.txt	
+Select columns that are associated with this factor level	2	
+Read-count dataset that belongs to a pair	18: MCF7_featureCounts_Control_5.txt	
+Select columns that are associated with this factor level	2	
+Read-count dataset that belongs to a pair	22: MCF7_featureCounts_Control_6.txt	
+Select columns that are associated with this factor level	2	
+Read-count dataset that belongs to a pair	20: MCF7_featureCounts_Control_7.txt	
+Select columns that are associated with this factor level	2	
+Read-count dataset that belongs to a pair	21: MCF7_featureCounts_E2.txt	
+Select columns that are associated with this factor level	2 3 4 5 6 7 8	
+Add a gene-lengths column at the end of the file	true	
+Read-count dataset that belongs to a pair	26: MCF7_featureCounts_GeneLengths.txt	
+Select columns that are associated with this factor level	2	
+Automatically remove 'comment' lines starting with a '#'	False
+			-->
+			
+			<output name="expression_matrix" file="MCF7_featureCounts_concatenated.txt" />
+		</test>
+	</tests>
+	
+	<help>
+edgeR: Concatenate Expression Matrices
+
+**Notes**
+
+Make sure that in every table the number of header fields equals the number of data columns.
+If you export tables using R, make sure you set: col.names=NA. Otherwise columns may be swapped during concatenation.
+
+**References**
+
+The test data comes from: doi: 10.1093/bioinformatics/btt688.
+http://www.ncbi.nlm.nih.gov/pubmed/24319002
+
+	</help>
+</tool>