changeset 94:46745f5666ac draft

Added test
author yhoogstrate
date Sat, 28 Mar 2015 08:09:03 -0400
parents 31335aa52b2e
children 9dac2146b98c
files edgeR_Concatenate_Expression_Matrices.xml edgeR_Convert_DGE_Table_to_Bedgraph.xml edgeR_Design_from_Expression_Matrix.xml edgeR_Differential_Gene_Expression.xml test-data/Convert_DGE_Table_to_Bedgraph/FDR.output.bedgraph test-data/Convert_DGE_Table_to_Bedgraph/genes_01.gtf test-data/Convert_DGE_Table_to_Bedgraph/logFC.output.bedgraph test-data/Convert_DGE_Table_to_Bedgraph/table_01.tabular.txt test-data/Differential_Gene_Expression/design_matrix.tabular.txt test-data/Differential_Gene_Expression/differentially_expressed_genes.tabular.txt test-data/Differential_Gene_Expression/expression_matrix.tabular.txt
diffstat 11 files changed, 271 insertions(+), 33 deletions(-) [+]
line wrap: on
line diff
--- a/edgeR_Concatenate_Expression_Matrices.xml	Wed Mar 18 06:40:01 2015 -0400
+++ b/edgeR_Concatenate_Expression_Matrices.xml	Sat Mar 28 08:09:03 2015 -0400
@@ -86,38 +86,79 @@
 	
 	<tests>
 		<test>
-			<param name="add_geneids.choice" value="true" />
-			<param name="choice" value="true" />
-			<param name="add_geneids" value="true" />
+			<conditional name="add_geneids">
+				<param name="choice" value="true" />
+				<param name="sample_geneids" value="GSE51403/GSE51403_expression_matrix_GeneLengths.txt" />
+				<param name="column_geneids" value="1" />
+			</conditional>
+			
+			<param name="samples_0|sample" value="GSE51403/GSE51403_expression_matrix_Control_1.txt" />
+			<param name="samples_0|column_index" value="2" />
+			
+			<conditional name="add_lengths">
+				<param name="choice" value="false" />
+			</conditional>
+			
+			<param name="remove_comment_lines" value="false" />
 			
+			<output name="expression_matrix" file="GSE51403/GSE51403_expression_matrix_Control_1.txt" />
+		</test>
+		<test>
+			<conditional name="add_geneids">
+				<param name="choice" value="true" />
+				<param name="sample_geneids" value="GSE51403/GSE51403_expression_matrix_GeneLengths.txt" />
+				<param name="column_geneids" value="1" />
+			</conditional>
+			
+			<!-- <repeat name="samples"> -->
+				<param name="samples_0|sample" value="GSE51403/GSE51403_expression_matrix_Control_1.txt" />
+				<param name="samples_0|column_index" value="2" />
+			<!-- </repeat> -->
+			
+			<!-- <repeat name="samples"> -->
+				<param name="samples_1|sample" value="GSE51403/GSE51403_expression_matrix_Control_2.txt" />
+				<param name="samples_1|column_index" value="2" />
+			<!-- </repeat> -->
 			
-			<!--
-Add a gene-IDs column at the end of the file	true	
-Select Read-count dataset that contains a column for GeneIDs	21: MCF7_featureCounts_E2.txt	
-Select GeneID column	1	
-Read-count dataset that belongs to a pair	14: MCF7_featureCounts_Control_1.txt	
-Select columns that are associated with this factor level	2	
-Read-count dataset that belongs to a pair	15: MCF7_featureCounts_Control_2.txt	
-Select columns that are associated with this factor level	2	
-Read-count dataset that belongs to a pair	16: MCF7_featureCounts_Control_3.txt	
-Select columns that are associated with this factor level	2	
-Read-count dataset that belongs to a pair	17: MCF7_featureCounts_Control_4.txt	
-Select columns that are associated with this factor level	2	
-Read-count dataset that belongs to a pair	18: MCF7_featureCounts_Control_5.txt	
-Select columns that are associated with this factor level	2	
-Read-count dataset that belongs to a pair	22: MCF7_featureCounts_Control_6.txt	
-Select columns that are associated with this factor level	2	
-Read-count dataset that belongs to a pair	20: MCF7_featureCounts_Control_7.txt	
-Select columns that are associated with this factor level	2	
-Read-count dataset that belongs to a pair	21: MCF7_featureCounts_E2.txt	
-Select columns that are associated with this factor level	2 3 4 5 6 7 8	
-Add a gene-lengths column at the end of the file	true	
-Read-count dataset that belongs to a pair	26: MCF7_featureCounts_GeneLengths.txt	
-Select columns that are associated with this factor level	2	
-Automatically remove 'comment' lines starting with a '#'	False
-			-->
+			<!-- <repeat name="samples"> -->
+				<param name="samples_2|sample" value="GSE51403/GSE51403_expression_matrix_Control_3.txt" />
+				<param name="samples_2|column_index" value="2" />
+			<!-- </repeat> -->
+			
+			<!-- <repeat name="samples"> -->
+				<param name="samples_3|sample" value="GSE51403/GSE51403_expression_matrix_Control_4.txt" />
+				<param name="samples_3|column_index" value="2" />
+			<!-- </repeat> -->
+			
+			<!-- <repeat name="samples"> -->
+				<param name="samples_4|sample" value="GSE51403/GSE51403_expression_matrix_Control_5.txt" />
+				<param name="samples_4|column_index" value="2" />
+			<!-- </repeat> -->
+			
+			<!-- <repeat name="samples"> -->
+				<param name="samples_5|sample" value="GSE51403/GSE51403_expression_matrix_Control_6.txt" />
+				<param name="samples_5|column_index" value="2" />
+			<!-- </repeat> -->
 			
-			<output name="expression_matrix" file="MCF7_featureCounts_concatenated.txt" />
+			<!-- <repeat name="samples"> -->
+				<param name="samples_6|sample" value="GSE51403/GSE51403_expression_matrix_Control_7.txt" />
+				<param name="samples_6|column_index" value="2" />
+			<!-- </repeat> -->
+			
+			<!-- <repeat name="samples"> -->
+				<param name="samples_7|sample" value="GSE51403/GSE51403_expression_matrix_E2.txt" />
+				<param name="samples_7|column_index" value="2,3,4,5,6,7,8" />
+			<!-- </repeat> -->
+			
+			<conditional name="add_lengths">
+				<param name="choice" value="true" />
+				<param name="sample_lengths" value="GSE51403/GSE51403_expression_matrix_GeneLengths.txt" />
+				<param name="column_lengths" value="2" />
+			</conditional>
+			
+			<param name="remove_comment_lines" value="false" />
+			
+			<output name="expression_matrix" file="GSE51403/GSE51403_expression_matrix_full.txt" />
 		</test>
 	</tests>
 	
@@ -157,4 +198,9 @@
 
 http://testtoolshed.g2.bx.psu.edu/
 	</help>
+	
+	<citations>
+		<citation type="doi">10.1093/bioinformatics/btp616</citation>
+		<citation type="doi">10.1093/bioinformatics/btt688</citation>
+	</citations>
 </tool>
--- a/edgeR_Convert_DGE_Table_to_Bedgraph.xml	Wed Mar 18 06:40:01 2015 -0400
+++ b/edgeR_Convert_DGE_Table_to_Bedgraph.xml	Sat Mar 28 08:09:03 2015 -0400
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="UTF-8"?>
-	<tool id="edger_dge_table_to_bedgraph" name="edgeR: convert 'differentially expressed genes'-table to bedgraph(s)" version="1.0.0">
+	<tool id="edger_dge_table_to_bedgraph" name="edgeR: Convert 'differentially expressed genes'-table to bedgraph(s)" version="1.0.0">
 	<description>EdgeR's "differentially expressed genes" table to bedgraph(s)</description>
 	
 	<requirements>
@@ -34,7 +34,7 @@
 	
 	<inputs>
 		<param format="tabular" name="cpm_table" type="data" label="'differentially expressed genes'-table as result from EdgeR" help="must have 7 columns of which the 2nd are gene names matching the GTF file" />
-		<param format="gtf" name="geneset" type="data" label="Geneset used for estimating expression levels prior to expression analysis" />
+		<param format="gtf,gff,gff3" name="geneset" type="data" label="Geneset used for estimating expression levels prior to expression analysis" />
 	
 		<param name="columns" type="select" label="Desired columns" multiple="true" display="checkboxes">
 			<option value="c3" selected="true">logFC</option>
@@ -67,7 +67,24 @@
 		</data>
 	</outputs>
 	
+	<tests>
+		<test>
+			<param name="cpm_table" value="Convert_DGE_Table_to_Bedgraph/table_01.tabular.txt" />
+			<param name="geneset" value="Convert_DGE_Table_to_Bedgraph/genes_01.gtf" />
+			
+			<param name="columns" value="c3,c7" />
+			
+			<output name="logfc" file="Convert_DGE_Table_to_Bedgraph/logFC.output.bedgraph" />
+			<output name="fdr" file="Convert_DGE_Table_to_Bedgraph/FDR.output.bedgraph" />
+		</test>
+	</tests>
+	
 	<help>
 		P-values and FDRs are swapped from 1 to 0, and 0 to 1, because this way the most siginificant genes will obtain the highest values which is convenient for visualisation.
 	</help>
-</tool>
\ No newline at end of file
+	
+	<citations>
+		<citation type="doi">10.1093/bioinformatics/btp616</citation>
+		<citation type="doi">10.1093/bioinformatics/btt688</citation>
+	</citations>
+</tool>
--- a/edgeR_Design_from_Expression_Matrix.xml	Wed Mar 18 06:40:01 2015 -0400
+++ b/edgeR_Design_from_Expression_Matrix.xml	Sat Mar 28 08:09:03 2015 -0400
@@ -88,6 +88,31 @@
 		<data format="tabular" name="design_matrix" label="Design matrix" />
 	</outputs>
 	
+	
+	<tests>
+		<test>
+			<param name="expression_matrix" value="GSE51403/GSE51403_expression_matrix_full.txt" />
+			
+			<!-- <repeat name="treatments"> -->
+				<param name="treatments_0|name" value="Treatment" />
+				<!-- <repeat name="rep_factorLevel"> -->
+					<param name="treatments_0|rep_factorLevel_0|factorLevel" value="Control" />
+					<param name="treatments_0|rep_factorLevel_0|factorIndex" value="2,3,4,5,6,7,8" />
+				<!-- </repeat> -->
+				<!-- <repeat name="rep_factorLevel"> -->
+					<param name="treatments_0|rep_factorLevel_1|factorLevel" value="E2" />
+					<param name="treatments_0|rep_factorLevel_1|factorIndex" value="9,10,11,12,13,14,15" />
+				<!-- </repeat> -->
+			<!-- </repeat> -->
+			
+			<conditional name="choose_blocking"> <!-- blocking disabled for this test. NOTE(review): selector name "choice" is assumed from the add_geneids/add_lengths conditionals in this suite; confirm against this tool's <inputs> -->
+				<param name="choice" value="false" />
+			</conditional>
+			
+			<output name="design_matrix" file="GSE51403/GSE51403_design_matrix_full_depth.txt" />
+		</test>
+	</tests>
+	
 	<help>
 edgeR: Design- from Expression matrix
 #####################################
@@ -124,4 +149,9 @@
 
 http://testtoolshed.g2.bx.psu.edu/
 	</help>
+	
+	<citations>
+		<citation type="doi">10.1093/bioinformatics/btp616</citation>
+		<citation type="doi">10.1093/bioinformatics/btt688</citation>
+	</citations>
 </tool>
--- a/edgeR_Differential_Gene_Expression.xml	Wed Mar 18 06:40:01 2015 -0400
+++ b/edgeR_Differential_Gene_Expression.xml	Sat Mar 28 08:09:03 2015 -0400
@@ -89,6 +89,7 @@
 				> /dev/null
 			#end if
 			
+			<!--
 			2> stderr.txt ; 
 			
 			grep -v 'Calculating library sizes from column' stderr.txt > stderr2.txt ; rm stderr.txt ; mv stderr2.txt stderr.txt ; 
@@ -103,11 +104,25 @@
 			grep -v 'Setting LC_COLLATE failed' stderr.txt > stderr2.txt ; rm stderr.txt ; mv stderr2.txt stderr.txt ; 
 			
 			cat stderr.txt >&amp;2
+			-->
 	</command>
 	
+	<stdio>
+		<regex match="Calculating library sizes from column" 
+				source="stderr" 
+				level="log" />
+		<regex match="During startup - Warning messages"
+				source="stderr"
+				level="log" />
+		<regex match="Setting LC_[^ ]+ failed"
+				source="stderr"
+				level="warning"
+				description="LOCALE has not been set correctly" />
+	</stdio>
+	
 	<inputs>
 		<param name="expression_matrix" type="data" format="tabular" label="Expression (read count) matrix" />
-		<param name="design_matrix" type="data" format="tabular" label="Design matrix" hepl="Ensure your samplenames are identical to those in the expression matrix. Preferentially, create the contrast matrix using 'edgeR: Design- from Expression matrix'." />
+		<param name="design_matrix" type="data" format="tabular" label="Design matrix" help="Ensure your samplenames are identical to those in the expression matrix. Preferentially, create the contrast matrix using 'edgeR: Design- from Expression matrix'." />
 		
 		<param name="contrast" type="text" label="Contrast (biological question)" help="e.g. 'tumor-normal' or '(G1+G2)/2-G3' using the factors chosen in the design matrix. Read the 'makeContrasts' manual from Limma package for more info: http://www.bioconductor.org/packages/release/bioc/html/limma.html and http://www.bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf." />
 		
@@ -509,6 +524,21 @@
 		</data>
 	</outputs>
 	
+	<tests>
+		<test>
+			<param name="expression_matrix" value="Differential_Gene_Expression/expression_matrix.tabular.txt" />
+			<param name="design_matrix" value="Differential_Gene_Expression/design_matrix.tabular.txt" />
+			
+			<param name="contrast" value="E-C"/>
+		
+			<param name="fdr" value="0.05" />
+			
+			<param name="output_format_images" value="png" />
+			
+			<output name="output_count_edgeR" file="Differential_Gene_Expression/differentially_expressed_genes.tabular.txt" />
+		</test>
+	</tests>
+	
 	<help>
 edgeR: Differential Gene(Expression) Analysis
 #############################################
@@ -635,4 +665,9 @@
 
 I would like to thank Hina Riaz - Naz Khan for her helpful contribution.
 	</help>
+	
+	<citations>
+		<citation type="doi">10.1093/bioinformatics/btp616</citation>
+		<citation type="doi">10.1093/bioinformatics/btt688</citation>
+	</citations>
 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Convert_DGE_Table_to_Bedgraph/FDR.output.bedgraph	Sat Mar 28 08:09:03 2015 -0400
@@ -0,0 +1,10 @@
+chr1	1000	1100	0.99
+chr1	2000	2100	0.98
+chr2	1000	1100	0.94
+chr2	2000	2100	0.93
+chr3	1000	1100	0.86
+chr3	2000	2100	0.65
+chrMt	1000	1100	0.34
+chrMt	2000	2100	0.19
+chrX	1000	1100	0.62
+chrX	2000	2100	0.62
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Convert_DGE_Table_to_Bedgraph/genes_01.gtf	Sat Mar 28 08:09:03 2015 -0400
@@ -0,0 +1,10 @@
+chr1	example	exon	1000	1100	.	.	.	gene_id "GENE01";
+chr1	example	exon	2000	2100	.	.	.	gene_id "GENE02";
+chr2	example	exon	1000	1100	.	.	.	gene_id "GENE03";
+chr2	example	exon	2000	2100	.	.	.	gene_id "GENE04";
+chr3	example	exon	1000	1100	.	.	.	gene_id "GENE05";
+chr3	example	exon	2000	2100	.	.	.	gene_id "GENE06";
+chrX	example	exon	1000	1100	.	.	.	gene_id "GENE07";
+chrX	example	exon	2000	2100	.	.	.	gene_id "GENE08";
+chrMt	example	exon	1000	1100	.	.	.	gene_id "GENE09";
+chrMt	example	exon	2000	2100	.	.	.	gene_id "GENE10";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Convert_DGE_Table_to_Bedgraph/logFC.output.bedgraph	Sat Mar 28 08:09:03 2015 -0400
@@ -0,0 +1,10 @@
+chr1	1000	1100	-1.67
+chr1	2000	2100	3.30
+chr2	1000	1100	2.81
+chr2	2000	2100	2.44
+chr3	1000	1100	-1.49
+chr3	2000	2100	1.74
+chrMt	1000	1100	1.99
+chrMt	2000	2100	-1.29
+chrX	1000	1100	2.36
+chrX	2000	2100	2.01
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Convert_DGE_Table_to_Bedgraph/table_01.tabular.txt	Sat Mar 28 08:09:03 2015 -0400
@@ -0,0 +1,11 @@
+""	"genes"	"logFC"	"logCPM"	"LR"	"PValue"	"FDR"
+"5"	"GENE01"	-1.67	5.06	180.02	0.01	0.01
+"4"	"GENE02"	3.30	4.18	175.31	0.02	0.02
+"7"	"GENE03"	2.81	5.08	166.44	0.07	0.06
+"1"	"GENE04"	2.44	4.92	150.61	0.08	0.07
+"6"	"GENE05"	-1.49	5.03	133.30	0.17	0.14
+"9"	"GENE06"	1.74	4.53	131.57	0.42	0.35
+"8"	"GENE07"	2.36	6.06	123.13	0.46	0.38
+"3"	"GENE08"	2.01	4.55	121.05	0.46	0.38
+"10"	"GENE09"	1.99	5.65	114.51	0.79	0.66
+"2"	"GENE10"	-1.29	4.85	110.17	0.98	0.81
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/design_matrix.tabular.txt	Sat Mar 28 08:09:03 2015 -0400
@@ -0,0 +1,9 @@
+sample-name	Condition
+C1	C
+C2	C
+C3	C
+C4	C
+E1	E
+E2	E
+E3	E
+E4	E
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/differentially_expressed_genes.tabular.txt	Sat Mar 28 08:09:03 2015 -0400
@@ -0,0 +1,30 @@
+""	"genes"	"logFC"	"logCPM"	"LR"	"PValue"	"FDR"
+"15"	"RET"	1.94897640107287	13.2940435307943	77.654599543179	1.22730171836821e-18	3.55917498326782e-17
+"24"	"HSPA8"	0.607138087178611	18.9380827005326	16.8408380175314	4.06490891367457e-05	0.000589411792482813
+"12"	"IFI44L"	-0.665544707287885	11.7020333673755	13.7144720204488	0.000212808307971688	0.00205714697705965
+"10"	"MYO18A"	-0.608389235629078	14.1586814058554	10.0030349278008	0.00156282461000764	0.0113304784225554
+"4"	"DDX11"	0.719283453206407	12.4597575302041	9.12036988131984	0.00252778847255745	0.0146611731408332
+"26"	"NR2C2AP"	0.538719097450498	12.9331552590697	7.88314604327009	0.00498976028659176	0.0241171747185268
+"19"	"RASSF3"	-0.322613484306053	12.4964626371138	4.62652086564883	0.0314813398355935	0.130422693604602
+"3"	"DDX17"	-0.202898462175599	16.1804174471336	2.58563106106719	0.107837421983493	0.390910654690163
+"2"	"USP26"	-2.18026172235612	4.27316802151059	2.23964914988384	0.134511497925546	0.433425937760093
+"27"	"INADL"	-0.171517188407227	15.2186072712825	1.67097950802946	0.19612738879992	0.532259040389235
+"21"	"LYSMD3"	-0.195688416211883	13.32535006408	1.62863926869131	0.201891360147641	0.532259040389235
+"16"	"IQCG"	-0.204602031239979	10.1622224199572	1.27895197461613	0.258093988838365	0.623727139692716
+"14"	"KCNAB3"	0.235558773538239	7.85434240586327	0.626910369343036	0.428490880664559	0.920477060517754
+"6"	"SLC35D3"	0.732791849378157	4.62274850327991	0.571072605743573	0.449833005996972	0.920477060517754
+"20"	"FUT8"	-0.101052861033366	15.3006920075591	0.507763042822674	0.476108824405735	0.920477060517754
+"1"	"COMMD10"	0.124183380838805	12.8850203875481	0.427219144496277	0.513356635936627	0.927280503730139
+"9"	"TXLNB"	-0.182677549934781	7.2825391341052	0.332700921323453	0.56407299924751	0.927280503730139
+"28"	"TMEM31"	-0.242966946412951	6.16897757110336	0.313478542290647	0.575553416108362	0.927280503730139
+"7"	"GLOD4"	-0.0680179216917625	13.8710260882794	0.180025725971987	0.671351132981887	0.998117639327786
+"17"	"C20orf118"	0.14522927187254	6.73904801103973	0.160869778108134	0.688356992639853	0.998117639327786
+"22"	"LMOD3"	0.0653797517936461	7.60390982671528	0.0485282387333417	0.825644327378613	1
+"8"	"GIMAP7"	0.278076976843342	4.26623062002702	0.0410800206726094	0.839383539966141	1
+"23"	"HIPK1"	-0.00503549836369412	17.7957744498389	0.00154544013400937	0.968641570543933	1
+"13"	"KHSRP"	-0.00239769805103114	14.7206397592923	0.000263977505255752	0.987037033025321	1
+"11"	"ATG4B"	0.00130641853526229	13.5252482941211	3.53338139831294e-05	0.995257222366413	1
+"5"	"PTPN20B"	0	4.09631395702755	0	1	1
+"18"	"GPIHBP1"	0	4.09631395702755	0	1	1
+"25"	"TAS2R39"	0	4.09631395702755	0	1	1
+"29"	"GC"	0	4.09631395702755	0	1	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Differential_Gene_Expression/expression_matrix.tabular.txt	Sat Mar 28 08:09:03 2015 -0400
@@ -0,0 +1,30 @@
+Geneid	C1	C2	C3	C4	E1	E2	E3	E4
+COMMD10	966	1067	438	1231	964	812	528	860
+USP26	1	0	1	0	0	0	0	0
+DDX17	8544	13335	4579	16358	6995	8079	5994	6596
+DDX11	329	970	221	867	916	632	706	518
+PTPN20B	0	0	0	0	0	0	0	0
+SLC35D3	1	0	0	2	1	0	2	1
+GLOD4	1614	2596	965	2912	1807	1448	1039	1564
+GIMAP7	0	1	0	0	1	0	0	0
+TXLNB	15	29	9	25	14	15	6	17
+MYO18A	1775	4666	1193	4741	1669	1457	1497	1121
+ATG4B	936	2602	638	2394	1605	953	1185	911
+IFI44L	347	678	307	784	268	302	191	269
+KHSRP	2557	5001	1593	5513	3162	2624	2434	2509
+KCNAB3	20	42	10	34	28	34	22	10
+RET	331	695	361	669	2077	1431	1490	1327
+IQCG	125	193	84	229	118	116	79	107
+C20orf118	9	20	3	14	6	13	10	9
+GPIHBP1	0	0	0	0	0	0	0	0
+RASSF3	658	1060	405	1277	507	575	401	568
+FUT8	4834	6459	2599	7977	4291	4187	2974	4154
+LYSMD3	1333	1679	666	2029	868	1141	749	1076
+LMOD3	12	31	7	48	19	26	9	20
+HIPK1	24218	35223	14147	47991	19201	28435	20715	22614
+HSPA8	44244	58864	26628	76924	72195	61132	42728	67106
+TAS2R39	0	0	0	0	0	0	0	0
+NR2C2AP	606	1162	403	1223	1293	761	726	902
+INADL	4315	6418	2421	8507	3443	4415	3094	3441
+TMEM31	5	10	3	14	6	5	6	3
+GC	0	0	0	0	0	0	0	0