Mercurial > repos > yhoogstrate > edger_with_design_matrix
changeset 94:46745f5666ac draft
Added test
line wrap: on
line diff
--- a/edgeR_Concatenate_Expression_Matrices.xml Wed Mar 18 06:40:01 2015 -0400 +++ b/edgeR_Concatenate_Expression_Matrices.xml Sat Mar 28 08:09:03 2015 -0400 @@ -86,38 +86,79 @@ <tests> <test> - <param name="add_geneids.choice" value="true" /> - <param name="choice" value="true" /> - <param name="add_geneids" value="true" /> + <conditional name="add_geneids"> + <param name="choice" value="true" /> + <param name="sample_geneids" value="GSE51403/GSE51403_expression_matrix_GeneLengths.txt" /> + <param name="column_geneids" value="1" /> + </conditional> + + <param name="samples_0|sample" value="GSE51403/GSE51403_expression_matrix_Control_1.txt" /> + <param name="samples_0|column_index" value="2" /> + + <conditional name="add_lengths"> + <param name="choice" value="false" /> + </conditional> + + <param name="remove_comment_lines" value="false" /> + <output name="expression_matrix" file="GSE51403/GSE51403_expression_matrix_Control_1.txt" /> + </test> + <test> + <conditional name="add_geneids"> + <param name="choice" value="true" /> + <param name="sample_geneids" value="GSE51403/GSE51403_expression_matrix_GeneLengths.txt" /> + <param name="column_geneids" value="1" /> + </conditional> + + <!-- <repeat name="samples"> --> + <param name="samples_0|sample" value="GSE51403/GSE51403_expression_matrix_Control_1.txt" /> + <param name="samples_0|column_index" value="2" /> + <!-- </repeat> --> + + <!-- <repeat name="samples"> --> + <param name="samples_1|sample" value="GSE51403/GSE51403_expression_matrix_Control_2.txt" /> + <param name="samples_1|column_index" value="2" /> + <!-- </repeat> --> - <!-- -Add a gene-IDs column at the end of the file true -Select Read-count dataset that contains a column for GeneIDs 21: MCF7_featureCounts_E2.txt -Select GeneID column 1 -Read-count dataset that belongs to a pair 14: MCF7_featureCounts_Control_1.txt -Select columns that are associated with this factor level 2 -Read-count dataset that belongs to a pair 15: MCF7_featureCounts_Control_2.txt 
-Select columns that are associated with this factor level 2 -Read-count dataset that belongs to a pair 16: MCF7_featureCounts_Control_3.txt -Select columns that are associated with this factor level 2 -Read-count dataset that belongs to a pair 17: MCF7_featureCounts_Control_4.txt -Select columns that are associated with this factor level 2 -Read-count dataset that belongs to a pair 18: MCF7_featureCounts_Control_5.txt -Select columns that are associated with this factor level 2 -Read-count dataset that belongs to a pair 22: MCF7_featureCounts_Control_6.txt -Select columns that are associated with this factor level 2 -Read-count dataset that belongs to a pair 20: MCF7_featureCounts_Control_7.txt -Select columns that are associated with this factor level 2 -Read-count dataset that belongs to a pair 21: MCF7_featureCounts_E2.txt -Select columns that are associated with this factor level 2 3 4 5 6 7 8 -Add a gene-lengths column at the end of the file true -Read-count dataset that belongs to a pair 26: MCF7_featureCounts_GeneLengths.txt -Select columns that are associated with this factor level 2 -Automatically remove 'comment' lines starting with a '#' False - --> + <!-- <repeat name="samples"> --> + <param name="samples_2|sample" value="GSE51403/GSE51403_expression_matrix_Control_3.txt" /> + <param name="samples_2|column_index" value="2" /> + <!-- </repeat> --> + + <!-- <repeat name="samples"> --> + <param name="samples_3|sample" value="GSE51403/GSE51403_expression_matrix_Control_4.txt" /> + <param name="samples_3|column_index" value="2" /> + <!-- </repeat> --> + + <!-- <repeat name="samples"> --> + <param name="samples_4|sample" value="GSE51403/GSE51403_expression_matrix_Control_5.txt" /> + <param name="samples_4|column_index" value="2" /> + <!-- </repeat> --> + + <!-- <repeat name="samples"> --> + <param name="samples_5|sample" value="GSE51403/GSE51403_expression_matrix_Control_6.txt" /> + <param name="samples_5|column_index" value="2" /> + <!-- </repeat> --> - 
<output name="expression_matrix" file="MCF7_featureCounts_concatenated.txt" /> + <!-- <repeat name="samples"> --> + <param name="samples_6|sample" value="GSE51403/GSE51403_expression_matrix_Control_7.txt" /> + <param name="samples_6|column_index" value="2" /> + <!-- </repeat> --> + + <!-- <repeat name="samples"> --> + <param name="samples_7|sample" value="GSE51403/GSE51403_expression_matrix_E2.txt" /> + <param name="samples_7|column_index" value="2,3,4,5,6,7,8" /> + <!-- </repeat> --> + + <conditional name="add_lengths"> + <param name="choice" value="true" /> + <param name="sample_lengths" value="GSE51403/GSE51403_expression_matrix_GeneLengths.txt" /> + <param name="column_lengths" value="2" /> + </conditional> + + <param name="remove_comment_lines" value="false" /> + + <output name="expression_matrix" file="GSE51403/GSE51403_expression_matrix_full.txt" /> </test> </tests> @@ -157,4 +198,9 @@ http://testtoolshed.g2.bx.psu.edu/ </help> + + <citations> + <citation type="doi">10.1093/bioinformatics/btp616</citation> + <citation type="doi">10.1093/bioinformatics/btt688</citation> + </citations> </tool>
--- a/edgeR_Convert_DGE_Table_to_Bedgraph.xml Wed Mar 18 06:40:01 2015 -0400 +++ b/edgeR_Convert_DGE_Table_to_Bedgraph.xml Sat Mar 28 08:09:03 2015 -0400 @@ -1,5 +1,5 @@ <?xml version="1.0" encoding="UTF-8"?> - <tool id="edger_dge_table_to_bedgraph" name="edgeR: convert 'differentially expressed genes'-table to bedgraph(s)" version="1.0.0"> + <tool id="edger_dge_table_to_bedgraph" name="edgeR: Convert 'differentially expressed genes'-table to bedgraph(s)" version="1.0.0"> <description>EdgeR's "differentially expressed genes" table to bedgraph(s)</description> <requirements> @@ -34,7 +34,7 @@ <inputs> <param format="tabular" name="cpm_table" type="data" label="'differentially expressed genes'-table as result from EdgeR" help="must have 7 columns of which the 2nd are gene names matching the GTF file" /> - <param format="gtf" name="geneset" type="data" label="Geneset used for estimating expression levels prior to expression analysis" /> + <param format="gtf,gff,gff3" name="geneset" type="data" label="Geneset used for estimating expression levels prior to expression analysis" /> <param name="columns" type="select" label="Desired columns" multiple="true" display="checkboxes"> <option value="c3" selected="true">logFC</option> @@ -67,7 +67,24 @@ </data> </outputs> + <tests> + <test> + <param name="cpm_table" value="Convert_DGE_Table_to_Bedgraph/table_01.tabular.txt" /> + <param name="geneset" value="Convert_DGE_Table_to_Bedgraph/genes_01.gtf" /> + + <param name="columns" value="c3,c7" /> + + <output name="logfc" file="Convert_DGE_Table_to_Bedgraph/logFC.output.bedgraph" /> + <output name="fdr" file="Convert_DGE_Table_to_Bedgraph/FDR.output.bedgraph" /> + </test> + </tests> + <help> P-values and FDRs are swapped from 1 to 0, and 0 to 1, because this way the most significant genes will obtain the highest values which is convenient for visualisation. 
</help> -</tool> \ No newline at end of file + + <citations> + <citation type="doi">10.1093/bioinformatics/btp616</citation> + <citation type="doi">10.1093/bioinformatics/btt688</citation> + </citations> +</tool>
--- a/edgeR_Design_from_Expression_Matrix.xml Wed Mar 18 06:40:01 2015 -0400 +++ b/edgeR_Design_from_Expression_Matrix.xml Sat Mar 28 08:09:03 2015 -0400 @@ -88,6 +88,31 @@ <data format="tabular" name="design_matrix" label="Design matrix" /> </outputs> + + <tests> + <test> + <param name="expression_matrix" value="GSE51403/GSE51403_expression_matrix_full.txt" /> + + <!-- <repeat name="treatments"> --> + <param name="treatments_0|name" value="Treatment" /> + <!-- <repeat name="rep_factorLevel"> --> + <param name="treatments_0|rep_factorLevel_0|factorLevel" value="Control" /> + <param name="treatments_0|rep_factorLevel_0|factorIndex" value="2,3,4,5,6,7,8" /> + <!-- </repeat> --> + <!-- <repeat name="rep_factorLevel"> --> + <param name="treatments_0|rep_factorLevel_1|factorLevel" value="E2" /> + <param name="treatments_0|rep_factorLevel_1|factorIndex" value="9,10,11,12,13,14,15" /> + <!-- </repeat> --> + <!-- </repeat> --> + + <conditional name="choose_blocking"> + <param value="false" /> + </conditional> + + <output name="design_matrix" file="GSE51403/GSE51403_design_matrix_full_depth.txt" /> + </test> + </tests> + <help> edgeR: Design- from Expression matrix ##################################### @@ -124,4 +149,9 @@ http://testtoolshed.g2.bx.psu.edu/ </help> + + <citations> + <citation type="doi">10.1093/bioinformatics/btp616</citation> + <citation type="doi">10.1093/bioinformatics/btt688</citation> + </citations> </tool>
--- a/edgeR_Differential_Gene_Expression.xml Wed Mar 18 06:40:01 2015 -0400 +++ b/edgeR_Differential_Gene_Expression.xml Sat Mar 28 08:09:03 2015 -0400 @@ -89,6 +89,7 @@ > /dev/null #end if + <!-- 2> stderr.txt ; grep -v 'Calculating library sizes from column' stderr.txt > stderr2.txt ; rm stderr.txt ; mv stderr2.txt stderr.txt ; @@ -103,11 +104,25 @@ grep -v 'Setting LC_COLLATE failed' stderr.txt > stderr2.txt ; rm stderr.txt ; mv stderr2.txt stderr.txt ; cat stderr.txt >&2 + --> </command> + <stdio> + <regex match="Calculating library sizes from column" + source="stderr" + level="log" /> + <regex match="During startup - Warning messages" + source="stderr" + level="log" /> + <regex match="Setting LC_[^ ]+ failed" + source="stderr" + level="warning" + description="LOCALE has not been set correctly" /> + </stdio> + <inputs> <param name="expression_matrix" type="data" format="tabular" label="Expression (read count) matrix" /> - <param name="design_matrix" type="data" format="tabular" label="Design matrix" hepl="Ensure your samplenames are identical to those in the expression matrix. Preferentially, create the contrast matrix using 'edgeR: Design- from Expression matrix'." /> + <param name="design_matrix" type="data" format="tabular" label="Design matrix" help="Ensure your samplenames are identical to those in the expression matrix. Preferentially, create the contrast matrix using 'edgeR: Design- from Expression matrix'." /> <param name="contrast" type="text" label="Contrast (biological question)" help="e.g. 'tumor-normal' or '(G1+G2)/2-G3' using the factors chosen in the design matrix. Read the 'makeContrasts' manual from Limma package for more info: http://www.bioconductor.org/packages/release/bioc/html/limma.html and http://www.bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf." 
/> @@ -509,6 +524,21 @@ </data> </outputs> + <tests> + <test> + <param name="expression_matrix" value="Differential_Gene_Expression/expression_matrix.tabular.txt" /> + <param name="design_matrix" value="Differential_Gene_Expression/design_matrix.tabular.txt" /> + + <param name="contrast" value="E-C"/> + + <param name="fdr" value="0.05" /> + + <param name="output_format_images" value="png" /> + + <output name="output_count_edgeR" file="Differential_Gene_Expression/differentially_expressed_genes.tabular.txt" /> + </test> + </tests> + <help> edgeR: Differential Gene(Expression) Analysis ############################################# @@ -635,4 +665,9 @@ I would like to thank Hina Riaz - Naz Khan for her helpful contribution. </help> + + <citations> + <citation type="doi">10.1093/bioinformatics/btp616</citation> + <citation type="doi">10.1093/bioinformatics/btt688</citation> + </citations> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Convert_DGE_Table_to_Bedgraph/FDR.output.bedgraph Sat Mar 28 08:09:03 2015 -0400 @@ -0,0 +1,10 @@ +chr1 1000 1100 0.99 +chr1 2000 2100 0.98 +chr2 1000 1100 0.94 +chr2 2000 2100 0.93 +chr3 1000 1100 0.86 +chr3 2000 2100 0.65 +chrMt 1000 1100 0.34 +chrMt 2000 2100 0.19 +chrX 1000 1100 0.62 +chrX 2000 2100 0.62
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Convert_DGE_Table_to_Bedgraph/genes_01.gtf Sat Mar 28 08:09:03 2015 -0400 @@ -0,0 +1,10 @@ +chr1 example exon 1000 1100 . . . gene_id "GENE01"; +chr1 example exon 2000 2100 . . . gene_id "GENE02"; +chr2 example exon 1000 1100 . . . gene_id "GENE03"; +chr2 example exon 2000 2100 . . . gene_id "GENE04"; +chr3 example exon 1000 1100 . . . gene_id "GENE05"; +chr3 example exon 2000 2100 . . . gene_id "GENE06"; +chrX example exon 1000 1100 . . . gene_id "GENE07"; +chrX example exon 2000 2100 . . . gene_id "GENE08"; +chrMt example exon 1000 1100 . . . gene_id "GENE09"; +chrMt example exon 2000 2100 . . . gene_id "GENE10";
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Convert_DGE_Table_to_Bedgraph/logFC.output.bedgraph Sat Mar 28 08:09:03 2015 -0400 @@ -0,0 +1,10 @@ +chr1 1000 1100 -1.67 +chr1 2000 2100 3.30 +chr2 1000 1100 2.81 +chr2 2000 2100 2.44 +chr3 1000 1100 -1.49 +chr3 2000 2100 1.74 +chrMt 1000 1100 1.99 +chrMt 2000 2100 -1.29 +chrX 1000 1100 2.36 +chrX 2000 2100 2.01
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Convert_DGE_Table_to_Bedgraph/table_01.tabular.txt Sat Mar 28 08:09:03 2015 -0400 @@ -0,0 +1,11 @@ +"" "genes" "logFC" "logCPM" "LR" "PValue" "FDR" +"5" "GENE01" -1.67 5.06 180.02 0.01 0.01 +"4" "GENE02" 3.30 4.18 175.31 0.02 0.02 +"7" "GENE03" 2.81 5.08 166.44 0.07 0.06 +"1" "GENE04" 2.44 4.92 150.61 0.08 0.07 +"6" "GENE05" -1.49 5.03 133.30 0.17 0.14 +"9" "GENE06" 1.74 4.53 131.57 0.42 0.35 +"8" "GENE07" 2.36 6.06 123.13 0.46 0.38 +"3" "GENE08" 2.01 4.55 121.05 0.46 0.38 +"10" "GENE09" 1.99 5.65 114.51 0.79 0.66 +"2" "GENE10" -1.29 4.85 110.17 0.98 0.81
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Differential_Gene_Expression/design_matrix.tabular.txt Sat Mar 28 08:09:03 2015 -0400 @@ -0,0 +1,9 @@ +sample-name Condition +C1 C +C2 C +C3 C +C4 C +E1 E +E2 E +E3 E +E4 E
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Differential_Gene_Expression/differentially_expressed_genes.tabular.txt Sat Mar 28 08:09:03 2015 -0400 @@ -0,0 +1,30 @@ +"" "genes" "logFC" "logCPM" "LR" "PValue" "FDR" +"15" "RET" 1.94897640107287 13.2940435307943 77.654599543179 1.22730171836821e-18 3.55917498326782e-17 +"24" "HSPA8" 0.607138087178611 18.9380827005326 16.8408380175314 4.06490891367457e-05 0.000589411792482813 +"12" "IFI44L" -0.665544707287885 11.7020333673755 13.7144720204488 0.000212808307971688 0.00205714697705965 +"10" "MYO18A" -0.608389235629078 14.1586814058554 10.0030349278008 0.00156282461000764 0.0113304784225554 +"4" "DDX11" 0.719283453206407 12.4597575302041 9.12036988131984 0.00252778847255745 0.0146611731408332 +"26" "NR2C2AP" 0.538719097450498 12.9331552590697 7.88314604327009 0.00498976028659176 0.0241171747185268 +"19" "RASSF3" -0.322613484306053 12.4964626371138 4.62652086564883 0.0314813398355935 0.130422693604602 +"3" "DDX17" -0.202898462175599 16.1804174471336 2.58563106106719 0.107837421983493 0.390910654690163 +"2" "USP26" -2.18026172235612 4.27316802151059 2.23964914988384 0.134511497925546 0.433425937760093 +"27" "INADL" -0.171517188407227 15.2186072712825 1.67097950802946 0.19612738879992 0.532259040389235 +"21" "LYSMD3" -0.195688416211883 13.32535006408 1.62863926869131 0.201891360147641 0.532259040389235 +"16" "IQCG" -0.204602031239979 10.1622224199572 1.27895197461613 0.258093988838365 0.623727139692716 +"14" "KCNAB3" 0.235558773538239 7.85434240586327 0.626910369343036 0.428490880664559 0.920477060517754 +"6" "SLC35D3" 0.732791849378157 4.62274850327991 0.571072605743573 0.449833005996972 0.920477060517754 +"20" "FUT8" -0.101052861033366 15.3006920075591 0.507763042822674 0.476108824405735 0.920477060517754 +"1" "COMMD10" 0.124183380838805 12.8850203875481 0.427219144496277 0.513356635936627 0.927280503730139 +"9" "TXLNB" -0.182677549934781 7.2825391341052 0.332700921323453 0.56407299924751 0.927280503730139 
+"28" "TMEM31" -0.242966946412951 6.16897757110336 0.313478542290647 0.575553416108362 0.927280503730139 +"7" "GLOD4" -0.0680179216917625 13.8710260882794 0.180025725971987 0.671351132981887 0.998117639327786 +"17" "C20orf118" 0.14522927187254 6.73904801103973 0.160869778108134 0.688356992639853 0.998117639327786 +"22" "LMOD3" 0.0653797517936461 7.60390982671528 0.0485282387333417 0.825644327378613 1 +"8" "GIMAP7" 0.278076976843342 4.26623062002702 0.0410800206726094 0.839383539966141 1 +"23" "HIPK1" -0.00503549836369412 17.7957744498389 0.00154544013400937 0.968641570543933 1 +"13" "KHSRP" -0.00239769805103114 14.7206397592923 0.000263977505255752 0.987037033025321 1 +"11" "ATG4B" 0.00130641853526229 13.5252482941211 3.53338139831294e-05 0.995257222366413 1 +"5" "PTPN20B" 0 4.09631395702755 0 1 1 +"18" "GPIHBP1" 0 4.09631395702755 0 1 1 +"25" "TAS2R39" 0 4.09631395702755 0 1 1 +"29" "GC" 0 4.09631395702755 0 1 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Differential_Gene_Expression/expression_matrix.tabular.txt Sat Mar 28 08:09:03 2015 -0400 @@ -0,0 +1,30 @@ +Geneid C1 C2 C3 C4 E1 E2 E3 E4 +COMMD10 966 1067 438 1231 964 812 528 860 +USP26 1 0 1 0 0 0 0 0 +DDX17 8544 13335 4579 16358 6995 8079 5994 6596 +DDX11 329 970 221 867 916 632 706 518 +PTPN20B 0 0 0 0 0 0 0 0 +SLC35D3 1 0 0 2 1 0 2 1 +GLOD4 1614 2596 965 2912 1807 1448 1039 1564 +GIMAP7 0 1 0 0 1 0 0 0 +TXLNB 15 29 9 25 14 15 6 17 +MYO18A 1775 4666 1193 4741 1669 1457 1497 1121 +ATG4B 936 2602 638 2394 1605 953 1185 911 +IFI44L 347 678 307 784 268 302 191 269 +KHSRP 2557 5001 1593 5513 3162 2624 2434 2509 +KCNAB3 20 42 10 34 28 34 22 10 +RET 331 695 361 669 2077 1431 1490 1327 +IQCG 125 193 84 229 118 116 79 107 +C20orf118 9 20 3 14 6 13 10 9 +GPIHBP1 0 0 0 0 0 0 0 0 +RASSF3 658 1060 405 1277 507 575 401 568 +FUT8 4834 6459 2599 7977 4291 4187 2974 4154 +LYSMD3 1333 1679 666 2029 868 1141 749 1076 +LMOD3 12 31 7 48 19 26 9 20 +HIPK1 24218 35223 14147 47991 19201 28435 20715 22614 +HSPA8 44244 58864 26628 76924 72195 61132 42728 67106 +TAS2R39 0 0 0 0 0 0 0 0 +NR2C2AP 606 1162 403 1223 1293 761 726 902 +INADL 4315 6418 2421 8507 3443 4415 3094 3441 +TMEM31 5 10 3 14 6 5 6 3 +GC 0 0 0 0 0 0 0 0