Mercurial > repos > yhoogstrate > edger_with_design_matrix
comparison edgeR_Concatenate_Expression_Matrices.xml @ 27:c6463da87893 draft
Uploaded
author | yhoogstrate |
---|---|
date | Tue, 20 May 2014 05:27:02 -0400 |
parents | |
children | c86f1f86b3c1 |
comparison
equal
deleted
inserted
replaced
26:8b7bd6e290c4 | 27:c6463da87893 |
---|---|
1 <?xml version="1.0" encoding="UTF-8"?> | |
2 <tool id="concatenate_expression_matrices" name="edgeR: Concatenate Expression Matrices"> | |
3 <description>Create a full expression matrix by selecting the desired columns from specific count tables</description> | |
4 | |
5 <command> | |
6 #set $j = 0 | |
7 #set $paste = [] | |
8 ## Each selected source is cut into its own numbered temporary file ($j).txt; gene IDs, when requested, come first. | |
9 #if $add_geneids.choice == "true": | |
10 #set $filename = str($j)+".txt" | |
11 #set paste = paste + [$filename] | |
12 | |
13 cut -f $add_geneids.column_geneids.value "$add_geneids.sample_geneids" > $filename ; | |
14 #set $j += 1 | |
15 #end if | |
16 ## One temporary file per repeat entry, holding the columns selected for that table. | |
17 #for $sample in $samples: | |
18 ##echo "$sample.column_index" | |
19 ##echo "$sample.column_index.value" | |
20 | |
21 #set $column_str = ",".join([str(x).strip() for x in $sample.column_index.value]) | |
22 #set $filename = str($j)+".txt" | |
23 #set paste = paste + [$filename] | |
24 | |
25 cut -f "$column_str" "$sample.sample" > $filename ; | |
26 #set $j += 1 | |
27 #end for | |
28 ## Optional gene-length column; it is added last, so it ends up as the final column. | |
29 #if $add_lengths.choice == "true": | |
30 #set $filename = str($j)+".txt" | |
31 #set paste = paste + [$filename] | |
32 | |
33 cut -f $add_lengths.column_lengths.value "$add_lengths.sample_lengths" > $filename ; | |
34 #set $j += 1 | |
35 #end if | |
36 ## Join the temporary files side by side in creation order; lines starting with a hash are dropped only when remove_comment_lines is enabled. | |
37 #set $paste_str = " ".join([str(x).strip() for x in $paste]) | |
38 #set $comment_filter = " | sed '/^#/d'" if str($remove_comment_lines) == "true" else "" | |
39 paste $paste_str $comment_filter > $expression_matrix | |
40 </command> | |
41 | |
42 <inputs> | |
43 <conditional name="add_geneids"> | |
44 <param name="choice" type="select" label="Add a gene-IDs column at the beginning of the file" help="Highly recommended to select!" > | |
45 <option value="false">No</option> | |
46 <option value="true" selected="true">Yes</option> | |
47 </param> | |
48 <when value="false" /> | |
49 <when value="true"> | |
50 <param name="sample_geneids" type="data" format="tabular" label="Select Read-count dataset that contains a column for GeneIDs" help="from featureCounts/DEXSeq-count/HTSeq-count, etc." /> | |
51 <param name="column_geneids" numerical="false" use_header_names="true" label="Select GeneID column" type="data_column" data_ref="sample_geneids" multiple="false"> | |
52 <validator type="no_options" message="Please select at least one column." /> | |
53 </param> | |
54 </when> | |
55 </conditional> | |
56 <!-- one entry per count table; the selected columns are appended in the order the tables are listed --> | |
57 <repeat name="samples" title="Expression Table" help="E.g. an earlier concatenated table, or a HT-seq, featureCounts or DESeq-count result."> | |
58 <param name="sample" type="data" format="tabular" label="Read-count dataset that belongs to a pair" help="from featureCounts/DEXSeq-count/HTSeq-count, etc." /> | |
59 <!-- select columns --> | |
60 <param name="column_index" numerical="false" use_header_names="true" label="Select columns that are associated with this factor level" type="data_column" data_ref="sample" multiple="true" size="120" display="checkboxes"> | |
61 <validator type="no_options" message="Please select at least one column." /> | |
62 </param> | |
63 </repeat> | |
64 <!-- optional gene-length column, pasted after all expression columns --> | |
65 <conditional name="add_lengths"> | |
66 <param name="choice" type="select" label="Add a gene-lengths column at the end of the file" help="Optional, only useful if RPKM/FPKM calculation is desired." > | |
67 <option value="false">No</option> | |
68 <option value="true">Yes</option> | |
69 </param> | |
70 <when value="false" /> | |
71 <when value="true"> | |
72 <param name="sample_lengths" type="data" format="tabular" label="Select Read-count dataset that contains a column for gene lengths" help="from featureCounts/DEXSeq-count/HTSeq-count, etc." /> | |
73 <param name="column_lengths" numerical="false" use_header_names="true" label="Select gene-length column" type="data_column" data_ref="sample_lengths" multiple="false" size="120"> | |
74 <validator type="no_options" message="Please select at least one column." /> | |
75 </param> | |
76 </when> | |
77 </conditional> | |
78 <!-- boolean params are configured with truevalue/falsevalue and checked (not selected) --> | |
79 <param name="remove_comment_lines" type="boolean" label="Automatically remove 'comment' lines starting with a '#'" truevalue="true" falsevalue="false" checked="true" help="Some tools (incl. featureCounts) include comment lines that are not necessary for downstream analysis. By enabling this function, these lines will be removed." /> | |
80 | |
81 </inputs> | |
82 <!-- single tabular dataset; the command redirects the pasted matrix into $expression_matrix --> | |
83 <outputs> | |
84 <data format="tabular" name="expression_matrix" label="Expression matrix" /> | |
85 </outputs> | |
86 | |
87 <tests> | |
88 <test> | |
89 <param name="add_geneids.choice" value="true" /> | |
90 <param name="choice" value="true" /> | |
91 <param name="add_geneids" value="true" /> | |
92 <!-- NOTE(review): the three params above all appear to target the add_geneids selector, and no datasets or column choices are supplied; the intended values are only listed in the comment below. Confirm before relying on this test. --> | |
93 | |
94 <!-- | |
95 Add a gene-IDs column at the end of the file true | |
96 Select Read-count dataset that contains a column for GeneIDs 21: MCF7_featureCounts_E2.txt | |
97 Select GeneID column 1 | |
98 Read-count dataset that belongs to a pair 14: MCF7_featureCounts_Control_1.txt | |
99 Select columns that are associated with this factor level 2 | |
100 Read-count dataset that belongs to a pair 15: MCF7_featureCounts_Control_2.txt | |
101 Select columns that are associated with this factor level 2 | |
102 Read-count dataset that belongs to a pair 16: MCF7_featureCounts_Control_3.txt | |
103 Select columns that are associated with this factor level 2 | |
104 Read-count dataset that belongs to a pair 17: MCF7_featureCounts_Control_4.txt | |
105 Select columns that are associated with this factor level 2 | |
106 Read-count dataset that belongs to a pair 18: MCF7_featureCounts_Control_5.txt | |
107 Select columns that are associated with this factor level 2 | |
108 Read-count dataset that belongs to a pair 22: MCF7_featureCounts_Control_6.txt | |
109 Select columns that are associated with this factor level 2 | |
110 Read-count dataset that belongs to a pair 20: MCF7_featureCounts_Control_7.txt | |
111 Select columns that are associated with this factor level 2 | |
112 Read-count dataset that belongs to a pair 21: MCF7_featureCounts_E2.txt | |
113 Select columns that are associated with this factor level 2 3 4 5 6 7 8 | |
114 Add a gene-lengths column at the end of the file true | |
115 Read-count dataset that belongs to a pair 26: MCF7_featureCounts_GeneLengths.txt | |
116 Select columns that are associated with this factor level 2 | |
117 Automatically remove 'comment' lines starting with a '#' False | |
118 --> | |
119 | |
120 <output name="expression_matrix" file="MCF7_featureCounts_concatenated.txt" /> | |
121 </test> | |
122 </tests> | |
123 | |
124 <help> | |
125 edgeR: Concatenate Expression Matrices | |
126 | |
127 **Notes** | |
128 | |
129 Make sure the tables have an identical number of columns compared to the number of headers. | |
130 If you export tables using R, make sure you set: col.names=NA. Otherwise columns may be swapped during concatenation. | |
131 | |
132 **References** | |
133 | |
134 The test data comes from: doi: 10.1093/bioinformatics/btt688. | |
135 http://www.ncbi.nlm.nih.gov/pubmed/24319002 | |
136 | |
137 </help> | |
138 </tool> |