annotate htseqsams2mx.xml @ 59:57841366f112 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
author iuc
date Mon, 04 May 2015 22:47:20 -0400
parents 9b59cd40f20d
children d300bc688e95
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
56
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
1 <tool id="htseqsams2mxlocal" name="SAM/BAM to count matrix" version="0.5">
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
2 <description>using HTSeq code</description>
59
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
3 <requirements>
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
4 <requirement type="package" version="0.7.6">pysam</requirement>
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
5 <requirement type="package" version="1.2.1">matplotlib</requirement>
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
6 <requirement type="package" version="0.5.4p3">htseq</requirement>
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
7 </requirements>
56
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
8 <stdio>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
9 <regex match=".*" source="both" level="warning" description="chatter from HTSeq:"/>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
10 </stdio>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
11 <command interpreter="python">
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
12 htseqsams2mx.py -g "$gfffile" -o "$outfile" -m "$model" --id_attribute "$id_attr" --feature_type "$feature_type"
59
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
13 --mapqMin $mapqMin
56
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
14 #for $s in $samfiles:
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
15 #if $s.ext != 'data':
59
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
16 --samf "'${s}','${s.name}','${s.ext}','${s.metadata.bam_index}'"
56
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
17 #end if
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
18 #end for
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
19 #if $filter_extras:
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
20 --filter_extras "$filter_extras"
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
21 #end if
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
22 </command>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
23 <inputs>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
24 <param format="gtf" name="gfffile" type="data" label="Gene model (GFF) file to count reads over from your current history" size="100" />
59
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
25 <param name="mapqMin" label="Filter reads with mapq below this value"
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
26 help="0 to count any mapping quality read. Otherwise only reads at or above specified mapq will be counted"
56
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
27 type="integer" value="5"/> <!-- integer threshold; the command template forwards it as the mapqMin option -->
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
28 <param name="title" label="Name for this job's output file" type="text" size="80" value="bams to DGE count matrix"/>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
29 <param name="stranded" value="false" type="boolean" label="Reads are stranded - use strand in counting" display="checkbox"
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
30 truevalue="yes" falsevalue="no" checked="no" help="Check this ONLY if you know your sequences are strand specific" />
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
31 <param name="model" type="select" label="Model for counting reads over the supplied gene model- see HTSeq docs"
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
32 help="If in doubt, union is a reasonable default but intersection-strict avoids double counting over overlapping exons">
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
33 <option value="union" selected="true">union</option>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
34 <option value="intersection-strict">intersection-strict</option>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
35 <option value="intersection-nonempty">intersection-nonempty</option>
59
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
36 </param>
56
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
37 <param name="id_attr" type="select" label="GTF attribute to output as the name for each contig - see HTSeq docs"
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
38 help="If in doubt, use gene name or if you need the id in your GTF, gene id">
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
39 <option value="gene_name" selected="true">gene name</option>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
40 <option value="gene_id">gene id</option>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
41 <option value="transcript_id">transcript id</option>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
42 <option value="transcript_name">transcript name</option>
59
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
43 </param>
56
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
44 <param name="feature_type" type="select" label="GTF feature type for counting reads over the supplied gene model- see HTSeq docs"
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
45 help="GTF feature type to count over - exon is a good choice with gene name as the contig to count over">
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
46 <option value="exon" selected="true">exon</option>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
47 <option value="CDS">CDS</option>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
48 <option value="UTR">UTR</option>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
49 <option value="transcript">transcript</option>
59
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
50 </param>
56
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
51 <param name="filter_extras" type="select" label="Filter any read with one or more flags"
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
52 help="eg the XS tag created by bowtie for multiple reads" optional="true" multiple="true"> <!-- optional; a non-empty selection is forwarded by the command template as the filter_extras option -->
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
53 <option value="">None</option>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
54 <option value="XS">XS:i > 0 - More than one mapping position Bowtie</option>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
55 <option value="XS:A">Might be useful for tophat</option>
59
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
56 </param>
56
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
57
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
58 <param name="samfiles" type="data" label="bam/sam file from your history" format="sam,bam" size="100" multiple="true"/>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
59 </inputs>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
60 <outputs>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
61 <data format="tabular" name="outfile" label="${title}_htseqsams2mx.xls" />
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
62 </outputs>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
63 <tests>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
64 <test>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
65 <param name="feature_type" value="exon" />
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
66 <param name="gfffile" value="rn4_chr20_100k.gtf" />
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
67 <param name="samfiles" value="rn4chr20test1.bam,rn4chr20test2.bam" ftype="bam"/>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
68 <param name="id_attr" value="gene_name" />
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
69 <param name="model" value="union" />
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
70 <param name="stranded" value="no" />
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
71 <param name="title" value="htseqtest" />
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
72 <param name="mapqMin" value="0" />
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
73
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
74 <output name="outfile" file="htseqsams2mx_test1_out.xls" lines_diff="1"/>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
75 </test>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
76 </tests>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
77 <help>
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
78
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
79 **What this tool does**
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
80
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
81 Counts reads in multiple sam/bam format mapped files and generates a matrix ideal for edgeR and other count based tools.
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
82 It uses HTSeq to count your sam reads over a gene model supplied as a GTF file.
59
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
83 The output is a tabular text (columnar - spreadsheet) file containing the
56
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
84 count matrix for downstream processing. Each row contains the counts from each sample for each
59
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
85 of the non-empty GTF input file contigs matching the GTF attribute choice above.
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
86 You probably want to use a gene level GTF output attribute and count reads that overlap
56
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
87 GTF exons for RNA-seq. Or you can count over exons by using transcript level output names or ids. Etc.
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
88
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
89 ----
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
90
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
91 **Author's plea on replicates**
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
92
59
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
93 If you want to interpret the downstream p values in terms of rejecting or accepting the null hypothesis
56
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
94 under random sampling with replacement from the universe of possible biological/experimental replicates from which your data was derived,
59
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
95 which is what published p values are often assumed to do, then you need biological
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
96 (or, for cell culture material, experimental) replicates.
56
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
97
59
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
98 Using technical or no replicates means the downstream p values are not interpretable the way most people would assume
56
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
99 they are - ie as the probability of obtaining a result as extreme as, or more extreme than, your experimental data
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
100 in millions of experiments conducted using the same methods under the null hypothesis.
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
101
59
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
102 There is no way around this and it is scientific fraud to ignore this issue and publish bogus p values derived from
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
103 technical or no replicates without making the lack of biological or experimental error in the p value calculations
56
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
104 clear to your readers so they can adjust their expectations. However, the buck stops here at higher level inference.
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
105 If you have no replicates, you must not use this tool as the p values are uninterpretable. So there.
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
106
59
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
107 See your stats 101 notes on the central limit theorem and test statistics for a refresher or talk to a
56
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
108 statistician if this makes no sense please.
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
109
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
110 **Attribution**
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
111
59
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
112 This Galaxy tool relies on HTSeq_ from http://www-huber.embl.de/users/anders/HTSeq/doc/index.html
56
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
113 for the tricky work of counting. That code includes the following attribution:
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
114
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
115 ## Written by Simon Anders (sanders@fs.tum.de), European Molecular Biology
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
116 ## Laboratory (EMBL). (c) 2010. Released under the terms of the GNU General
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
117 ## Public License v3. Part of the 'HTSeq' framework, version HTSeq-0.5.4p3
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
118
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
119 It will be automatically installed if you use the toolshed, as in general you probably should.
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
120 HTSeq_ must be installed with this tool if you install manually.
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
121
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
122 Otherwise, all code and documentation comprising this tool including the requirement
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
123 for more than one sample bam
59
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
124 was written by Ross Lazarus and is
56
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
125 licensed to you under the LGPL_ like other rgenetics artefacts.
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
126
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
127 Sorry, I don't use readgroups so had no reason to code read groups. Contributions welcome. Send code
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
128
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
129 .. _LGPL: http://www.gnu.org/copyleft/lesser.html
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
130 .. _HTSeq: http://www-huber.embl.de/users/anders/HTSeq/doc/index.html
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
131 </help>
59
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
132 <citations>
57841366f112 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/htseq commit 344140b8df53b8b7024618bb04594607a045c03a
iuc
parents: 56
diff changeset
133 </citations>
56
9b59cd40f20d Uploaded
iuc
parents:
diff changeset
134 </tool>