annotate data_manager/rnastar_index_builder.xml @ 8:363d6797d366 draft default tip

Uploaded
author fubar
date Fri, 09 Jan 2015 23:40:09 -0500
parents f9d8dc8c2152
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
503097df1670 Uploaded
fubar
parents: 4
diff changeset
1 <tool id="rnastar_index_builder_data_manager" name="rnastar index" tool_type="manage_data" version="0.0.2">
0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
2 <description>builder</description>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
3 <requirements>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
4 <requirement type="package" version="2.4.0d">rnastar</requirement>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
5 </requirements>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
6 <command interpreter="python">
4
102bdfdda10b Uploaded
fubar
parents: 2
diff changeset
7 rnastar_index_builder.py "${out_file}" --fasta_filename "${all_fasta_source.fields.path}"
0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
8 --fasta_dbkey "${all_fasta_source.fields.dbkey}" --fasta_description "${all_fasta_source.fields.name}"
4
102bdfdda10b Uploaded
fubar
parents: 2
diff changeset
9 --runThreadN 1
0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
10 #if $genemodel.modelformat=="gff3":
4
102bdfdda10b Uploaded
fubar
parents: 2
diff changeset
11 --sjdbGTFchrPrefix "${genemodel.sjdbGTFchrPrefix}"
0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
12 --sjdbOverhang "${genemodel.sjdbOverhang}"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
13 --sjdbGTFfile "${genemodel.sjdbGTFfile}"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
14 --sjdbGTFtagExonParentTranscript "${genemodel.sjdbGTFtagExonParentTranscript}"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
15 --sjdbGTFfeatureExon "${genemodel.sjdbGTFfeatureExon}"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
16 #end if
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
17 #if $genemodel.modelformat=="bed":
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
18 --sjdbFileChrStartEnd "${genemodel.sjdbFileChrStartEnd}"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
19 --sjdbOverhang "${genemodel.sjdbOverhang}"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
20 #end if
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
21 #if $genemodel.modelformat=="None":
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
22 --sjdbOverhang 0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
23 #end if
4
102bdfdda10b Uploaded
fubar
parents: 2
diff changeset
24 --data_table_name "rnastar_index"
6
f9d8dc8c2152 Uploaded
fubar
parents: 5
diff changeset
25 </command>
0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
26 <inputs>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
27 <param name="all_fasta_source" type="select" label="Source FASTA Sequence">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
28 <options from_data_table="all_fasta"/>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
29 </param>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
30 <param type="text" name="sequence_name" value="" label="Informative name for sequence index" />
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
31 <param type="text" name="sequence_id" value="" label="ID for sequence index" />
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
32 <conditional name="genemodel">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
33 <param name="modelformat" type="select"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
34 label="Choose the format of gene model data from your history - bed or gff3"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
35 help="This will be the source of splice junction indexing if required">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
36 <option value="gff3" selected="true">gff3,gtf</option>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
37 <option value="bed">BED - tabular chr,start,end,strand</option>
6
f9d8dc8c2152 Uploaded
fubar
parents: 5
diff changeset
38 <option value="None">None - no splice junction index</option>
0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
39 </param>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
40 <when value="gff3">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
41 <param type="data" format="gff3,gff" name="sjdbGTFfile" value="" label="Gene model - must be gff3 or compatible and must match the input genome"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
42 help="Required if you want to index splice junctions during index generation." />
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
43
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
44 <param type="text" name="sjdbGTFchrPrefix" value="chr" label="String prefix for GTF chromosomes"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
45 help='GTF prefix for chromosome names (e.g. "chr" to use ENSEMBL annotations with UCSC genomes)' >
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
46 <sanitizer invalid_char="">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
47 <valid initial="string.printable"/>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
48 </sanitizer>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
49 </param>
4
102bdfdda10b Uploaded
fubar
parents: 2
diff changeset
50 <param type="text" name="sjdbGTFfeatureExon" value="exon" label="GTF feature to use as exon marker"
0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
51 help="GTF feature type in GTF file to be used as exons for building transcripts - use what's in your GTF">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
52 <sanitizer invalid_char="">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
53 <valid initial="string.printable"/>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
54 </sanitizer>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
55 </param>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
56
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
57 <param type="text" name="sjdbGTFtagExonParentTranscript" value="transcript_id" label="GTF feature to define for each exon's parents"
4
102bdfdda10b Uploaded
fubar
parents: 2
diff changeset
58 help="GTF tag name to be used as exons' parents for building transcripts - use what's in your gene model file eg parent for gff3">
0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
59 <sanitizer invalid_char="">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
60 <valid initial="string.printable"/>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
61 </sanitizer>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
62 </param>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
63
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
64 <param type="integer" name="sjdbOverhang" value="100" label="Splice junction overhang. If=0, splice junction database NOT used"
5
503097df1670 Uploaded
fubar
parents: 4
diff changeset
65 help="integer length of the donor/acceptor sequence on each side, (mate_length - 1)" />
0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
66
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
67 </when>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
68 <when value='bed'>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
69 <param type="data" format="bed" name="sjdbFileChrStartEnd" value="" label="Introns as a tabular bed (chr,start,end,strand) file matching the input genome"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
70 help="Required if you want to index splice junctions during index generation." />
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
71 <param type="integer" name="sjdbOverhang" value="100" label="Splice junction overhang. If=0, splice junction database NOT used"
5
503097df1670 Uploaded
fubar
parents: 4
diff changeset
72 help="integer length of the donor/acceptor sequence on each side, (mate_length - 1)" />
0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
73 </when>
6
f9d8dc8c2152 Uploaded
fubar
parents: 5
diff changeset
74 <when value='None'>
f9d8dc8c2152 Uploaded
fubar
parents: 5
diff changeset
75 </when>
0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
76 </conditional>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
77 </inputs>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
78 <outputs>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
79 <data name="out_file" format="data_manager_json"/>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
80 </outputs>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
81 <help>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
82
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
83 .. class:: infomark
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
84
6
f9d8dc8c2152 Uploaded
fubar
parents: 5
diff changeset
85 <![CDATA[
0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
86 *What it does*
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
87
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
88 This is a Galaxy datamanager for the rna STAR gap-aware RNA aligner.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
89
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
90 Please read the fine manual - that and the google group are the places to learn about the options above.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
91
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
92 *Note on sjdbOverhang*
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
93
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
94 From https://groups.google.com/forum/#!topic/rna-star/h9oh10UlvhI::
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
95
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
96 James is right, using large enough --sjdbOverhang is safer and should not generally cause any problems with reads of varying length. If your reads are very short, <50b, then I would strongly recommend using optimum --sjdbOverhang=mateLength-1
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
97 By mate length I mean the length of one of the ends of the read, i.e. it's 100 for 2x100b PE or 1x100b SE. For longer reads you can simply use generic --sjdbOverhang 100.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
98 It is a bit confusing because of the way I named this parameter. --sjdbOverhang Noverhang is only used at the genome generation step for constructing the reference sequence out of the annotations.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
99 Basically, the Noverhang exonic bases from the donor site and Noverhang exonic bases from the acceptor site are spliced together for each of the junctions, and these spliced sequences are added to the genome sequence.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
100
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
101 At the mapping stage, the reads are aligned to both genomic and splice sequences simultaneously. If a read maps to one of spliced sequences and crosses the "junction" in the middle of it, the coordinates of two spliced pieces are translated back to genomic space and added to the collection of mapped pieces, which are then all "stitched" together to form the final alignment. Since in the process of "maximal mapped length" search the read is split into pieces of no longer than --seedSearchStartLmax (=50 by default) bases, even if the read (mate) is longer than --sjdbOverhang, it can still be mapped to the spliced reference, as long as --sjdbOverhang > --seedSearchStartLmax.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
102
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
103 Cheers
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
104 Alex
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
105
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
106 *Note on gene model requirements for splice junctions*
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
107
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
108 From https://groups.google.com/forum/#!msg/rna-star/3Y_aaTuzBrE/lUylTB8h5vMJ::
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
109
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
110 When you generate a genome with annotations, you need to specify --sjdbOverhang value, which ideally should be equal to (oneMateLength-1), or you could use a generic value of ~100.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
111
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
112 Your gtf lines look fine to me. STAR needs 3 features from a GTF file:
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
113 1. Chromosome names in col.1 that agree with chromosome names in genome .fasta files. If you have "chr2L" names in the genome .fasta files, and "2L" in the .gtf file, then you need to use --sjdbGTFchrPrefix chr option.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
114 2. 'exon' in col.3 for the exons of all transcripts (this name can be changed with --sjdbGTFfeatureExon)
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
115 3. 'transcript_id' attribute that assigns each exon to a transcript (this name can be changed with --sjdbGTFtagExonParentTranscript)
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
116
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
117 Cheers
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
118 Alex
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
119
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
120 **Notice:** If you leave name, description, or id blank, each will be generated automatically.
6
f9d8dc8c2152 Uploaded
fubar
parents: 5
diff changeset
121 ]]>
0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
122 </help>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
123 </tool>