annotate data_manager/rnastar_index_builder.xml @ 2:8a2d16bfdae2 draft

Uploaded
author fubar
date Fri, 03 Oct 2014 21:59:39 -0400
parents ebadd2c92958
children 102bdfdda10b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
1 <tool id="rnastar_index_builder_data_manager" name="rnastar index" tool_type="manage_data" version="0.0.1">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
2 <description>builder</description>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
3 <requirements>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
4 <requirement type="package" version="2.4.0d">rnastar</requirement>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
5 </requirements>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
6 <command interpreter="python">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
7 rnastar_index_builder.py --out_file "${out_file}" --fasta_filename "${all_fasta_source.fields.path}"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
8 --fasta_dbkey "${all_fasta_source.fields.dbkey}" --fasta_description "${all_fasta_source.fields.name}"
1
ebadd2c92958 Uploaded
fubar
parents: 0
diff changeset
9 --data_table_name "rnastar_indexes" --out_index_path "${out_file.extra_files_path}" --runThreadN 1
0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
10 #if $genemodel.modelformat=="gff3":
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
11 --sjdbOverhang "${genemodel.sjdbOverhang}"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
12 --sjdbGTFfile "${genemodel.sjdbGTFfile}"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
13 --sjdbGTFtagExonParentTranscript "${genemodel.sjdbGTFtagExonParentTranscript}"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
14 --sjdbGTFfeatureExon "${genemodel.sjdbGTFfeatureExon}"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
15 #end if
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
16 #if $genemodel.modelformat=="bed":
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
17 --sjdbFileChrStartEnd "${genemodel.sjdbFileChrStartEnd}"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
18 --sjdbOverhang "${genemodel.sjdbOverhang}"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
19 #end if
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
20 #if $genemodel.modelformat=="None":
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
21 --sjdbOverhang 0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
22 #end if
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
23 </command>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
24 <stdio>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
25 <regex match=".*" source="both" level="warning" description="stdout/err chatter:"/>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
26 </stdio>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
27 <inputs>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
28 <param name="all_fasta_source" type="select" label="Source FASTA Sequence">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
29 <options from_data_table="all_fasta"/>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
30 </param>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
31 <param type="text" name="sequence_name" value="" label="Informative name for sequence index" />
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
32 <param type="text" name="sequence_id" value="" label="ID for sequence index" />
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
33
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
34
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
35 <conditional name="genemodel">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
36 <param name="modelformat" type="select"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
37 label="Choose the format of gene model data from your history - bed or gff3"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
38 help="This will be the source of splice junction indexing if required">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
39 <option value="gff3" selected="true">gff3,gtf</option>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
40 <option value="bed">BED - tabular chr,start,end,strand</option>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
41 <option value="None" >None - no splice junction index</option>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
42 </param>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
43 <when value="gff3">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
44 <param type="data" format="gff3,gff" name="sjdbGTFfile" value="" label="Gene model - must be gff3 or compatible and must match the input genome"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
45 help="Required if you want to index splice junctions during index generation." />
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
46
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
47 <param type="text" name="sjdbGTFchrPrefix" value="chr" label="String prefix for GTF chromosomes"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
48 help='GTF prefix for chromosome names (e.g. "chr" to use ENSEMBL annotations with UCSC genomes)' >
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
49 <sanitizer invalid_char="">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
50 <valid initial="string.printable"/>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
51 </sanitizer>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
52 </param>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
53 <param type="text" name="sjdbGTFfeatureExon" value="exon_id" label="GTF feature to use as exon marker"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
54 help="GTF feature type in GTF file to be used as exons for building transcripts - use what's in your GTF">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
55 <sanitizer invalid_char="">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
56 <valid initial="string.printable"/>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
57 </sanitizer>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
58 </param>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
59
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
60 <param type="text" name="sjdbGTFtagExonParentTranscript" value="transcript_id" label="GTF feature to define for each exon's parents"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
61 help="GTF tag name to be used as exons' parents for building transcripts - use what's in your GTF">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
62 <sanitizer invalid_char="">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
63 <valid initial="string.printable"/>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
64 </sanitizer>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
65 </param>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
66
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
67 <param type="integer" name="sjdbOverhang" value="100" label="Splice junction overhang. If=0, splice junction database NOT used"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
68 help="int>=0: length of the donor/acceptor sequence on each side, (mate_length - 1)" />
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
69
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
70 </when>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
71 <when value='bed'>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
72 <param type="data" format="bed" name="sjdbFileChrStartEnd" value="" label="Introns as a tabular bed (chr,start,end,strand) file matching the input genome"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
73 help="Required if you want to index splice junctions during index generation." />
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
74 <param type="integer" name="sjdbOverhang" value="100" label="Splice junction overhang. If=0, splice junction database NOT used"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
75 help="int>=0: length of the donor/acceptor sequence on each side, (mate_length - 1)" />
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
76 </when>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
77 <when value='None'>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
78 </when>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
79 </conditional>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
80 </inputs>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
81 <outputs>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
82 <data name="out_file" format="data_manager_json"/>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
83 </outputs>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
84 <help>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
85
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
86 .. class:: infomark
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
87
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
88 *What it does*
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
89
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
90 This is a Galaxy datamanager for the rna STAR gap-aware RNA aligner.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
91
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
92 Please read the fine manual - that and the google group are the places to learn about the options above.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
93
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
94 *Note on sjdbOverhang*
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
95
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
96 From https://groups.google.com/forum/#!topic/rna-star/h9oh10UlvhI::
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
97
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
98 James is right, using large enough --sjdbOverhang is safer and should not generally cause any problems with reads of varying length. If your reads are very short, &lt;50b, then I would strongly recommend using optimum --sjdbOverhang=mateLength-1
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
99 By mate length I mean the length of one of the ends of the read, i.e. it's 100 for 2x100b PE or 1x100b SE. For longer reads you can simply use generic --sjdbOverhang 100.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
100 It is a bit confusing because of the way I named this parameter. --sjdbOverhang Noverhang is only used at the genome generation step for constructing the reference sequence out of the annotations.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
101 Basically, the Noverhang exonic bases from the donor site and Noverhang exonic bases from the acceptor site are spliced together for each of the junctions, and these spliced sequences are added to the genome sequence.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
102
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
103 At the mapping stage, the reads are aligned to both genomic and splice sequences simultaneously. If a read maps to one of the spliced sequences and crosses the "junction" in the middle of it, the coordinates of two spliced pieces are translated back to genomic space and added to the collection of mapped pieces, which are then all "stitched" together to form the final alignment. Since in the process of "maximal mapped length" search the read is split into pieces of no longer than --seedSearchStartLmax (=50 by default) bases, even if the read (mate) is longer than --sjdbOverhang, it can still be mapped to the spliced reference, as long as --sjdbOverhang > --seedSearchStartLmax.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
104
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
105 Cheers
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
106 Alex
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
107
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
108 *Note on gene model requirements for splice junctions*
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
109
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
110 From https://groups.google.com/forum/#!msg/rna-star/3Y_aaTuzBrE/lUylTB8h5vMJ::
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
111
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
112 When you generate a genome with annotations, you need to specify --sjdbOverhang value, which ideally should be equal to (oneMateLength-1), or you could use a generic value of ~100.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
113
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
114 Your gtf lines look fine to me. STAR needs 3 features from a GTF file:
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
115 1. Chromosome names in col.1 that agree with chromosome names in genome .fasta files. If you have "chr2L" names in the genome .fasta files, and "2L" in the .gtf file, then you need to use --sjdbGTFchrPrefix chr option.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
116 2. 'exon' in col.3 for the exons of all transcripts (this name can be changed with --sjdbGTFfeatureExon)
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
117 3. 'transcript_id' attribute that assigns each exon to a transcript (this name can be changed with --sjdbGTFtagExonParentTranscript)
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
118
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
119 Cheers
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
120 Alex
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
121
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
122 **Notice:** If you leave name, description, or id blank, it will be generated automatically.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
123
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
124 </help>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
125 </tool>