Mercurial > repos > fubar > data_manager_rnasta
annotate data_manager/rnastar_index_builder.xml @ 8:363d6797d366 draft default tip
Uploaded
author | fubar |
---|---|
date | Fri, 09 Jan 2015 23:40:09 -0500 |
parents | f9d8dc8c2152 |
children |
rev | line source |
---|---|
5 | 1 <tool id="rnastar_index_builder_data_manager" name="rnastar index" tool_type="manage_data" version="0.0.2"> |
0
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
2 <description>builder</description> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
3 <requirements> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
4 <requirement type="package" version="2.4.0d">rnastar</requirement> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
5 </requirements> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
6 <command interpreter="python"> |
4 | 7 rnastar_index_builder.py "${out_file}" --fasta_filename "${all_fasta_source.fields.path}" |
0
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
8 --fasta_dbkey "${all_fasta_source.fields.dbkey}" --fasta_description "${all_fasta_source.fields.name}" |
4 | 9 --runThreadN 1 |
0
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
10 #if $genemodel.modelformat=="gff3": |
4 | 11 --sjdbGTFchrPrefix "${genemodel.sjdbGTFchrPrefix}" |
0
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
12 --sjdbOverhang "${genemodel.sjdbOverhang}" |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
13 --sjdbGTFfile "${genemodel.sjdbGTFfile}" |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
14 --sjdbGTFtagExonParentTranscript "${genemodel.sjdbGTFtagExonParentTranscript}" |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
15 --sjdbGTFfeatureExon "${genemodel.sjdbGTFfeatureExon}" |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
16 #end if |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
17 #if $genemodel.modelformat=="bed": |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
18 --sjdbFileChrStartEnd "${genemodel.sjdbFileChrStartEnd}" |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
19 --sjdbOverhang "${genemodel.sjdbOverhang}" |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
20 #end if |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
21 #if $genemodel.modelformat=="None": |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
22 --sjdbOverhang 0 |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
23 #end if |
4 | 24 --data_table_name "rnastar_index" |
6 | 25 </command> |
0
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
26 <inputs> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
27 <param name="all_fasta_source" type="select" label="Source FASTA Sequence"> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
28 <options from_data_table="all_fasta"/> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
29 </param> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
30 <param type="text" name="sequence_name" value="" label="Informative name for sequence index" /> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
31 <param type="text" name="sequence_id" value="" label="ID for sequence index" /> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
32 <conditional name="genemodel"> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
33 <param name="modelformat" type="select" |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
34 label="Choose the format of gene model data from your history - bed or gff3" |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
35 help="This will be the source of splice junction indexing if required"> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
36 <option value="gff3" selected="true">gff3,gtf</option> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
37 <option value="bed">BED - tabular chr,start,end,strand</option> |
6 | 38 <option value="None">None - no splice junction index</option> |
0
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
39 </param> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
40 <when value="gff3"> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
41 <param type="data" format="gff3,gff" name="sjdbGTFfile" value="" label="Gene model - must be gff3 or compatible and must match the input genome" |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
42 help="Required if you want to index splice junctions during index generation." /> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
43 |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
44 <param type="text" name="sjdbGTFchrPrefix" value="chr" label="String prefix for GTF chromosomes" |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
45 help='GTF prefix for chromosome names (e.g. "chr" to use ENSEMBL annotations with UCSC genomes)' > |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
46 <sanitizer invalid_char=""> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
47 <valid initial="string.printable"/> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
48 </sanitizer> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
49 </param> |
4 | 50 <param type="text" name="sjdbGTFfeatureExon" value="exon" label="GTF feature to use as exon marker" |
0
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
51 help="GTF feature type in GTF file to be used as exons for building transcripts - use what's in your GTF"> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
52 <sanitizer invalid_char=""> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
53 <valid initial="string.printable"/> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
54 </sanitizer> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
55 </param> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
56 |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
57 <param type="text" name="sjdbGTFtagExonParentTranscript" value="transcript_id" label="GTF tag that identifies each exon's parent transcript"
4 | 58 help="GTF tag name to be used as exons' parents for building transcripts - use what's in your gene model file eg parent for gff3"> |
0
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
59 <sanitizer invalid_char=""> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
60 <valid initial="string.printable"/> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
61 </sanitizer> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
62 </param> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
63 |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
64 <param type="integer" name="sjdbOverhang" value="100" label="Splice junction overhang. If=0, splice junction database NOT used" |
5 | 65 help="integer length of the donor/acceptor sequence on each side, (mate_length - 1)" /> |
0
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
66 |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
67 </when> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
68 <when value='bed'> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
69 <param type="data" format="bed" name="sjdbFileChrStartEnd" value="" label="Introns as a tabular bed (chr,start,end,strand) file matching the input genome" |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
70 help="Required if you want to index splice junctions during index generation." /> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
71 <param type="integer" name="sjdbOverhang" value="100" label="Splice junction overhang. If=0, splice junction database NOT used" |
5 | 72 help="integer length of the donor/acceptor sequence on each side, (mate_length - 1)" /> |
0
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
73 </when> |
6 | 74 <when value='None'> |
75 </when> | |
0
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
76 </conditional> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
77 </inputs> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
78 <outputs> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
79 <data name="out_file" format="data_manager_json"/> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
80 </outputs> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
81 <help> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
82 |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
83 .. class:: infomark |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
84 |
6 | 85 <![CDATA[ |
0
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
86 *What it does* |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
87 |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
88 This is a Galaxy datamanager for the rna STAR gap-aware RNA aligner. |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
89 |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
90 Please read the fine manual - that and the google group are the places to learn about the options above. |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
91 |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
92 *Note on sjdbOverhang* |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
93 |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
94 From https://groups.google.com/forum/#!topic/rna-star/h9oh10UlvhI:: |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
95 |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
96 James is right, using large enough --sjdbOverhang is safer and should not generally cause any problems with reads of varying length. If your reads are very short, <50b, then I would strongly recommend using optimum --sjdbOverhang=mateLength-1 |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
97 By mate length I mean the length of one of the ends of the read, i.e. it's 100 for 2x100b PE or 1x100b SE. For longer reads you can simply use generic --sjdbOverhang 100. |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
98 It is a bit confusing because of the way I named this parameter. --sjdbOverhang Noverhang is only used at the genome generation step for constructing the reference sequence out of the annotations. |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
99 Basically, the Noverhang exonic bases from the donor site and Noverhang exonic bases from the acceptor site are spliced together for each of the junctions, and these spliced sequences are added to the genome sequence. |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
100 |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
101 At the mapping stage, the reads are aligned to both genomic and splice sequences simultaneously. If a read maps to one of the spliced sequences and crosses the "junction" in the middle of it, the coordinates of two spliced pieces are translated back to genomic space and added to the collection of mapped pieces, which are then all "stitched" together to form the final alignment. Since in the process of "maximal mapped length" search the read is split into pieces of no longer than --seedSearchStartLmax (=50 by default) bases, even if the read (mate) is longer than --sjdbOverhang, it can still be mapped to the spliced reference, as long as --sjdbOverhang > --seedSearchStartLmax. |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
102 |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
103 Cheers |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
104 Alex |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
105 |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
106 *Note on gene model requirements for splice junctions* |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
107 |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
108 From https://groups.google.com/forum/#!msg/rna-star/3Y_aaTuzBrE/lUylTB8h5vMJ:: |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
109 |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
110 When you generate a genome with annotations, you need to specify --sjdbOverhang value, which ideally should be equal to (oneMateLength-1), or you could use a generic value of ~100. |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
111 |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
112 Your gtf lines look fine to me. STAR needs 3 features from a GTF file: |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
113 1. Chromosome names in col.1 that agree with chromosome names in genome .fasta files. If you have "chr2L" names in the genome .fasta files, and "2L" in the .gtf file, then you need to use --sjdbGTFchrPrefix chr option. |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
114 2. 'exon' in col.3 for the exons of all transcripts (this name can be changed with --sjdbGTFfeatureExon) |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
115 3. 'transcript_id' attribute that assigns each exon to a transcript (this name can be changed with --sjdbGTFtagExonParentTranscript) |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
116 |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
117 Cheers |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
118 Alex |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
119 |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
120 **Notice:** If you leave name, description, or id blank, it will be generated automatically. |
6 | 121 ]]> |
0
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
122 </help> |
84f6e67cbae5
Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff
changeset
|
123 </tool> |