0
|
1 <macros>
|
|
<!-- Version bookkeeping.
     REMEMBER to bump the version of rna_star_index_builder_data_manager
     whenever you change @VERSION@ or @IDX_VERSION@ below!
     The data manager uses a symlink to this macro file to keep the STAR and
     the index versions in sync, but its +galaxy version number has to be
     adjusted manually. -->

<!-- STAR version to be used -->
<token name="@VERSION@">2.7.8a</token>

<!-- STAR index version compatible with this version of STAR.
     This is the STAR version that introduced the index structure expected
     by the current version.
     For any specific version of STAR it can be found with:
         STAR -h | grep versionGenome
     or by looking for the versionGenome parameter in
     source/parametersDefault of STAR's source code. -->
<token name="@IDX_VERSION@">2.7.4b</token>

<!-- Data table that the index select box and the dbkey action read from -->
<token name="@IDX_DATA_TABLE@">rnastar_index2x_versioned</token>
|
|
18
|
|
<!-- Package requirements shared by the STAR wrappers: STAR itself (pinned to
     @VERSION@) and samtools. The yield placeholder lets an expanding tool
     append further requirement elements. -->
<xml name="requirements">
    <requirements>
        <requirement version="@VERSION@" type="package">star</requirement>
        <requirement version="1.9" type="package">samtools</requirement>
        <yield/>
    </requirements>
</xml>
|
|
26
|
|
<!-- EDAM ontology annotation reused by the tools: two topic terms and one
     operation term. -->
<xml name="edam">
    <edam_topics>
        <edam_topic>topic_3170</edam_topic>
        <edam_topic>topic_3308</edam_topic>
    </edam_topics>
    <edam_operations>
        <edam_operation>operation_0292</edam_operation>
    </edam_operations>
</xml>
|
|
36
|
|
<!-- Select box for a prebuilt STAR index taken from the @IDX_DATA_TABLE@ data table.
     A row is offered only if its column 4 equals the with_gene_model macro token
     (default "0") and its column 5 equals @IDX_VERSION@; the surviving rows are
     sorted on column 2. An empty result is rejected by the no_options validator. -->
<xml name="index_selection" token_with_gene_model="0">
    <param name="genomeDir" argument="--genomeDir" type="select"
           label="Select reference genome"
           help="If your genome of interest is not listed, contact the Galaxy team">
        <options from_data_table="@IDX_DATA_TABLE@">
            <filter column="4" type="static_value" value="@WITH_GENE_MODEL@"/>
            <filter column="5" type="static_value" value="@IDX_VERSION@"/>
            <filter column="2" type="sort_by"/>
            <validator type="no_options" message="No indexes are available for the selected input dataset"/>
        </options>
    </param>
</xml>
|
|
49
|
|
<!-- STAR option for reading gzip-compressed read files through zcat; it is
     spliced into the command by the reads handling token when the barcode
     reads are gzipped FASTQ. -->
<token name="@FASTQ_GZ_OPTION@">
    --readFilesCommand zcat
</token>
|
|
<!-- Citation block (single DOI entry) shared by the tools that expand it. -->
<xml name="citations">
    <citations>
        <citation type="doi">10.1093/bioinformatics/bts635</citation>
    </citations>
</xml>
|
|
<!-- Splice junction database inputs: a gene model file (gff3 or gtf) and the
     overhang length used around annotated junctions.
     The optional macro token (default "true") controls whether the gene model
     dataset may be left empty. -->
<xml name="@SJDBOPTIONS@" token_optional="true">
    <param argument="--sjdbGTFfile" type="data" format="gff3,gtf" optional="@OPTIONAL@"
           label="Gene model (gff3,gtf) file for splice junctions"
           help="Exon junction information for mapping splices"/>
    <param argument="--sjdbOverhang" type="integer" value="100" min="1"
           label="Length of the genomic sequence around annotated junctions"
           help="Used in constructing the splice junctions database. Ideal value is ReadLength-1"/>
</xml>
|
|
<!-- Output actions that set the dbkey metadata according to
     refGenomeSource.geneSource:
       "indexed": column 1 of the @IDX_DATA_TABLE@ row whose column 0 matches the
                  selected genomeDir value; rows whose column 0 starts with "#"
                  are discarded first.
       "history": the dbkey of the dataset given as genomeFastaFiles. -->
<xml name="dbKeyActions">
    <actions>
        <conditional name="refGenomeSource.geneSource">
            <when value="indexed">
                <action name="dbkey" type="metadata">
                    <option name="@IDX_DATA_TABLE@" type="from_data_table" column="1" offset="0">
                        <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
                        <filter type="param_value" column="0" ref="refGenomeSource.GTFconditional.genomeDir"/>
                    </option>
                </action>
            </when>
            <when value="history">
                <action name="dbkey" type="metadata">
                    <option name="refGenomeSource.genomeFastaFiles" type="from_param" param_attribute="dbkey"/>
                </action>
            </when>
        </conditional>
    </actions>
</xml>
|
|
<token name="@TEMPINDEX@"><![CDATA[
## Build a temporary index in ./tempstargenomedir when the reference genome
## comes from the history; emits nothing for cached indices.
#if str($refGenomeSource.geneSource) == 'history':
    ## Find the parameter group that carries the splice junction settings:
    ##  * STAR nests them in GTFconditional and only uses them for 'with-gtf'
    ##  * STARsolo has no GTFconditional because with-gtf is mandatory there,
    ##    so the settings sit directly in refGenomeSource
    #set $sjdb_params = None
    #if 'GTFconditional' not in $refGenomeSource:
        #set $sjdb_params = $refGenomeSource
    #elif str($refGenomeSource.GTFconditional.GTFselect) == 'with-gtf':
        #set $sjdb_params = $refGenomeSource.GTFconditional
    #end if
    mkdir -p tempstargenomedir &&
    STAR
    --runMode genomeGenerate
    --genomeDir 'tempstargenomedir'
    --genomeFastaFiles '${refGenomeSource.genomeFastaFiles}'
    #if $sjdb_params is not None:
        --sjdbOverhang '${sjdb_params.sjdbOverhang}'
        --sjdbGTFfile '${sjdb_params.sjdbGTFfile}'
        ## gff3 files need the transcript parent tag spelled out
        #if str($sjdb_params.sjdbGTFfile.ext) == 'gff3':
            --sjdbGTFtagExonParentTranscript Parent
        #end if
    #end if
    #if str($refGenomeSource.genomeSAindexNbases):
        --genomeSAindexNbases ${refGenomeSource.genomeSAindexNbases}
    #end if
    --runThreadN \${GALAXY_SLOTS:-4}
    &&
#end if
]]></token>
|
|
<token name="@REFGENOMEHANDLING@"><![CDATA[
## Options telling STAR which index to map against and how to load it.
--runThreadN \${GALAXY_SLOTS:-4}
--genomeLoad NoSharedMemory
--genomeDir
#if str($refGenomeSource.geneSource) == 'history':
    ## directory created by the temporary index command for history references
    tempstargenomedir
#elif 'GTFconditional' in $refGenomeSource:
    '${refGenomeSource.GTFconditional.genomeDir.fields.path}'
    ## Handle difference between indices with/without annotations:
    ## an index built without a gene model may get one supplied at mapping time
    #if str($refGenomeSource.GTFconditional.GTFselect) == 'without-gtf':
        #if $refGenomeSource.GTFconditional.sjdbGTFfile:
            ## quoted like the same options in the temporary index command
            --sjdbOverhang '${refGenomeSource.GTFconditional.sjdbOverhang}'
            --sjdbGTFfile '${refGenomeSource.GTFconditional.sjdbGTFfile}'
            #if str($refGenomeSource.GTFconditional.sjdbGTFfile.ext) == 'gff3':
                --sjdbGTFtagExonParentTranscript Parent
            #end if
        #end if
    #end if
#else:
    ## GTFconditional exists only in STAR, but not STARsolo.
    ## NOTE(review): assumes such wrappers expand the index select box directly
    ## inside refGenomeSource, i.e. as refGenomeSource.genomeDir - confirm against
    ## the STARsolo wrapper. Previously this case failed on the missing GTFconditional.
    '${refGenomeSource.genomeDir.fields.path}'
#end if
]]></token>
|
|
<token name="@READSHANDLING@"><![CDATA[
## Assemble the read file arguments for STARsolo.
## We consume either two parallel multi-dataset inputs (input1 = barcode reads,
## input2 = cDNA reads) or one paired collection (forward = barcode reads,
## reverse = cDNA reads).
#if str($sc.input_types.use) == "repeat":
    ## zip() below would silently drop unmatched files, so require equal counts
    #assert len($sc.input_types.input1) == len($sc.input_types.input2)
    #set $reads1 = []
    #set $reads2 = []
    #for $r1, $r2 in zip($sc.input_types.input1, $sc.input_types.input2):
        ## Check that the input pairs are of the same type,
        ## otherwise STARsolo will run for a long time and then error out.
        #assert $r1.datatype == $r2.datatype
        #silent $reads1.append(str($r1))
        #silent $reads2.append(str($r2))
    #end for
    #set $reads1 = ','.join($reads1)
    #set $reads2 = ','.join($reads2)
#elif str($sc.input_types.use) == "list_paired":
    #set $r1 = $sc.input_types.input_collection.forward
    #set $r2 = $sc.input_types.input_collection.reverse
    #set $reads1 = $r1
    #set $reads2 = $r2
#end if
## cDNA sequence(s) [R2] always go first, then barcode(s) [R1]
## see: Section 3.2 of STAR manual for multiple inputs, and Section 13 for STARsolo inputs
--readFilesIn $reads2 $reads1
--soloCBmatchWLtype $sc.soloCBmatchWLtype
## $r1 is the collection's forward dataset, or the last barcode file of the loop above
#if $r1.is_of_type('fastq.gz', 'fastqsanger.gz'):
    @FASTQ_GZ_OPTION@
#end if
]]></token>
|
|
<!-- Inputs for building an index from a history dataset: the reference FASTA
     and the suffix array pre-indexing length (integer between 2 and 16,
     default 14). -->
<xml name="ref_selection">
    <param argument="--genomeFastaFiles" type="data" format="fasta"
           label="Select a reference genome"/>
    <param argument="--genomeSAindexNbases" type="integer" value="14" min="2" max="16"
           label="Length of the SA pre-indexing string"
           help="Typically between 10 and 15. Longer strings will use much more memory, but allow faster searches. For small genomes, the parameter --genomeSAindexNbases must be scaled down to min(14, log2(GenomeLength)/2 - 1)"/>
</xml>
|
|
<!-- Error detection: any of these patterns appearing on stdout or stderr
     marks the job as failed. The yield placeholder lets an expanding tool
     add its own checks. -->
<xml name="stdio">
    <stdio>
        <regex level="fatal" source="both" match="FATAL error"/>
        <regex level="fatal" source="both" match="EXITING: FATAL INPUT ERROR:"/>
        <regex level="fatal" source="both" match="EXITING: fatal error trying to allocate genome arrays, exception thrown: std::bad_alloc"/>
        <regex level="fatal" source="both" match="\[sam_read1\] missing header\? Abort!"/>
        <yield/>
    </stdio>
</xml>
|
|
<!-- Read input chooser for barcode + cDNA data: either two multi-dataset
     inputs (input1 = barcode reads, input2 = cDNA reads) or a single paired
     collection. -->
<xml name="input_selection">
    <conditional name="input_types">
        <param name="use" type="select" label="Input Type">
            <option value="repeat">Separate barcode and cDNA reads</option>
            <option value="list_paired">Paired collection of barcode and cDNA reads</option>
        </param>
        <when value="repeat">
            <param name="input1" type="data" format="fastq,fasta,fastq.gz,fastqsanger.gz" multiple="true"
                   label="RNA-Seq FASTQ/FASTA file, Barcode reads"/>
            <param name="input2" type="data" format="fastq,fasta,fastq.gz,fastqsanger.gz" multiple="true"
                   label="RNA-Seq FASTQ/FASTA file, cDNA reads"/>
        </when>
        <when value="list_paired">
            <param name="input_collection" type="data_collection" collection_type="paired"
                   format="fastq,fasta,fastq.gz,fastqsanger.gz" label="Collection of Pairs"/>
        </when>
    </conditional>
</xml>
|
|
<!-- Read input chooser offering two collection layouts: a flat list of
     single-end files or a list of pairs. -->
<xml name="input_selection_smart_seq">
    <conditional name="input_types_smart_seq">
        <param name="use" type="select" label="Input Type">
            <option value="list_single_end">Single-end FASTQ collection</option>
            <option value="list_paired_end">Paired FASTQ collection</option>
        </param>
        <when value="list_single_end">
            <param name="single_end_collection" type="data_collection" collection_type="list"
                   format="fastq,fasta,fastq.gz,fastqsanger.gz" label="List of single-end FASTQ files"/>
        </when>
        <when value="list_paired_end">
            <param name="paired_end_collection" type="data_collection" collection_type="list:paired"
                   format="fastq,fasta,fastq.gz,fastqsanger.gz" label="List of paired-end FASTQ files"/>
        </when>
    </conditional>
</xml>
|
|
<!-- Choices for UMI deduplication, to be expanded inside a select param.
     The option values are passed on unchanged; only the wording of the second
     label was corrected to name the UMI-tools package properly. -->
<xml name="umidedup_options">
    <option value="1MM_All" selected="true">Collapse all UMIs with 1 mismatch distance to each other</option>
    <option value="1MM_Directional_UMItools">Directional method from UMI-tools</option>
    <option value="1MM_Directional">Directional with stringent UMI deduplication</option>
</xml>
|
|
<!-- Anchor choices (numeric codes 0 to 3), to be expanded inside a select param. -->
<xml name="anchor_types">
    <option value="0">Read start</option>
    <option value="1">Read end</option>
    <option value="2">Adapter start</option>
    <option value="3">Adapter end</option>
</xml>
|
|
<!-- Barcode-to-whitelist matching modes: the two basic choices, to be expanded
     inside a select param. -->
<xml name="cb_match_wl_common">
    <option value="Exact">Exact</option>
    <option value="1MM">Single match</option>
</xml>
|
|
<!-- Barcode-to-whitelist matching modes labelled after CellRanger behaviour;
     the first one is preselected. To be expanded inside a select param. -->
<xml name="cb_match_wl_cellranger">
    <option value="1MM_multi" selected="true">Multiple matches (CellRanger 2)</option>
    <option value="1MM_multi_pseudocounts">Multiple matches (CellRanger 3)</option>
    <option value="1MM_multi_Nbase_pseudocounts">Multimatching to WL is allowed for CBs with N-bases (CellRanger 3)</option>
</xml>
|
|
<!-- Adapter related STARsolo parameters: the adapter sequence used to anchor
     barcodes, the number of mismatches tolerated in it, and the adapter
     clipping mode.
     The sanitizer on the sequence keeps only digits and the characters
     "-", A, T, C, G and N.
     The clipping select previously had no label; one was added so that it is
     presented like its sibling parameters. -->
<xml name="solo_adapter_params">
    <param argument="--soloAdapterSequence" type="text" value="-" label="Adapter sequence to anchor barcodes.">
        <sanitizer>
            <valid initial="string.digits">
                <add value="-"/>
                <add value="A"/>
                <add value="T"/>
                <add value="C"/>
                <add value="G"/>
                <add value="N"/>
            </valid>
        </sanitizer>
    </param>
    <param argument="--soloAdapterMismatchesNmax" type="integer" min="1" value="1" label="Maximum number of mismatches allowed in adapter sequence"/>
    <param argument="--clipAdapterType" type="select" label="Adapter clipping type">
        <option value="Hamming" selected="true">Adapter clipping based on Hamming distance</option>
        <option value="CellRanger4">5p and 3p adapter clipping similar to CellRanger4</option>
        <option value="None">No adapter clipping</option>
    </param>
</xml>
|
|
248 </macros>
|