Mercurial > repos > iuc > data_manager_star_index_builder
changeset 10:f639ff7dea45 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_star_index_builder commit 1f8b01701c5b81d155617267a179ba42a0d4a307"
author | iuc |
---|---|
date | Fri, 10 Sep 2021 16:42:21 +0000 |
parents | 5750f6d0d369 |
children | c6f957c373d3 |
files | data_manager/macros.xml data_manager/rna_star_index_builder.py tool-data/rnastar_index2x_versioned.loc.sample |
diffstat | 3 files changed, 115 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/macros.xml Wed Aug 05 13:36:20 2020 +0000 +++ b/data_manager/macros.xml Fri Sep 10 16:42:21 2021 +0000 @@ -5,7 +5,7 @@ the index versions in sync, but you should manually adjust the +galaxy version number. --> <!-- STAR version to be used --> - <token name="@VERSION@">2.7.5b</token> + <token name="@VERSION@">2.7.8a</token> <!-- STAR index version compatible with this version of STAR This is the STAR version that introduced the index structure expected by the current version. @@ -24,6 +24,16 @@ </requirements> </xml> + <xml name="edam"> + <edam_topics> + <edam_topic>topic_3170</edam_topic> + <edam_topic>topic_3308</edam_topic> + </edam_topics> + <edam_operations> + <edam_operation>operation_0292</edam_operation> + </edam_operations> + </xml> + <xml name="index_selection" token_with_gene_model="0"> <param argument="--genomeDir" name="genomeDir" type="select" label="Select reference genome" @@ -122,12 +132,38 @@ #end if #end if ]]></token> + <token name="@READSHANDLING@" ><![CDATA[ + ## Check that the input pairs are of the same type + ## otherwise STARsolo will run for a long time and then error out. + ## We consume either repeats of two inputs R1 + R2 + ## or a collection of paired reads. + #if str($sc.input_types.use) == "repeat": + #set $reads1 = [] + #set $reads2 = [] + #for $r1, $r2 in zip($sc.input_types.input1, $sc.input_types.input2): + #assert $r1.datatype == $r2.datatype + #silent $reads1.append(str($r1)) + #silent $reads2.append(str($r2)) + #end for + #set $reads1 = ','.join($reads1) + #set $reads2 = ','.join($reads2) + #elif str($sc.input_types.use) == "list_paired": + #set $r1 = $sc.input_types.input_collection.forward + #set $r2 = $sc.input_types.input_collection.reverse + #set $reads1 = $r1 + #set $reads2 = $r2 + #end if + ## cDNA sequence(s) [R2] always go first, then barcode(s) [R1] + ## see: Section 3.2 of STAR manual for multiple inputs, and Section 13 for STARsolo inputs + --readFilesIn $reads2 $reads1 + --soloCBmatchWLtype $sc.soloCBmatchWLtype + #if $r1.is_of_type('fastq.gz', 'fastqsanger.gz'): + @FASTQ_GZ_OPTION@ + #end if + ]]></token> <xml name="ref_selection"> <param argument="--genomeFastaFiles" type="data" format="fasta" label="Select a reference genome" /> - <!-- Currently, this parameter is not exposed in the wrapper, - but used only in the tests to avoid excessive index sizes for - the tiny test genomes. --> - <param name="genomeSAindexNbases" type="hidden" value="" /> + <param argument="--genomeSAindexNbases" type="integer" min="2" max="16" value="14" label="Length of the SA pre-indexing string" help="Typically between 10 and 15. Longer strings will use much more memory, but allow faster searches. For small genomes, the parameter --genomeSAindexNbases must be scaled down to min(14, log2(GenomeLength)/2 - 1)"/> </xml> <xml name="stdio" > <stdio> @@ -138,4 +174,75 @@ <yield /> </stdio> </xml> + <xml name="input_selection"> + <conditional name="input_types" > + <param name="use" type="select" label="Input Type" > + <option value="repeat" >Separate barcode and cDNA reads</option> + <option value="list_paired" >Paired collection of barcode and cDNA reads</option> + </param> + <when value="repeat"> + <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input1" type="data" multiple="true" + label="RNA-Seq FASTQ/FASTA file, Barcode reads" /> + <param format="fastq,fasta,fastq.gz,fastqsanger.gz" name="input2" type="data" multiple="true" + label="RNA-Seq FASTQ/FASTA file, cDNA reads"/> + </when> + <when value="list_paired"> + <param name="input_collection" collection_type="paired" type="data_collection" format="fastq,fasta,fastq.gz,fastqsanger.gz" label="Collection of Pairs" /> + </when> + </conditional> + </xml> + <xml name="input_selection_smart_seq"> + <conditional name="input_types_smart_seq" > + <param name="use" type="select" label="Input Type" > + <option value="list_single_end" >Single-end FASTQ collection</option> + <option value="list_paired_end" >Paired FASTQ collection</option> + </param> + <when value="list_single_end"> + <param name="single_end_collection" collection_type="list" type="data_collection" format="fastq,fasta,fastq.gz,fastqsanger.gz" label="List of single-end FASTQ files" /> + </when> + <when value="list_paired_end"> + <param name="paired_end_collection" collection_type="list:paired" type="data_collection" format="fastq,fasta,fastq.gz,fastqsanger.gz" label="List of paired-end FASTQ files" /> + </when> + </conditional> + </xml> + <xml name="umidedup_options"> + <option value="1MM_All" selected="true">Collapse all UMIs with 1 mismatch distance to each other</option> + <option value="1MM_Directional_UMItools" >Directional method from the UMI-tool</option> + <option value="1MM_Directional" >Directional with stringent UMI deduplication</option> + </xml> + <xml name="anchor_types"> + <option value="0">Read start</option> + <option value="1">Read end</option> + <option value="2">Adapter start</option> + <option value="3">Adapter end</option> + </xml> + <xml name="cb_match_wl_common"> + <option value="Exact" >Exact</option> + <option value="1MM" >Single match</option> + </xml> + <xml name="cb_match_wl_cellranger"> + <option value="1MM_multi" selected="true" >Multiple matches (CellRanger 2)</option> + <option value="1MM_multi_pseudocounts" >Multiple matches (CellRanger 3)</option> + <option value="1MM_multi_Nbase_pseudocounts" >Multimatching to WL is allowed for CBs with N-bases (CellRanger 3)</option> + </xml> + <xml name="solo_adapter_params"> + <param argument="--soloAdapterSequence" type="text" value="-" label="Adapter sequence to anchor barcodes." > + <sanitizer> + <valid initial="string.digits"> + <add value="-"/> + <add value="A"/> + <add value="T"/> + <add value="C"/> + <add value="G"/> + <add value="N"/> + </valid> + </sanitizer> + </param> + <param argument="--soloAdapterMismatchesNmax" type="integer" min="1" value="1" label="Maximum number of mismatches allowed in adapter sequence" /> + <param argument="--clipAdapterType" type="select" > + <option value="Hamming" selected="true" >Adapter clipping based on Hamming distance</option> + <option value="CellRanger4" >5p and 3p adapter clipping similar to CellRanger4</option> + <option value="None" >No adapter clipping</option> + </param> + </xml> </macros>
--- a/data_manager/rna_star_index_builder.py Wed Aug 05 13:36:20 2020 +0000 +++ b/data_manager/rna_star_index_builder.py Fri Sep 10 16:42:21 2021 +0000 @@ -41,7 +41,8 @@ ] } } - open(args.config_file, 'w').write(json.dumps(data_manager_dict, sort_keys=True)) + with open(args.config_file, 'w') as fh: + json.dump(data_manager_dict, fh, sort_keys=True) if __name__ == "__main__":
--- a/tool-data/rnastar_index2x_versioned.loc.sample Wed Aug 05 13:36:20 2020 +0000 +++ b/tool-data/rnastar_index2x_versioned.loc.sample Fri Sep 10 16:42:21 2021 +0000 @@ -2,7 +2,7 @@ #to use a directory of rna-star indexed sequences data files. #You will need to create these data files and then create a #rnastar_index2x_versioned.loc file similar to this one (store it in this -directory) that points to the directories in which those files are stored. +#directory) that points to the directories in which those files are stored. #The rnastar_index2x_versioned.loc file has this format (longer white space #characters are TAB characters): #