Mercurial > repos > iuc > data_manager_star_index_builder
changeset 8:d2f4bfcf269a draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_star_index_builder commit 57f71aa633a43ab02bbf05acd0c6d7f406e01f1e"
author | iuc |
---|---|
date | Thu, 28 Nov 2019 20:32:41 +0000 |
parents | 9a231bb19a3b |
children | 5750f6d0d369 |
files | data_manager/macros.xml data_manager/rna_star_index_builder.py data_manager/rna_star_index_builder.xml data_manager_conf.xml test-data/all_fasta.loc test-data/phiX174.fasta test-data/rnastar_index2_versioned.loc test-data/test_star_01.data_manager_json tool_data_table_conf.xml.test |
diffstat | 8 files changed, 171 insertions(+), 39 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/macros.xml Thu Aug 15 11:28:53 2019 -0400 +++ b/data_manager/macros.xml Thu Nov 28 20:32:41 2019 +0000 @@ -1,10 +1,11 @@ <macros> <!-- REMEMBER to bump the version of rna_star_index_builder_data_manager whenever you make changes to the following two version tokens! - The data manager uses a symlink to this macro file to keep the versions in - sync. --> + The data manager uses a symlink to this macro file to keep the STAR and + the index versions in sync, but you should manually adjust the +galaxy + version number. --> <!-- STAR version to be used --> - <token name="@VERSION@">2.7.2a</token> + <token name="@VERSION@">2.7.2b</token> <!-- STAR index version compatible with this version of STAR This is the STAR version that introduced the index structure expected by the current version. @@ -18,10 +19,11 @@ <requirements> <requirement type="package" version="@VERSION@">star</requirement> <requirement type="package" version="1.9">samtools</requirement> + <yield /> </requirements> </xml> - <xml name="index_selection" token_with_gene_model="1"> + <xml name="index_selection" token_with_gene_model="0"> <param argument="--genomeDir" name="genomeDir" type="select" label="Select reference genome" help="If your genome of interest is not listed, contact the Galaxy team"> @@ -88,7 +90,7 @@ && #end if ]]></token> - <token name="@REFGENOMEHANDLING" ><![CDATA[ + <token name="@REFGENOMEHANDLING@" ><![CDATA[ --runThreadN \${GALAXY_SLOTS:-4} --genomeLoad NoSharedMemory --genomeDir @@ -97,7 +99,7 @@ #else: '${refGenomeSource.GTFconditional.genomeDir.fields.path}' ## Handle difference between indices with/without annotations - #if str($refGenomeSource.GTFconditional.GTFselect) == 'with-gtf': + #if str($refGenomeSource.GTFconditional.GTFselect) == 'without-gtf': #if $refGenomeSource.GTFconditional.sjdbGTFfile: --sjdbOverhang $refGenomeSource.GTFconditional.sjdbOverhang --sjdbGTFfile '${refGenomeSource.GTFconditional.sjdbGTFfile}' @@ -114,6 +116,7 @@ <regex match="EXITING: FATAL INPUT ERROR:" source="both" level="fatal"/> <regex match="EXITING: fatal error trying to allocate genome arrays, exception thrown: std::bad_alloc" source="both" level="fatal"/> <regex match="\[sam_read1\] missing header\? Abort!" source="both" level="fatal"/> + <yield /> </stdio> </xml> <xml name="refgenomehandling" > @@ -127,16 +130,16 @@ <param name="GTFselect" type="select" label="Reference genome with or without an annotation" help="Select the '... with builtin gene-model' option to select from the list of available indexes that were built with splice junction information. Select the '... without builtin gene-model' option to select from the list of available indexes without annotated splice junctions."> - <option value="without-gtf">use genome reference with builtin gene-model</option> - <option value="with-gtf">use genome reference without builtin gene-model</option> + <option value="without-gtf" selected='true'>use genome reference without builtin gene-model</option> + <option value="with-gtf">use genome reference with builtin gene-model</option> </param> <when value="with-gtf"> + <expand macro="index_selection" with_gene_model="1" /> + </when> + <when value="without-gtf"> <expand macro="index_selection" with_gene_model="0" /> <expand macro="@SJDBOPTIONS@" /> </when> - <when value="without-gtf"> - <expand macro="index_selection" with_gene_model="1" /> - </when> </conditional> </when> <when value="history">
--- a/data_manager/rna_star_index_builder.py Thu Aug 15 11:28:53 2019 -0400 +++ b/data_manager/rna_star_index_builder.py Thu Nov 28 20:32:41 2019 +0000 @@ -6,26 +6,42 @@ def main(): parser = argparse.ArgumentParser() - parser.add_argument( '--config-file' ) - parser.add_argument( '--value' ) - parser.add_argument( '--dbkey' ) - parser.add_argument( '--name' ) - parser.add_argument( '--subdir' ) - parser.add_argument( '--data-table' ) - parser.add_argument( '--with-gene-model', action='store_true' ) - parser.add_argument( '--index-version' ) + parser.add_argument('--config-file') + parser.add_argument('--value') + parser.add_argument('--dbkey') + parser.add_argument('--name') + parser.add_argument('--subdir') + parser.add_argument('--data-table') + parser.add_argument('--with-gene-model', action='store_true') + parser.add_argument('--index-version') args = parser.parse_args() - if args.dbkey in [ None, '', '?' ]: - raise Exception( '"%s" is not a valid dbkey. You must specify a valid dbkey.' % ( args.dbkey ) ) + if args.dbkey in [None, '', '?']: + raise Exception( + '"%s" is not a valid dbkey. You must specify a valid dbkey.' + % (args.dbkey) + ) with_gene_model = "0" if args.with_gene_model: with_gene_model = "1" - data_manager_dict = {'data_tables': {args.data_table: [dict({"value": args.value, "dbkey": args.dbkey, "name": args.name, "path": args.subdir, "with_gene_model": with_gene_model, "version": args.index_version} )]}} - open( args.config_file, 'w' ).write( json.dumps( data_manager_dict ) ) + data_manager_dict = { + 'data_tables': { + args.data_table: [ + { + "value": args.value, + "dbkey": args.dbkey, + "name": args.name, + "path": args.subdir, + "with_gene_model": with_gene_model, + "version": args.index_version + } + ] + } + } + open(args.config_file, 'w').write(json.dumps(data_manager_dict, sort_keys=True)) if __name__ == "__main__":
--- a/data_manager/rna_star_index_builder.xml Thu Aug 15 11:28:53 2019 -0400 +++ b/data_manager/rna_star_index_builder.xml Thu Nov 28 20:32:41 2019 +0000 @@ -1,11 +1,13 @@ -<tool id="rna_star_index_builder_data_manager" name="rnastar index2" tool_type="manage_data" version="2.7.1a" profile="17.01"> +<tool id="rna_star_index_builder_data_manager" name="rnastar index2" tool_type="manage_data" version="@IDX_VERSION@+galaxy2" profile="19.05"> <description>builder</description> <macros> <import>macros.xml</import> </macros> - <expand macro="requirements" /> + <expand macro="requirements"> + <requirement type="package" version="3.7">python</requirement> + </expand> <command><![CDATA[ if [ -z "\$GALAXY_MEMORY_MB" ] ; then @@ -14,17 +16,16 @@ GALAXY_MEMORY_BYTES=\$((GALAXY_MEMORY_MB * 1000000)) ; fi ; -#import json, os -#set params = json.loads(open(str($out_file)).read()) -#set target_directory = $params['output_data'][0]['extra_files_path'].encode('ascii', 'replace') -#set subdir = os.path.basename(target_directory) +#import os +#set $target_directory = str($out_file.extra_files_path) +#set $subdir = os.path.basename($target_directory) -mkdir -p '${target_directory}/${subdir}' && +mkdir '${target_directory}' && STAR --runMode genomeGenerate --genomeFastaFiles '${all_fasta_source.fields.path}' ---genomeDir '${target_directory}/${subdir}' +--genomeDir '${target_directory}' --limitGenomeGenerateRAM \${GALAXY_MEMORY_BYTES} #if $GTFconditional.GTFselect == "withGTF": --sjdbGTFfile '${GTFconditional.sjdbGTFfile}' @@ -100,25 +101,26 @@ <data name="out_file" format="data_manager_json"/> </outputs> - <!-- not available in planemo at the moment of writing <tests> <test> - <param name="all_fasta_source" value="phiX.fa"/> + <param name="all_fasta_source" value="phiX174"/> <param name="sequence_name" value="phiX"/> <param name="sequence_id" value="minimal-settings"/> <param name="modelformat" value="None"/> - <output name="out_file" file="test_star_01.data_manager_json"/> + <output name="out_file" file="test_star_01.data_manager_json" compare="re_match"/> </test> </tests> - --> <help><![CDATA[ .. class:: infomark *What it does* -This is a Galaxy datamanager for the rna STAR gap-aware RNA aligner. +This is a Galaxy data manager tool for the gap-aware RNA aligner STAR. + +This version of the tool builds STAR indices of the format first introduced +with STAR version @IDX_VERSION@. Please read the fine manual - that and the google group are the places to learn about the options above.
--- a/data_manager_conf.xml Thu Aug 15 11:28:53 2019 -0400 +++ b/data_manager_conf.xml Thu Nov 28 20:32:41 2019 +0000 @@ -1,6 +1,6 @@ <?xml version="1.0"?> <data_managers> - <data_manager tool_file="data_manager/rna_star_index_builder.xml" id="rna_star_index_builder"> + <data_manager tool_file="data_manager/rna_star_index_builder.xml" id="rna_star_index_builder" version="0.0.6"> <data_table name="rnastar_index2_versioned"> <output> <column name="value" /> @@ -12,9 +12,9 @@ out_file.extra_files_path is used as base by default if no source, eg for type=directory, then refers to base --> - <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">rnastar/${version}/${dbkey}/${value}</target> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">rnastar/${version}/${dbkey}/${value}/${path}</target> </move> - <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/rnastar/${version}/${dbkey}/${value}</value_translation> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/rnastar/${version}/${dbkey}/${value}/${path}</value_translation> <value_translation type="function">abspath</value_translation> </column> <column name="with_gene_model" />
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/all_fasta.loc Thu Nov 28 20:32:41 2019 +0000 @@ -0,0 +1,19 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +# +phiX174 phiX174 phiX174 ${__HERE__}/phiX174.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/phiX174.fasta Thu Nov 28 20:32:41 2019 +0000 @@ -0,0 +1,79 @@ +>phiX174 +GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT +GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA +ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG +TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA +GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC +TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT +TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT +CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT +TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG +TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC +GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA +CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAG +TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT +AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC +CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA +TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC +TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA +CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA +GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT +GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA +ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC +TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT +TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC +ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCGTGATGTTATTTCTTCATTTGGAGGTAAAAC +CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT +GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC +CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC +TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG +TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT +TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA +AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT +TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT +ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC +GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC +TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT +TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA +TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG +TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC +CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG +AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC +CGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT +TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG +CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA +AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT +GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG +GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA +TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT +CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG +TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA +GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC +CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA +TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA +AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC +TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT +CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA +TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG +TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT +CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT +TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC +ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG +TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA +ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG +GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC +CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT +GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTACTATTCAGCGTTTGATGAATGCAATGCGACAG +GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT +ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG +CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC +CGTCTTCATTTCCATGCGGTGCATTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC +GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT +CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG +CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA +TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT +TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG +TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC +AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC +TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_star_01.data_manager_json Thu Nov 28 20:32:41 2019 +0000 @@ -0,0 +1,1 @@ +{"data_tables": {"rnastar_index2_versioned": \[{"dbkey": "phiX174", "name": "phiX174", "path": ".*", "value": "phiX174", "version": "2.7.1a", "with_gene_model": "0"}\]}} \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Thu Nov 28 20:32:41 2019 +0000 @@ -0,0 +1,12 @@ +<tables> + <!-- Locations of all fasta files under genome directory --> + <table name="all_fasta" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="${__HERE__}/test-data/all_fasta.loc" /> + </table> + <!-- Locations of STAR indexes --> + <table name="rnastar_index2_versioned" comment_char="#" allow_duplicate_entries="False"> + <columns>value, dbkey, name, path, with_gene_model, version</columns> + <file path="${__HERE__}/test-data/rnastar_index2_versioned.loc" /> + </table> +</tables>