# HG changeset patch
# User nate
# Date 1745615164 0
# Node ID f7c16185b8e126de341cd7c09608c12b0827f2b9
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_hisat2_index_builder commit 36a598c1014c3fa9696c4bdbf13d98a9e1e528c9
diff -r 000000000000 -r f7c16185b8e1 data_manager/.hisat2_index_builder.xml.swp
Binary file data_manager/.hisat2_index_builder.xml.swp has changed
diff -r 000000000000 -r f7c16185b8e1 data_manager/hisat2_index_builder.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/hisat2_index_builder.xml Fri Apr 25 21:06:04 2025 +0000
@@ -0,0 +1,148 @@
+
+ builder
+
+ 2.2.1
+ 0
+
+
+ hisat2
+
+ splice_sites.txt &&
+ hisat2_extract_exons.py gtf_file.gtf > exon.txt &&
+ #end if
+ #if $advanced.adv_param_select == 'yes' and $advanced.snps:
+ ln -s '${advanced.snps}' snps.tabular &&
+ #if $advanced.snps.is_of_type('vcf')
+ hisat2_extract_snps_haplotypes_VCF.py '${all_fasta_source.fields.path}' snps.tabular extracted &&
+ #else
+ hisat2_extract_snps_haplotypes_UCSC.py '${all_fasta_source.fields.path}' snps.tabular extracted &&
+ #end if
+ #end if
+
+ mkdir -p '${out_file.extra_files_path}' &&
+ ln -s '${all_fasta_source.fields.path}' '${out_file.extra_files_path}/${fasta_file_name}' &&
+ working=`pwd` &&
+ cd '${out_file.extra_files_path}' &&
+
+ hisat2-build -p \${GALAXY_SLOTS:-1}
+ #if $advanced.adv_param_select == 'yes':
+ --noauto
+ #if $advanced.snps:
+ --snp "\${working}/extracted.snp"
+ --haplotype "\${working}/extracted.haplotype"
+ #end if
+ #if $advanced.gtf_input:
+ --ss "\${working}/splice_sites.txt"
+ --exon "\${working}/exon.txt"
+ #end if
+ --bmax $advanced.bmax
+ --bmaxdivn $advanced.bmaxdivn
+ --dcv $advanced.dcv
+ --offrate $advanced.offrate
+ #end if
+ '${fasta_file_name}' '${value}' &&
+
+ cp '$dmjson' '$out_file'
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+`__ is a fast and sensitive alignment
+program for mapping next-generation sequencing reads (both DNA and RNA) against
+the general human population (as well as against a single reference genome).
+Based on an extension of BWT for graphs (`BWT `__)
+we designed and implemented a graph FM index (GFM), an original approach and
+its first implementation to the best of our knowledge. In addition to using one
+global GFM index that represents the general population, HISAT2 uses a large set
+of small GFM indexes that collectively cover the whole genome (each index
+representing a genomic region of 56 Kbp, with 55,000 indexes needed to cover
+the human population). These small indexes (called local indexes), combined
+with several alignment strategies, enable rapid and accurate alignment of
+sequencing reads. This new indexing scheme is called a Hierarchical Graph
+FM index (HGFM). In addition to spliced alignment, HISAT handles reads
+involving indels and supports a paired-end alignment mode. Multiple processors
+can be used simultaneously to achieve greater alignment speed. HISAT outputs
+alignments in `SAM `__ format, enabling
+interoperation with a large number of other tools (e.g. `SAMtools `__,
+`GATK `__)
+that use SAM. HISAT is distributed under the `GPLv3 license `__,
+and it runs on the command line under Linux, Mac OS X and Windows.
+]]>
+
+
+ 10.1038/nmeth.3317
+
+
diff -r 000000000000 -r f7c16185b8e1 data_manager_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml Fri Apr 25 21:06:04 2025 +0000
@@ -0,0 +1,20 @@
+
+
+
+
+
+
+
+
diff -r 000000000000 -r f7c16185b8e1 test-data/all_fasta.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_fasta.loc Fri Apr 25 21:06:04 2025 +0000
@@ -0,0 +1,19 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
+phiX174 phiX174 phiX174 ${__HERE__}/phiX174.fasta
diff -r 000000000000 -r f7c16185b8e1 test-data/hisat2_data_manager.1.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hisat2_data_manager.1.json Fri Apr 25 21:06:04 2025 +0000
@@ -0,0 +1,12 @@
+{
+ "data_tables":{
+ "hisat2_indexes":[
+ {
+ "value": "phiX174",
+ "dbkey": "phiX174",
+ "name": "phiX174",
+ "path": "phiX174.fasta"
+ }
+ ]
+ }
+}
diff -r 000000000000 -r f7c16185b8e1 test-data/hisat2_data_manager.2.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hisat2_data_manager.2.json Fri Apr 25 21:06:04 2025 +0000
@@ -0,0 +1,12 @@
+{
+ "data_tables":{
+ "hisat2_indexes":[
+ {
+ "value": "tigHai1",
+ "dbkey": "phiX174",
+ "name": "Galeocerdo cuvier",
+ "path": "phiX174.fasta"
+ }
+ ]
+ }
+}
diff -r 000000000000 -r f7c16185b8e1 test-data/hisat2_indexes.loc
diff -r 000000000000 -r f7c16185b8e1 test-data/phiX174.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/phiX174.fasta Fri Apr 25 21:06:04 2025 +0000
@@ -0,0 +1,79 @@
+>phiX174
+GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT
+GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA
+ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG
+TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA
+GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC
+TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT
+TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT
+CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT
+TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG
+TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC
+GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA
+CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAG
+TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT
+AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC
+CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA
+TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC
+TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA
+CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA
+GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT
+GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA
+ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC
+TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT
+TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC
+ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCGTGATGTTATTTCTTCATTTGGAGGTAAAAC
+CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT
+GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC
+CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC
+TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG
+TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT
+TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA
+AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT
+TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT
+ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC
+GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC
+TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT
+TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA
+TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG
+TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC
+CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG
+AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC
+CGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT
+TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG
+CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA
+AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT
+GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG
+GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA
+TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT
+CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG
+TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA
+GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC
+CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA
+TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA
+AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC
+TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT
+CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA
+TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG
+TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT
+CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT
+TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC
+ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG
+TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA
+ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG
+GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC
+CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT
+GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTACTATTCAGCGTTTGATGAATGCAATGCGACAG
+GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT
+ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG
+CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC
+CGTCTTCATTTCCATGCGGTGCATTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC
+GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT
+CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG
+CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA
+TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT
+TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG
+TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC
+AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC
+TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA
+
diff -r 000000000000 -r f7c16185b8e1 tool-data/all_fasta.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample Fri Apr 25 21:06:04 2025 +0000
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
diff -r 000000000000 -r f7c16185b8e1 tool-data/hisat2_indexes.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/hisat2_indexes.loc.sample Fri Apr 25 21:06:04 2025 +0000
@@ -0,0 +1,39 @@
+# hisat2_indexes.loc.sample
+# This is a *.loc.sample file distributed with Galaxy that enables tools
+# to use a directory of indexed data files. This one is for HISAT2.
+# See the wiki: http://wiki.galaxyproject.org/Admin/NGS%20Local%20Setup
+# First create these data files and save them in your own data directory structure.
+# Then, create a hisat2_indexes.loc file to use those indexes with tools.
+# Copy this file, save it with the same name (minus the .sample),
+# follow the format examples, and store the result in this directory.
+# The file should include an one line entry for each index set.
+# The path points to the "basename" for the set, not a specific file.
+# It has four text columns seperated by TABS.
+#
+#
+#
+# So, for example, if you had sacCer3 indexes stored in:
+#
+# /depot/data2/galaxy/sacCer3/hisat2_indexes/
+#
+# containing sacCer3 genome and sacCer3.*.ht2 files, such as:
+#
+# -rw-rw-r-- 1 dave dave 12M Sep 23 13:57 sacCer3.1.ht2
+# -rw-rw-r-- 1 dave dave 2.9M Sep 23 13:57 sacCer3.2.ht2
+# -rw-rw-r-- 1 dave dave 161 Sep 23 13:57 sacCer3.3.ht2
+# -rw-rw-r-- 1 dave dave 2.9M Sep 23 13:57 sacCer3.4.ht2
+# -rw-rw-r-- 1 dave dave 7.3M Sep 23 13:57 sacCer3.5.ht2
+# -rw-rw-r-- 1 dave dave 3.0M Sep 23 13:57 sacCer3.6.ht2
+# -rw-rw-r-- 1 dave dave 128K Sep 23 13:57 sacCer3.7.ht2
+# -rw-rw-r-- 1 dave dave 32K Sep 23 13:57 sacCer3.8.ht2
+#
+# then the hisat2_indexes.loc entry could look like this:
+#
+#sacCer3 sacCer3 S. cerevisiae Apr. 2011 (SacCer_Apr2011/sacCer3) (sacCer3) /depot/data2/galaxy/hisat2_indexes/sacCer3
+#
+#More examples:
+#
+#mm10 mm10 Mouse (mm10) /depot/data2/galaxy/hisat2_indexes/mm10
+#dm3 dm3 D. melanogaster (dm3) /depot/data2/galaxy/hisat2_indexes/dm3
+#
+#
diff -r 000000000000 -r f7c16185b8e1 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Fri Apr 25 21:06:04 2025 +0000
@@ -0,0 +1,13 @@
+
+
+
+