Galaxy |

Changeset 2:f6ba0f12cca2 (2016-09-28)

Previous changeset 1:74391fc6e3f2 (2012-10-05) Next changeset 3:488e9d642566 (2016-09-28)

Commit message:
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013

modified:
gmap.xml
gmap_build.xml
gsnap.xml
iit_store.xml
lib/galaxy/datatypes/gmap.py
snpindex.xml
tool_dependencies.xml

added:
datatypes_conf.xml

diff -r 74391fc6e3f2 -r f6ba0f12cca2 datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml Wed Sep 28 10:43:44 2016 -0400

@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<datatypes>
+    <datatype_files>
+        <datatype_file name="gmap.py"/>
+    </datatype_files>
+    <registration>
+        <datatype extension="gmapdb" type="galaxy.datatypes.gmap:GmapDB" display_in_upload="False"/>
+        <datatype extension="gmapsnpindex" type="galaxy.datatypes.gmap:GmapSnpIndex" display_in_upload="False"/>
+        <datatype extension="iit" type="galaxy.datatypes.gmap:IntervalIndexTree" display_in_upload="True"/>
+        <datatype extension="splicesites.iit" type="galaxy.datatypes.gmap:SpliceSitesIntervalIndexTree" display_in_upload="True"/>
+        <datatype extension="introns.iit" type="galaxy.datatypes.gmap:IntronsIntervalIndexTree" display_in_upload="True"/>
+        <datatype extension="snps.iit" type="galaxy.datatypes.gmap:SNPsIntervalIndexTree" display_in_upload="True"/>
+        <datatype extension="gmap_annotation" type="galaxy.datatypes.gmap:IntervalAnnotation" display_in_upload="False"/>
+        <datatype extension="gmap_splicesites" type="galaxy.datatypes.gmap:SpliceSiteAnnotation" display_in_upload="True"/>
+        <datatype extension="gmap_introns" type="galaxy.datatypes.gmap:IntronAnnotation" display_in_upload="True"/>
+        <datatype extension="gmap_snps" type="galaxy.datatypes.gmap:SNPAnnotation" display_in_upload="True"/>
+    </registration>
+    <sniffers>
+        <sniffer type="galaxy.datatypes.gmap:IntervalAnnotation"/>
+        <sniffer type="galaxy.datatypes.gmap:SpliceSiteAnnotation"/>
+        <sniffer type="galaxy.datatypes.gmap:IntronAnnotation"/>
+        <sniffer type="galaxy.datatypes.gmap:SNPAnnotation"/>
+    </sniffers>
+</datatypes>

diff -r 74391fc6e3f2 -r f6ba0f12cca2 gmap.xml
--- a/gmap.xml Fri Oct 05 13:08:43 2012 -0500
+++ b/gmap.xml Wed Sep 28 10:43:44 2016 -0400

[

@@ -1,7 +1,7 @@
-<tool id="gmap" name="GMAP" version="2.0.1">
+<tool id="gmap" name="GMAP" version="3.0.0">
   <description>Genomic Mapping and Alignment Program for mRNA and EST sequences</description>
   <requirements>
-    <requirement type="package" version="2011-11-30">gmap</requirement>
+    <requirement type="package" version="2013-05-09">gmap</requirement>
   </requirements>
   <version_string>gmap --version</version_string>
   <command>
@@ -11,8 +11,7 @@
     #if $refGenomeSource.genomeSource == "history":
       --gseg=$refGenomeSource.ownFile
     #elif $refGenomeSource.genomeSource == "gmapdb":
-      #set $gmapdb = $os.listdir($refGenomeSource.gmapdb.extra_files_path)[0]
-      --dir=$refGenomeSource.gmapdb.extra_files_path --db=$gmapdb
+      --dir=$refGenomeSource.gmapdb.extra_files_path --db=$refGenomeSource.gmapdb.metadata.db_name
       #if $refGenomeSource.kmer != None and len($refGenomeSource.kmer.__str__) == 2:
         --kmer=$refGenomeSource.kmer
       #end if
@@ -43,6 +42,9 @@
     #elif $result.format == "sam":
       --format=$result.sam_paired_read
       $result.no_sam_headers
+      $result.sam_use_0M
+      $result.force_xs_dir
+      $result.md_lowercase_snp
       #* Removed in gmap version 2011-11-30
       #if len($result.noncanonical_splices.__str__) > 0
          --noncanonical-splices=$result.noncanonical_splices
@@ -65,6 +67,7 @@
     #end if
     #if $computation.options == "advanced":
       $computation.nosplicing
+      $computation.find_shifted_canonical
       $computation.cross_species
       #if len($computation.min_intronlength.__str__) > 0
         --min-intronlength=$computation.min_intronlength
@@ -170,6 +173,15 @@
             <filter type="sort_by" column="3"/>
           </options>
         </param>
+        
         <param name="map" type="select" data_ref="gmapindex" label="Look for splicing involving known sites or known introns" help="">
           <options from_file="gmap_indices.loc">
             <column name="name" index="4"/>
@@ -224,7 +236,7 @@
          <validator type="in_range" message="totallength must be positive" min="0" />
        </param>
        <param name="chimera_margin" type="integer" value="" optional="true" label="Amount of unaligned sequence that triggers search for a chimera"
-              help=" default is 40, To turn off, set to a large value (greater than the query length)" >
+              help=" default is 40, To turn off, set to 0" >
          <validator type="in_range" message="chimera_margin must be positive" min="0" />
        </param>
        <param name="direction"  type="select" label="cDNA direction">
@@ -237,6 +249,7 @@
        <param name="trimendexons"  type="integer" value="" optional="true" label="Trim end exons with fewer than given number of matches (in nt, default 12)" >
          <validator type="in_range" message="trimendexons must be positive" min="1" />
        </param>
+       <param name="find_shifted_canonical" type="boolean" truevalue="--find-shifted-canonical-species" falsevalue="" checked="false" label="find-shifted-canonical Use a more sensitive search for canonical splicing" help=""/>
        <param name="cross_species" type="boolean" truevalue="--cross-species" falsevalue="" checked="false" label="Cross-species alignment" help="For cross-species alignments, use a more sensitive search for canonical splicing"/>

        <param name="canonical"  type="select" label="Reward for canonical and semi-canonical introns">
@@ -328,7 +341,7 @@
       <option value="splicesites">splicesites output (for GSNAP)</option>
       <option value="introns">introns output (for GSNAP)</option>
       <option value="map_exons">IIT FASTA exon map format</option>
-      <option value="map_genes">IIT FASTA map format</option>
+      <option value="map_ranges">IIT FASTA map format</option>
       <option value="coords">coords in table format</option>
       <option value="sam" selected="true">SAM format</option>
     </param>
@@ -353,7 +366,7 @@
       <when value="splicesites"/>
       <when value="introns"/>
       <when value="map_exons"/>
-      <when value="map_genes"/>
+      <when value="map_ranges"/>
       <when value="coords"/>
       <when value="sam">
         <param name="sam_paired_read" type="boolean" truevalue="sampe" falsevalue="samse" checked="false" label="SAM paired reads"/>
@@ -369,6 +382,11 @@
         <param name="read_group_name" type="text" value="" label="Value to put into read-group name (RG-SM) field"/>
         <param name="read_group_library" type="text" value="" label="Value to put into read-group library (RG-LB) field"/>
         <param name="read_group_platform" type="text" value="" label="Value to put into read-group library platform (RG-PL) field"/>
+        <param name="sam_use_0M" type="boolean" truevalue="--sam-use-0M" falsevalue="" checked="false" label="Insert 0M in CIGAR between adjacent insertions and deletions" help="Required by Picard, but can cause errors in other tools"/>
+        <param name="force_xs_dir" type="boolean" truevalue="--force-xs-dir" falsevalue="" checked="false" label="Force direction (disallow XS:A:?)"
+               help="For RNA-Seq alignments, disallows XS:A:? when the sense direction is unclear, and replaces this value arbitrarily with XS:A:+. May be useful for some programs, such as Cufflinks, that cannot handle XS:A:?.  However, if you use this flag, the reported value of XS:A:+ in these cases will not be meaningful."/>
+        <param name="md_lowercase_snp" type="boolean" truevalue="--md-lowercase-snp" falsevalue="" checked="false" label="MD lowercase SNP"
+               help="In MD string, when known SNPs are given by the -v flag, prints difference nucleotides as lower-case when they, differ from reference but match a known alternate allele"/>
       </when>
     </conditional> 

@@ -383,6 +401,18 @@
       print-comment    Show comment line for each hit
     -->

+    
+
+

   </inputs>
   <outputs>

diff -r 74391fc6e3f2 -r f6ba0f12cca2 gmap_build.xml
--- a/gmap_build.xml Fri Oct 05 13:08:43 2012 -0500
+++ b/gmap_build.xml Wed Sep 28 10:43:44 2016 -0400

b'@@ -1,10 +1,10 @@\n-<tool id="gmap_build" name="GMAP Build" version="2.0.0">\n+<tool id="gmap_build" name="GMAP Build" version="3.0.0">\n <description>a database genome index for GMAP and GSNAP</description>\n <requirements>\n- <requirement type="package" version="2011-11-30">gmap</requirement>\n+ <requirement type="package" version="2013-05-09">gmap</requirement>\n </requirements>\n <version_string>gmap --version</version_string>\n- <command interpreter="command"> /bin/bash $shscript 2>1 1> $output </command>\n+ <command interpreter="command"> /bin/bash $shscript > $output </command>\n <inputs>\n \n <param name="refname" type="text" label="Name you want to give this gmap database" help="">\n@@ -15,12 +15,17 @@\n <param name="input" type="data" format="fasta" label="reference sequence fasta" />\n </repeat>\n \n- <param name="kmer" type="select" multiple="true" force_select="true" label="kmer size" help="">\n- <option value="12">12</option>\n- <option value="13">13</option>\n- <option value="14">14</option>\n- <option value="15" selected="true">15</option>\n+ <param name="circular_chroms" type="text" value="" optional="true" label="Names of circular chromosomes" \n+ help="a list of chromosomes, separated by commas, allow GSNAP and GMAP to align reads across the ends of the chromosome">\n+ </param>\n+\n+ <param name="sort" type="select" label="Sort chromosomes" help="">\n+ <option value="none">none - use chromosomes as found in FASTA file(s)</option>\n+ <option value="alpha">alpha - sort chromosomes alphabetically (chr10 before chr 1)</option>\n+ <option value="numeric-alpha">numeric-alpha - chr1, chr1U, chr2, chrM, chrU, chrX, chrY</option>\n+ <option value="chrom">chrom - chr1, chr2, chrM, chrX, chrY, chr1U, chrU</option>\n </param> \n+\n <param name="cmetindex" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Create cmetindex to process reads from bisulfite-treated DNA"/>\n <param name="atoiindex" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Create atoiindex to process reads under RNA-editing tolerance"/>\n <conditional name="splicesite">\n@@ -51,6 +56,7 @@\n <option value="none"></option>\n <option value="snpTable">UCSC SNP Table</option>\n <option value="snpFile">GMAP SNP File</option>\n+ <option value="vcfFile">VCF File</option>\n </param>\n <when value="none"/>\n <when value="snpTable">\n@@ -77,8 +83,33 @@\n sequence, that SNP will be ignored in subsequent processing as a probable error.\n The N stands for any other allele." />\n </when>\n+ <when value="vcfFile">\n+ <param name="snps" type="data" format="vcf" optional="true" label="VCF SNPs file" \n+ help="Example: ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606/VCF/00-All.vcf.gz\n+ The VCF file contains multiple versions of dbSNP, so if you want a\n+ particular version, such as 135. The vcf_iit program tries to pick \n+ a subset of SNPs that somewhat parallel\n+ the ones without exceptions in the UCSC dbSNP file. It keeps all SNPs\n+ that have been validated (marked in the VCF file as "VLD") or have a\n+ submitter link-out ("SLO"). Otherwise, it excludes SNPs that are\n+ individual genotypes ("GNO"). If none of these conditions hold, then\n+ the SNP is allowed. "/>\n+ <param name="vcf_version" type="text" value="" optional="true" label="dbSNP version" \n+ help="The VCF file contains multiple versions of dbSNP, so if you want a particular version, such as 135"/>\n+ </when>\n </conditional> \n+\n+ <param name="kmer" type="select" multiple="true" force_select="true" label="kmer size" help="Use smaller values when building i'..b'wer\n+files in VCF format. The older dbSNP files can be obtained from UCSC,\n+either from the Galaxy UCSC table browser or downloaded:\n+\n+ ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130.txt.gz\n+\n+For versions before snp132, you may also want to exclude exceptions,\n+which will require this file:\n+\n+ ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/snp130Exceptions.txt.gz\n+\n+The option "-w weight" makes use of the dbSNP item weight, a value\n+from 1 to 3, where lower weight means higher confidence. Items will\n+be included if the item weight is the given value weight or less.\n+The default value of -w is 1, which is the criterion UCSC uses to\n+build its ambiguous version of the genome. To allow all item weights,\n+specify "-w 3".\n+\n+The more recent SNP data are provided in VCF format, and can be\n+retrieved like this:\n+\n+ ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606/VCF/00-All.vcf.gz\n+\n+The VCF file contains multiple versions of dbSNP, so if you want a\n+particular version, such as 135, you would use the flag "-v 135". The\n+vcf_iit program tries to pick a subset of SNPs that somewhat parallel\n+the ones without exceptions in the UCSC dbSNP file. It keeps all SNPs\n+that have been validated (marked in the VCF file as "VLD") or have a\n+submitter link-out ("SLO"). Otherwise, it excludes SNPs that are\n+individual genotypes ("GNO"). If none of these conditions hold, then\n+the SNP is allowed. These rules might not be the best ones; I made\n+them up by trying to compare version 135 of the VCF data with\n+version 135 of the UCSC dbSNP data.\n+\n+**Alignment of reads from bisulfite-treated DNA in GSNAP**\n+\n+GSNAP has the ability to align reads from bisulfite-treated DNA, which\n+converts unmethylated cytosines to uracils that appear as thymines in\n+reads. GSNAP is able to identify genomic-T to read-C mismatches, \n+if a cmetindex is generated.\n+\n+**RNA-editing tolerance in GSNAP**\n+\n+Just as GSNAP has a program cmetindex and a mode called "cmet" for\n+tolerance to C-to-T changes, it can be tolerant to A-to-G changes\n+using the program atoiindex and a mode called "atoi". This mode is\n+designed to facilitate alignments that are tolerant to RNA editing\n+where A\'s are converted to I\'s, which appear as G\'s to a sequencer.\n+\n+To process reads under RNA-editing tolerance, you will first need to\n+create th atoi index.\n+\n+\n+\n+**K-mer size**\n+\n+You can control the k-mer size\n+for the genomic index with the -k flag, which can range from 12 to 15.\n+The default value for -k is 15, but this requires your machine to have\n+4 GB of RAM to build the indices. If you do not have 4 GB of RAM,\n+then you will need to reduce the value of -k or find another machine.\n+Here are the RAM requirements for building various indices::\n+\n+ k-mer of 12: 64 MB\n+ k-mer of 13: 256 MB\n+ k-mer of 14: 1 GB\n+ k-mer of 15: 4 GB\n+\n+These are the RAM requirements for building indices, but not to run\n+the GMAP/GSNAP programs once the indices are built, because the\n+genomic indices are compressed. For example, the genomic index for a\n+k-mer of 15 gives a gammaptrs file of 64 MB and an offsetscomp file of\n+about 350 MB, much smaller than the 4 GB that would otherwise be\n+required. Therefore, you may want to build your genomic index on a\n+computer with sufficient RAM, and distribute that index to be used by\n+computers with less RAM.\n+\n+The amount of compression can be controlled using the -b or -\\-basesize\n+parameter to gmap_build. By default, the value for k-mer size is 15,\n+and the value for basesize is 12. If you select a different value for\n+k-mer size, then basesize is made by default to be equal to that k-mer\n+size.\n+\n+If you want to build your genomic databases with more than one k-mer\n+size, you can re-run gmap_build with different values of -k. This\n+will overwrite only the identical files from the previous runs. You\n+can then choose the k-mer size at run-time by using the -k flag for\n+either GMAP or GSNAP.\n+\n </help>\n </tool>\n \n'

diff -r 74391fc6e3f2 -r f6ba0f12cca2 gsnap.xml
--- a/gsnap.xml Fri Oct 05 13:08:43 2012 -0500
+++ b/gsnap.xml Wed Sep 28 10:43:44 2016 -0400

[

@@ -1,7 +1,7 @@
-<tool id="gsnap" name="GSNAP" version="2.0.1">
+<tool id="gsnap" name="GSNAP" version="3.0.0">
   <description>Genomic Short-read Nucleotide Alignment Program</description>
   <requirements>
-      <requirement type="package" version="2011-11-30">gmap</requirement>
+      <requirement type="package" version="2013-05-09">gmap</requirement>
   </requirements>
   <version_string>gsnap --version</version_string>
   <command>
@@ -9,7 +9,6 @@
     gsnap
     --nthreads="4" --ordered
     #if $refGenomeSource.genomeSource == "gmapdb":
-      #set $gmapdb = $os.listdir($refGenomeSource.gmapdb.extra_files_path)[0]
       --dir=$refGenomeSource.gmapdb.extra_files_path --db=$refGenomeSource.gmapdb.metadata.db_name
     #else:
       --dir=$os.path.dirname($refGenomeSource.gmapindex.value) --db=$os.path.basename($refGenomeSource.gmapindex.value)
@@ -140,6 +139,9 @@
       #if $output.npath.__str__ != '':
         --npath=$output.npath
       #end if
+      #if $output.maxsearch.__str__ != '':
+        --maxsearch=$output.maxsearch
+      #end if
       $output.quiet_if_excessive
       $output.show_refdiff
       $output.clip_overlap
@@ -248,7 +250,13 @@
         <param name="fastq" type="data" format="fastq" label="Select a fastq dataset" />
         <conditional name="paired">
           <param name="ispaired" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Use Paired Reads?"/>
-          <when value="no"/>
+          <when value="no">
+          
+          </when>
           <when value="yes">
             <param name="fastq" type="data" format="fastq" label="Select the paired reads reverse dataset" />
             <param name="orientation" type="select" label="Orientation of paired-end reads" help="">
@@ -471,8 +479,11 @@
       <when value="default"/>
       <when value="advanced">
          <param name="max_mismatches" type="float" value="" optional="true" label="Maximum number of mismatches allowed (uses default when negative)"
-              help="Defaults to the ultrafast level of ((readlength+2)/12 - 2)).
-                    If specified between 0.0 and 1.0, then treated as a fraction
+              help="Maximum number of mismatches allowed (if not specified, then
+                    defaults to the ultrafast level of ((readlength+index_interval-1)/kmer - 2))
+                    (By default, the genome index interval is 3, but this can be changed
+                    by providing a different value for -q to gmap_build when processing the genome.)
+     If specified between 0.0 and 1.0, then treated as a fraction
                     of each read length.  Otherwise, treated as an integral number
                     of mismatches (including indel and splicing penalties)
                     For RNA-Seq, you may need to increase this value slightly
@@ -481,12 +492,24 @@
          </param>
          <param name="query_unk_mismatch" type="boolean" checked="false" truevalue="--query-unk-mismatch=1" falsevalue="" label="Count unknown (N) characters in the query as a mismatch"/>
          <param name="genome_unk_mismatch" type="boolean" checked="true" truevalue="" falsevalue="--genome-unk-mismatch=0" label="Count unknown (N) characters in the genome as a mismatch"/>
-         <param name="terminal_threshold"  type="integer" value="" optional="true" label="Threshold for searching for a terminal alignment (default 2)"
-                help="(from one end of the read to the best possible position at the other end).   For example, if this value is 2, then if GSNAP finds an exact or
+         <param name="maxsearch" type="integer" value="" optional="true" label="Maximum number of alignments to find (default 1000)"
+                help="Must be larger than paths, which is the number to report.
+                      Keeping this number large will allow for random selection among multiple alignments.
+                      Reducing this number can speed up the program. "/>
+
+         <param name="terminal_threshold"  type="integer" value="" optional="true" label="Threshold for searching for a terminal alignment"
+                help="Threshold for searching for a terminal alignment (from one end of the
+                                   read to the best possible position at the other end) (default 2
+                                   for standard, atoi-stranded, and atoi-nonstranded mode; default 100
+                                   for cmet-stranded and cmet-nonstranded mode).
+                                   For example, if this value is 2, then if GSNAP finds an exact or
                                    1-mismatch alignment, it will not try to find a terminal alignment.
                                    Note that this default value may not be low enough if you want to
                                    obtain terminal alignments for very short reads, although such reads
-                                   probably don't have enough specificity for terminal alignments anyway." />
+                                   probably don't have enough specificity for terminal alignments anyway.
+                                   To turn off terminal alignments, set this to a high value, greater
+                                   than the value for max-mismatches.
+                                   "/>
          <param name="indel_penalty"  type="integer" value="" optional="true" label="Penalty for an indel (default 2)"
                 help="Counts against mismatches allowed.  To find indels, make indel-penalty less than or equal to max-mismatches.  A value < 2 can lead to false positives at read ends" />
          <param name="indel_endlength"  type="integer" value="" optional="true" label="Minimum length at end required for indel alignments (default 4)" />
@@ -497,9 +520,10 @@
          <param name="suboptimal_levels"  type="integer" value="" optional="true" label="Report suboptimal hits beyond best hit (default 0)"
                 help="All hits with best score plus suboptimal-levels are reported" />
          <param name="adapter_strip"  type="select" label="Method for removing adapters from reads"
-                help="paired removes adapters from paired-end reads if a concordant or paired alignment cannot be found from the original read">
-           <option value="paired" selected="true">paired</option>
-           <option value="off">off</option>
+                help="Default is 'off'.  To turn on, specify 'paired', which removes adapters
+                                   from paired-end reads if they appear to be present.">
+           <option value="paired">paired</option>
+           <option value="off" selected="true">off</option>
          </param>
          <param name="trim_mismatch_score" type="integer" value="" optional="true" label="Score to use for mismatches when trimming at ends (default is -3)"
                 help="to turn off trimming, specify 0 (Warning: turning trimming off will give false positive mismatches at the ends of reads)"/>
@@ -523,6 +547,7 @@
           <param name="gmap_mode" type="select" multiple="true" optional="true" display="checkboxes" label="Cases to use GMAP for complex alignments containing multiple splices or indels"
                  help="Default: pairsearch,terminal,improve">
             <option value="pairsearch" selected="true">pairsearch</option>
+            <option value="indel_knownsplice" selected="true">indel_knownsplice</option>
             <option value="terminal" selected="true">terminal</option>
             <option value="improve" selected="true">improve</option>
           </param>

diff -r 74391fc6e3f2 -r f6ba0f12cca2 iit_store.xml
--- a/iit_store.xml Fri Oct 05 13:08:43 2012 -0500
+++ b/iit_store.xml Wed Sep 28 10:43:44 2016 -0400

@@ -1,7 +1,7 @@
-<tool id="gmap_iit_store" name="GMAP IIT" version="2.0.0">
+<tool id="gmap_iit_store" name="GMAP IIT" version="3.0.0">
   <description>Create a map store for known genes or SNPs</description>
   <requirements>
-      <requirement type="package" version="2011-11-30">gmap</requirement>
+      <requirement type="package" version="2013-05-09">gmap</requirement>
   </requirements>
   <version_string>iit_store --version</version_string>
   <command interpreter="command"> /bin/bash $shscript 2> $log </command>

diff -r 74391fc6e3f2 -r f6ba0f12cca2 lib/galaxy/datatypes/gmap.py
--- a/lib/galaxy/datatypes/gmap.py Fri Oct 05 13:08:43 2012 -0500
+++ b/lib/galaxy/datatypes/gmap.py Wed Sep 28 10:43:44 2016 -0400

[

b'@@ -2,7 +2,7 @@\n GMAP indexes\n """\n import logging\n-import os,os.path,re\n+import os,os.path,re,sys\n import galaxy.datatypes.data\n from galaxy.datatypes.data import Text\n from galaxy import util\n@@ -15,11 +15,14 @@\n A GMAP DB for indexes\n """\n MetadataElement( name="db_name", desc="The db name for this index set", default=\'unknown\', set_in_upload=True, readonly=True )\n+ MetadataElement( name="chromosomes", desc="The chromosomes or contigs", no_value=[], readonly=False )\n+ MetadataElement( name="circular", desc="cirular chromosomes", no_value=[], readonly=False )\n+ MetadataElement( name="chromlength", desc="Chromosome lengths", no_value=[], readonly=False )\n MetadataElement( name="basesize", default="12", desc="The basesize for offsetscomp", visible=True, readonly=True )\n- MetadataElement( name="kmers", default=[\'\'], desc="The kmer sizes for indexes", visible=True, no_value=[\'\'], readonly=True )\n+ MetadataElement( name="kmers", desc="The kmer sizes for indexes", visible=True, no_value=[\'\'], readonly=True )\n MetadataElement( name="map_dir", desc="The maps directory", default=\'unknown\', set_in_upload=True, readonly=True )\n- MetadataElement( name="maps", default=[\'\'], desc="The names of maps stored for this gmap gmapdb", visible=True, no_value=[\'\'], readonly=True )\n- MetadataElement( name="snps", default=[\'\'], desc="The names of SNP indexes stored for this gmapdb", visible=True, no_value=[\'\'], readonly=True )\n+ MetadataElement( name="maps", desc="The names of maps stored for this gmap gmapdb", visible=True, no_value=[\'\'], readonly=True )\n+ MetadataElement( name="snps", desc="The names of SNP indexes stored for this gmapdb", visible=True, no_value=[\'\'], readonly=True )\n MetadataElement( name="cmet", default=False, desc="Has a cmet index", visible=True, readonly=True )\n MetadataElement( name="atoi", default=False, desc="Has a atoi index", visible=True, readonly=True )\n \n@@ -41,10 +44,24 @@\n """\n bn = dataset.metadata.db_name\n log.info( "GmapDB regenerate_primary_file %s" % (bn))\n- rval = [\'<html><head><title>GMAPDB %s</title></head><p/><H3>GMAPDB %s</H3><p/>cmet %s<br>atoi %s<H4>Maps:</H4><ul>\' % (bn,bn,dataset.metadata.cmet,dataset.metadata.atoi)]\n- for i,name in enumerate(dataset.metadata.maps):\n- rval.append( \'<li>%s\' % name)\n- rval.append( \'</ul></html>\' )\n+ rval = []\n+ rval.append("GMAPDB: %s" % dataset.metadata.db_name)\n+ if dataset.metadata.chromosomes:\n+ rval.append("chromosomes: %s" % dataset.metadata.chromosomes)\n+ if dataset.metadata.chromlength and len(dataset.metadata.chromlength) == len(dataset.metadata.chromosomes):\n+ rval.append( \'chrom\\tlength\' )\n+ for i,name in enumerate(dataset.metadata.chromosomes):\n+ rval.append( \'%s\\t%d\' % (dataset.metadata.chromosomes[i],dataset.metadata.chromlength[i]))\n+ if dataset.metadata.circular:\n+ rval.append("circular: %s" % dataset.metadata.circular)\n+ if dataset.metadata.kmers:\n+ rval.append("kmers: %s" % dataset.metadata.kmers)\n+ rval.append("cmetindex: %s atoiindex: %s" % (dataset.metadata.cmet,dataset.metadata.atoi))\n+ if dataset.metadata.maps and len(dataset.metadata.maps) > 0:\n+ rval.append( \'Maps:\')\n+ for i,name in enumerate(dataset.metadata.maps):\n+ if name.strip() != \'\':\n+ rval.append( \' %s\' % name)\n f = file(dataset.file_name,\'w\')\n f.write("\\n".join( rval ))\n f.write(\'\\n\')\n@@ -53,7 +70,7 @@\n def set_peek( self, dataset, is_multi_byte=False ):\n log.info( "GmapDB set_peek %s" % (dataset))\n if not dataset.dataset.purged:\n- dataset.peek = "GMAPDB index %s\\n cmet %s\\n atoi %s\\n maps %s" % ( dataset.metadata.db_name,dataset.metadata.cmet,dataset.metadata.atoi,dataset.metadata.maps )\n+ dataset.peek = "GMAPDB index %s\\n chr'..b' % (j,iname))\n ipath = os.path.join(fpath,iname)\n+ print >> sys.stderr, "GmapDB set_meta file %s %s %s" % (j,iname,ipath)\n if os.path.isdir(ipath): # find maps\n dataset.metadata.map_dir = iname\n+ maps = []\n+ snps = []\n for mapfile in os.listdir(ipath):\n mapname = mapfile.replace(\'.iit\',\'\')\n log.info( "GmapDB set_meta map %s %s" % (mapname,mapfile))\n- dataset.metadata.maps.append(mapname)\n+ print >> sys.stderr, "GmapDB set_meta map %s %s " % (mapname,mapfile)\n+ maps.append(mapname)\n+ if mapname.find(\'snp\') >= 0: \n+ snps.append(mapname)\n+ if len(maps) > 0:\n+ dataset.metadata.maps = maps\n+ if len(snps) > 0:\n+ dataset.metadata.snps = snps\n else: \n+ m = re.match(chrom_pat,iname)\n+ if m and len(m.groups()) == 1:\n+ dataset.metadata.db_name = m.groups()[0]\n+ print >> sys.stderr, "GmapDB set_meta file %s %s %s" % (j,iname,ipath)\n+ try:\n+ fh = open(ipath)\n+ dataset.metadata.chromosomes = []\n+ dataset.metadata.circular = []\n+ dataset.metadata.chromlength = []\n+ for k,line in enumerate(fh):\n+ fields = line.strip().split(\'\\t\')\n+ print >> sys.stderr, "GmapDB set_meta chrom %s fields %s" % (line,fields)\n+ if len(fields) > 2:\n+ dataset.metadata.chromosomes.append(str(fields[0]))\n+ dataset.metadata.chromlength.append(int(fields[2]))\n+ if len(fields) > 3 and fields[3] == \'circular\':\n+ dataset.metadata.circular.append(str(fields[0]))\n+ print >> sys.stderr, "GmapDB set_meta db_name %s chromosomes %s circular %s" % (dataset.metadata.db_name,dataset.metadata.chromosomes,dataset.metadata.circular)\n+ except Exception, e:\n+ log.info( "GmapDB set_meta error %s %s " % (iname, e))\n+ print >> sys.stderr, "GmapDB set_meta file %s Error %s" % (ipath,e)\n+ finally:\n+ if fh:\n+ fh.close()\n+ continue\n m = re.match(pat,iname)\n if m:\n log.info( "GmapDB set_meta m %s %s " % (iname, m))\n+ print >> sys.stderr, "GmapDB set_meta iname %s %s" % (iname,m.groups())\n assert len(m.groups()) == 10\n- dataset.metadata.db_name = fname\n if m.groups()[2] == \'ref\':\n- if m.groups()[-1] != None:\n+ if m.groups()[-1] != None and m.groups()[-1] != \'offsetscomp\':\n dataset.metadata.snps.append(m.groups()[-1])\n else:\n if m.groups()[-3] != None:\n@@ -115,6 +190,7 @@\n elif m.groups()[4] == \'a2i\':\n dataset.metadata.atoi = True\n dataset.metadata.kmers = kmers.keys()\n+ self.regenerate_primary_file(dataset)\n \n class GmapSnpIndex( Text ):\n """\n'

diff -r 74391fc6e3f2 -r f6ba0f12cca2 snpindex.xml
--- a/snpindex.xml Fri Oct 05 13:08:43 2012 -0500
+++ b/snpindex.xml Wed Sep 28 10:43:44 2016 -0400

@@ -1,7 +1,7 @@
-<tool id="gmap_snpindex" name="GMAP SNP Index" version="2.0.0">
+<tool id="gmap_snpindex" name="GMAP SNP Index" version="3.0.0">
   <description>build index files for known SNPs</description>
   <requirements>
-      <requirement type="package" version="2011-11-30">gmap</requirement>
+      <requirement type="package" version="2013-05-09">gmap</requirement>
   </requirements>
   <version_string>snpindex --version</version_string>
   <command interpreter="command"> /bin/bash $shscript 2>1 1> $output </command>

diff -r 74391fc6e3f2 -r f6ba0f12cca2 tool_dependencies.xml
--- a/tool_dependencies.xml Fri Oct 05 13:08:43 2012 -0500
+++ b/tool_dependencies.xml Wed Sep 28 10:43:44 2016 -0400

@@ -1,21 +1,6 @@
<?xml version="1.0"?>
<tool_dependency>
-    <package name="gmap" version="2011-11-30">
-        <install version="1.0">
-            <actions>
-                <action type="shell_command">wget http://research-pub.gene.com/gmap/src/gmap-gsnap-2011-11-30.tar.gz</action>
-                <action type="shell_command"> ./configure --prefix=bin --with-gmapdb=../gmapdb</action>
-                <action type="shell_command">make</action>
-                <action type="move_directory_files">
-                    <source_directory>bin</source_directory>
-                    <destination_directory>$INSTALL_DIR/bin</destination_directory>
-                </action>
-                <action type="set_environment">
-                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
-                </action>
-            </actions>
-        </install>
-        <readme>
-        </readme>
+    <package name="gmap" version="2013-05-09">
+        <repository changeset_revision="8bacc25e2023" name="package_gmap_2013_05_09" owner="jjohnson" toolshed="https://testtoolshed.g2.bx.psu.edu" />
     </package>
</tool_dependency>