# HG changeset patch
# User peterjc
# Date 1475073824 14400
# Node ID f6ba0f12cca2d93b20f39a6fa060556363b3a037
# Parent 74391fc6e3f2a688f1192518eed60dbc6ef2ae3d
Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013
diff -r 74391fc6e3f2 -r f6ba0f12cca2 datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml Wed Sep 28 10:43:44 2016 -0400
@@ -0,0 +1,24 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 74391fc6e3f2 -r f6ba0f12cca2 gmap.xml
--- a/gmap.xml Fri Oct 05 13:08:43 2012 -0500
+++ b/gmap.xml Wed Sep 28 10:43:44 2016 -0400
@@ -1,7 +1,7 @@
-
+
Genomic Mapping and Alignment Program for mRNA and EST sequences
- gmap
+ gmap
gmap --version
@@ -11,8 +11,7 @@
#if $refGenomeSource.genomeSource == "history":
--gseg=$refGenomeSource.ownFile
#elif $refGenomeSource.genomeSource == "gmapdb":
- #set $gmapdb = $os.listdir($refGenomeSource.gmapdb.extra_files_path)[0]
- --dir=$refGenomeSource.gmapdb.extra_files_path --db=$gmapdb
+ --dir=$refGenomeSource.gmapdb.extra_files_path --db=$refGenomeSource.gmapdb.metadata.db_name
#if $refGenomeSource.kmer != None and len($refGenomeSource.kmer.__str__) == 2:
--kmer=$refGenomeSource.kmer
#end if
@@ -43,6 +42,9 @@
#elif $result.format == "sam":
--format=$result.sam_paired_read
$result.no_sam_headers
+ $result.sam_use_0M
+ $result.force_xs_dir
+ $result.md_lowercase_snp
#* Removed in gmap version 2011-11-30
#if len($result.noncanonical_splices.__str__) > 0
--noncanonical-splices=$result.noncanonical_splices
@@ -65,6 +67,7 @@
#end if
#if $computation.options == "advanced":
$computation.nosplicing
+ $computation.find_shifted_canonical
$computation.cross_species
#if len($computation.min_intronlength.__str__) > 0
--min-intronlength=$computation.min_intronlength
@@ -170,6 +173,15 @@
+
@@ -224,7 +236,7 @@
+ help="Default is 40. To turn off, set to 0." >
@@ -237,6 +249,7 @@
+
@@ -328,7 +341,7 @@
-
+
@@ -353,7 +366,7 @@
-
+
@@ -369,6 +382,11 @@
+
+
+
@@ -383,6 +401,18 @@
print-comment Show comment line for each hit
-->
+
+
+
diff -r 74391fc6e3f2 -r f6ba0f12cca2 gmap_build.xml
--- a/gmap_build.xml Fri Oct 05 13:08:43 2012 -0500
+++ b/gmap_build.xml Wed Sep 28 10:43:44 2016 -0400
@@ -1,10 +1,10 @@
-
+
a database genome index for GMAP and GSNAP
- gmap
+ gmap
gmap --version
- /bin/bash $shscript 2>1 1> $output
+ /bin/bash $shscript > $output
@@ -15,12 +15,17 @@
-
-
-
-
-
+
+
+
+
+
+
+
+
+
@@ -51,6 +56,7 @@
+
@@ -77,8 +83,33 @@
sequence, that SNP will be ignored in subsequent processing as a probable error.
The N stands for any other allele." />
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -471,8 +479,11 @@
-
+
+
+ probably don't have enough specificity for terminal alignments anyway.
+ To turn off terminal alignments, set this to a high value, greater
+ than the value for max-mismatches.
+ "/>
@@ -497,9 +520,10 @@
-
-
+ help="Default is 'off'. To turn on, specify 'paired', which removes adapters
+ from paired-end reads if they appear to be present.">
+
+
@@ -523,6 +547,7 @@
+
diff -r 74391fc6e3f2 -r f6ba0f12cca2 iit_store.xml
--- a/iit_store.xml Fri Oct 05 13:08:43 2012 -0500
+++ b/iit_store.xml Wed Sep 28 10:43:44 2016 -0400
@@ -1,7 +1,7 @@
-
+
Create a map store for known genes or SNPs
- gmap
+ gmap
iit_store --version
/bin/bash $shscript 2> $log
diff -r 74391fc6e3f2 -r f6ba0f12cca2 lib/galaxy/datatypes/gmap.py
--- a/lib/galaxy/datatypes/gmap.py Fri Oct 05 13:08:43 2012 -0500
+++ b/lib/galaxy/datatypes/gmap.py Wed Sep 28 10:43:44 2016 -0400
@@ -2,7 +2,7 @@
GMAP indexes
"""
import logging
-import os,os.path,re
+import os,os.path,re,sys
import galaxy.datatypes.data
from galaxy.datatypes.data import Text
from galaxy import util
@@ -15,11 +15,14 @@
A GMAP DB for indexes
"""
MetadataElement( name="db_name", desc="The db name for this index set", default='unknown', set_in_upload=True, readonly=True )
+ MetadataElement( name="chromosomes", desc="The chromosomes or contigs", no_value=[], readonly=False )
+ MetadataElement( name="circular", desc="cirular chromosomes", no_value=[], readonly=False )
+ MetadataElement( name="chromlength", desc="Chromosome lengths", no_value=[], readonly=False )
MetadataElement( name="basesize", default="12", desc="The basesize for offsetscomp", visible=True, readonly=True )
- MetadataElement( name="kmers", default=[''], desc="The kmer sizes for indexes", visible=True, no_value=[''], readonly=True )
+ MetadataElement( name="kmers", desc="The kmer sizes for indexes", visible=True, no_value=[''], readonly=True )
MetadataElement( name="map_dir", desc="The maps directory", default='unknown', set_in_upload=True, readonly=True )
- MetadataElement( name="maps", default=[''], desc="The names of maps stored for this gmap gmapdb", visible=True, no_value=[''], readonly=True )
- MetadataElement( name="snps", default=[''], desc="The names of SNP indexes stored for this gmapdb", visible=True, no_value=[''], readonly=True )
+ MetadataElement( name="maps", desc="The names of maps stored for this gmap gmapdb", visible=True, no_value=[''], readonly=True )
+ MetadataElement( name="snps", desc="The names of SNP indexes stored for this gmapdb", visible=True, no_value=[''], readonly=True )
MetadataElement( name="cmet", default=False, desc="Has a cmet index", visible=True, readonly=True )
MetadataElement( name="atoi", default=False, desc="Has a atoi index", visible=True, readonly=True )
@@ -41,10 +44,24 @@
"""
bn = dataset.metadata.db_name
log.info( "GmapDB regenerate_primary_file %s" % (bn))
- rval = ['GMAPDB %sGMAPDB %s
cmet %s
atoi %sMaps:
' % (bn,bn,dataset.metadata.cmet,dataset.metadata.atoi)]
- for i,name in enumerate(dataset.metadata.maps):
- rval.append( '- %s' % name)
- rval.append( '
' )
+ rval = []
+ rval.append("GMAPDB: %s" % dataset.metadata.db_name)
+ if dataset.metadata.chromosomes:
+ rval.append("chromosomes: %s" % dataset.metadata.chromosomes)
+ if dataset.metadata.chromlength and len(dataset.metadata.chromlength) == len(dataset.metadata.chromosomes):
+ rval.append( 'chrom\tlength' )
+ for i,name in enumerate(dataset.metadata.chromosomes):
+ rval.append( '%s\t%d' % (dataset.metadata.chromosomes[i],dataset.metadata.chromlength[i]))
+ if dataset.metadata.circular:
+ rval.append("circular: %s" % dataset.metadata.circular)
+ if dataset.metadata.kmers:
+ rval.append("kmers: %s" % dataset.metadata.kmers)
+ rval.append("cmetindex: %s atoiindex: %s" % (dataset.metadata.cmet,dataset.metadata.atoi))
+ if dataset.metadata.maps and len(dataset.metadata.maps) > 0:
+ rval.append( 'Maps:')
+ for i,name in enumerate(dataset.metadata.maps):
+ if name.strip() != '':
+ rval.append( ' %s' % name)
f = file(dataset.file_name,'w')
f.write("\n".join( rval ))
f.write('\n')
@@ -53,7 +70,7 @@
def set_peek( self, dataset, is_multi_byte=False ):
log.info( "GmapDB set_peek %s" % (dataset))
if not dataset.dataset.purged:
- dataset.peek = "GMAPDB index %s\n cmet %s\n atoi %s\n maps %s" % ( dataset.metadata.db_name,dataset.metadata.cmet,dataset.metadata.atoi,dataset.metadata.maps )
+ dataset.peek = "GMAPDB index %s\n chroms %s\n kmers %s cmet %s atoi %s\n maps %s" % ( dataset.metadata.db_name,dataset.metadata.chromosomes,dataset.metadata.kmers,dataset.metadata.cmet,dataset.metadata.atoi,dataset.metadata.maps )
dataset.blurb = "GMAPDB %s" % ( dataset.metadata.db_name )
else:
dataset.peek = 'file does not exist'
@@ -68,6 +85,26 @@
return False
def set_meta( self, dataset, overwrite = True, **kwd ):
"""
+ extra_files_path//GRCh37_19
+ extra_files_path//GRCh37_19/GRCh37_19.a2iag12123offsetscomp
+ extra_files_path//GRCh37_19/GRCh37_19.a2iag123positions
+ extra_files_path//GRCh37_19/GRCh37_19.a2itc12123offsetscomp
+ extra_files_path//GRCh37_19/GRCh37_19.a2itc123positions
+ extra_files_path//GRCh37_19/GRCh37_19.chromosome
+ extra_files_path//GRCh37_19/GRCh37_19.chromosome.iit
+ extra_files_path//GRCh37_19/GRCh37_19.chrsubset
+ extra_files_path//GRCh37_19/GRCh37_19.contig
+ extra_files_path//GRCh37_19/GRCh37_19.contig.iit
+ extra_files_path//GRCh37_19/GRCh37_19.genomecomp
+ extra_files_path//GRCh37_19/GRCh37_19.maps
+ extra_files_path//GRCh37_19/GRCh37_19.metct12123offsetscomp
+ extra_files_path//GRCh37_19/GRCh37_19.metct123positions
+ extra_files_path//GRCh37_19/GRCh37_19.metga12123offsetscomp
+ extra_files_path//GRCh37_19/GRCh37_19.metga123positions
+ extra_files_path//GRCh37_19/GRCh37_19.ref12123offsetscomp
+ extra_files_path//GRCh37_19/GRCh37_19.ref123positions
+ extra_files_path//GRCh37_19/GRCh37_19.version
+
Expecting:
extra_files_path//db_name>.ref3
extra_files_path/db_name/db_name.ref1[2345]1[2345]3offsetscomp
@@ -77,7 +114,9 @@
extra_files_path/db_name/db_name.maps/*.iit
"""
log.info( "GmapDB set_meta %s %s" % (dataset,dataset.extra_files_path))
- pat = '(.*)\.((ref)|(met)[atgc][atgc]|(a2i)[atgc][atgc])((\d\d)(\d\d))?3positions(\.(.+))?'
+ chrom_pat = '^(.+).chromosome$'
+ #pat = '(.*)\.((ref)|(met)[atgc][atgc]|(a2i)[atgc][atgc])((\d\d)(\d\d))?3positions(\.(.+))?'
+ pat = '(.*)\.((ref)|(met)[atgc][atgc]|(a2i)[atgc][atgc])((\d\d)(\d\d))?(\d)(offsetscomp)'
efp = dataset.extra_files_path
flist = os.listdir(efp)
for i,fname in enumerate(flist):
@@ -85,24 +124,60 @@
fpath = os.path.join(efp,fname)
if os.path.isdir(fpath):
ilist = os.listdir(fpath)
- kmers = {'':'default'} # HACK '' empty key added so user has default choice when selecting kmer from metadata
+ # kmers = {'':'default'} # HACK '' empty key added so user has default choice when selecting kmer from metadata
+ kmers = dict()
for j,iname in enumerate(ilist):
log.info( "GmapDB set_meta file %s %s" % (j,iname))
ipath = os.path.join(fpath,iname)
+ print >> sys.stderr, "GmapDB set_meta file %s %s %s" % (j,iname,ipath)
if os.path.isdir(ipath): # find maps
dataset.metadata.map_dir = iname
+ maps = []
+ snps = []
for mapfile in os.listdir(ipath):
mapname = mapfile.replace('.iit','')
log.info( "GmapDB set_meta map %s %s" % (mapname,mapfile))
- dataset.metadata.maps.append(mapname)
+ print >> sys.stderr, "GmapDB set_meta map %s %s " % (mapname,mapfile)
+ maps.append(mapname)
+ if mapname.find('snp') >= 0:
+ snps.append(mapname)
+ if len(maps) > 0:
+ dataset.metadata.maps = maps
+ if len(snps) > 0:
+ dataset.metadata.snps = snps
else:
+ m = re.match(chrom_pat,iname)
+ if m and len(m.groups()) == 1:
+ dataset.metadata.db_name = m.groups()[0]
+ print >> sys.stderr, "GmapDB set_meta file %s %s %s" % (j,iname,ipath)
+ try:
+ fh = open(ipath)
+ dataset.metadata.chromosomes = []
+ dataset.metadata.circular = []
+ dataset.metadata.chromlength = []
+ for k,line in enumerate(fh):
+ fields = line.strip().split('\t')
+ print >> sys.stderr, "GmapDB set_meta chrom %s fields %s" % (line,fields)
+ if len(fields) > 2:
+ dataset.metadata.chromosomes.append(str(fields[0]))
+ dataset.metadata.chromlength.append(int(fields[2]))
+ if len(fields) > 3 and fields[3] == 'circular':
+ dataset.metadata.circular.append(str(fields[0]))
+ print >> sys.stderr, "GmapDB set_meta db_name %s chromosomes %s circular %s" % (dataset.metadata.db_name,dataset.metadata.chromosomes,dataset.metadata.circular)
+ except Exception, e:
+ log.info( "GmapDB set_meta error %s %s " % (iname, e))
+ print >> sys.stderr, "GmapDB set_meta file %s Error %s" % (ipath,e)
+ finally:
+ if fh:
+ fh.close()
+ continue
m = re.match(pat,iname)
if m:
log.info( "GmapDB set_meta m %s %s " % (iname, m))
+ print >> sys.stderr, "GmapDB set_meta iname %s %s" % (iname,m.groups())
assert len(m.groups()) == 10
- dataset.metadata.db_name = fname
if m.groups()[2] == 'ref':
- if m.groups()[-1] != None:
+ if m.groups()[-1] != None and m.groups()[-1] != 'offsetscomp':
dataset.metadata.snps.append(m.groups()[-1])
else:
if m.groups()[-3] != None:
@@ -115,6 +190,7 @@
elif m.groups()[4] == 'a2i':
dataset.metadata.atoi = True
dataset.metadata.kmers = kmers.keys()
+ self.regenerate_primary_file(dataset)
class GmapSnpIndex( Text ):
"""
diff -r 74391fc6e3f2 -r f6ba0f12cca2 snpindex.xml
--- a/snpindex.xml Fri Oct 05 13:08:43 2012 -0500
+++ b/snpindex.xml Wed Sep 28 10:43:44 2016 -0400
@@ -1,7 +1,7 @@
-
+
build index files for known SNPs
- gmap
+ gmap
snpindex --version
/bin/bash $shscript 2>1 1> $output
diff -r 74391fc6e3f2 -r f6ba0f12cca2 tool_dependencies.xml
--- a/tool_dependencies.xml Fri Oct 05 13:08:43 2012 -0500
+++ b/tool_dependencies.xml Wed Sep 28 10:43:44 2016 -0400
@@ -1,21 +1,6 @@
-
-
-
- wget http://research-pub.gene.com/gmap/src/gmap-gsnap-2011-11-30.tar.gz
- ./configure --prefix=bin --with-gmapdb=../gmapdb
- make
-
- bin
- $INSTALL_DIR/bin
-
-
- $INSTALL_DIR/bin
-
-
-
-
-
+
+