# HG changeset patch # User peterjc # Date 1475073824 14400 # Node ID f6ba0f12cca2d93b20f39a6fa060556363b3a037 # Parent 74391fc6e3f2a688f1192518eed60dbc6ef2ae3d Untested work-in-progress GMAP wrappers v3.0.0, from JJ back in June 2013 diff -r 74391fc6e3f2 -r f6ba0f12cca2 datatypes_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes_conf.xml Wed Sep 28 10:43:44 2016 -0400 @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + + + diff -r 74391fc6e3f2 -r f6ba0f12cca2 gmap.xml --- a/gmap.xml Fri Oct 05 13:08:43 2012 -0500 +++ b/gmap.xml Wed Sep 28 10:43:44 2016 -0400 @@ -1,7 +1,7 @@ - + Genomic Mapping and Alignment Program for mRNA and EST sequences - gmap + gmap gmap --version @@ -11,8 +11,7 @@ #if $refGenomeSource.genomeSource == "history": --gseg=$refGenomeSource.ownFile #elif $refGenomeSource.genomeSource == "gmapdb": - #set $gmapdb = $os.listdir($refGenomeSource.gmapdb.extra_files_path)[0] - --dir=$refGenomeSource.gmapdb.extra_files_path --db=$gmapdb + --dir=$refGenomeSource.gmapdb.extra_files_path --db=$refGenomeSource.gmapdb.metadata.db_name #if $refGenomeSource.kmer != None and len($refGenomeSource.kmer.__str__) == 2: --kmer=$refGenomeSource.kmer #end if @@ -43,6 +42,9 @@ #elif $result.format == "sam": --format=$result.sam_paired_read $result.no_sam_headers + $result.sam_use_0M + $result.force_xs_dir + $result.md_lowercase_snp #* Removed in gmap version 2011-11-30 #if len($result.noncanonical_splices.__str__) > 0 --noncanonical-splices=$result.noncanonical_splices @@ -65,6 +67,7 @@ #end if #if $computation.options == "advanced": $computation.nosplicing + $computation.find_shifted_canonical $computation.cross_species #if len($computation.min_intronlength.__str__) > 0 --min-intronlength=$computation.min_intronlength @@ -170,6 +173,15 @@ + @@ -224,7 +236,7 @@ + help=" default is 40, To turn off, set to 0" > @@ -237,6 +249,7 @@ + @@ -328,7 +341,7 @@ - + @@ -353,7 +366,7 @@ - + @@ -369,6 +382,11 @@ + + + @@ -383,6 +401,18 @@ print-comment Show comment line for each hit --> + + + diff -r 74391fc6e3f2 -r f6ba0f12cca2 gmap_build.xml --- a/gmap_build.xml Fri Oct 05 13:08:43 2012 -0500 +++ b/gmap_build.xml Wed Sep 28 10:43:44 2016 -0400 @@ -1,10 +1,10 @@ - + a database genome index for GMAP and GSNAP - gmap + gmap gmap --version - /bin/bash $shscript 2>1 1> $output + /bin/bash $shscript > $output @@ -15,12 +15,17 @@ - - - - - + + + + + + + + + @@ -51,6 +56,7 @@ + @@ -77,8 +83,33 @@ sequence, that SNP will be ignored in subsequent processing as a probable error. The N stands for any other allele." /> + + + + + + + + + + + + + + + + @@ -471,8 +479,11 @@ - + + + probably don't have enough specificity for terminal alignments anyway. + To turn off terminal alignments, set this to a high value, greater + than the value for max-mismatches. + "/> @@ -497,9 +520,10 @@ - - + help="Default is 'off'. To turn on, specify 'paired', which removes adapters + from paired-end reads if they appear to be present."> + + @@ -523,6 +547,7 @@ + diff -r 74391fc6e3f2 -r f6ba0f12cca2 iit_store.xml --- a/iit_store.xml Fri Oct 05 13:08:43 2012 -0500 +++ b/iit_store.xml Wed Sep 28 10:43:44 2016 -0400 @@ -1,7 +1,7 @@ - + Create a map store for known genes or SNPs - gmap + gmap iit_store --version /bin/bash $shscript 2> $log diff -r 74391fc6e3f2 -r f6ba0f12cca2 lib/galaxy/datatypes/gmap.py --- a/lib/galaxy/datatypes/gmap.py Fri Oct 05 13:08:43 2012 -0500 +++ b/lib/galaxy/datatypes/gmap.py Wed Sep 28 10:43:44 2016 -0400 @@ -2,7 +2,7 @@ GMAP indexes """ import logging -import os,os.path,re +import os,os.path,re,sys import galaxy.datatypes.data from galaxy.datatypes.data import Text from galaxy import util @@ -15,11 +15,14 @@ A GMAP DB for indexes """ MetadataElement( name="db_name", desc="The db name for this index set", default='unknown', set_in_upload=True, readonly=True ) + MetadataElement( name="chromosomes", desc="The chromosomes or contigs", no_value=[], readonly=False ) + MetadataElement( name="circular", desc="cirular chromosomes", no_value=[], readonly=False ) + MetadataElement( name="chromlength", desc="Chromosome lengths", no_value=[], readonly=False ) MetadataElement( name="basesize", default="12", desc="The basesize for offsetscomp", visible=True, readonly=True ) - MetadataElement( name="kmers", default=[''], desc="The kmer sizes for indexes", visible=True, no_value=[''], readonly=True ) + MetadataElement( name="kmers", desc="The kmer sizes for indexes", visible=True, no_value=[''], readonly=True ) MetadataElement( name="map_dir", desc="The maps directory", default='unknown', set_in_upload=True, readonly=True ) - MetadataElement( name="maps", default=[''], desc="The names of maps stored for this gmap gmapdb", visible=True, no_value=[''], readonly=True ) - MetadataElement( name="snps", default=[''], desc="The names of SNP indexes stored for this gmapdb", visible=True, no_value=[''], readonly=True ) + MetadataElement( name="maps", desc="The names of maps stored for this gmap gmapdb", visible=True, no_value=[''], readonly=True ) + MetadataElement( name="snps", desc="The names of SNP indexes stored for this gmapdb", visible=True, no_value=[''], readonly=True ) MetadataElement( name="cmet", default=False, desc="Has a cmet index", visible=True, readonly=True ) MetadataElement( name="atoi", default=False, desc="Has a atoi index", visible=True, readonly=True ) @@ -41,10 +44,24 @@ """ bn = dataset.metadata.db_name log.info( "GmapDB regenerate_primary_file %s" % (bn)) - rval = ['GMAPDB %s

GMAPDB %s

cmet %s
atoi %s

Maps:

    ' % (bn,bn,dataset.metadata.cmet,dataset.metadata.atoi)] - for i,name in enumerate(dataset.metadata.maps): - rval.append( '
  • %s' % name) - rval.append( '
' ) + rval = [] + rval.append("GMAPDB: %s" % dataset.metadata.db_name) + if dataset.metadata.chromosomes: + rval.append("chromosomes: %s" % dataset.metadata.chromosomes) + if dataset.metadata.chromlength and len(dataset.metadata.chromlength) == len(dataset.metadata.chromosomes): + rval.append( 'chrom\tlength' ) + for i,name in enumerate(dataset.metadata.chromosomes): + rval.append( '%s\t%d' % (dataset.metadata.chromosomes[i],dataset.metadata.chromlength[i])) + if dataset.metadata.circular: + rval.append("circular: %s" % dataset.metadata.circular) + if dataset.metadata.kmers: + rval.append("kmers: %s" % dataset.metadata.kmers) + rval.append("cmetindex: %s atoiindex: %s" % (dataset.metadata.cmet,dataset.metadata.atoi)) + if dataset.metadata.maps and len(dataset.metadata.maps) > 0: + rval.append( 'Maps:') + for i,name in enumerate(dataset.metadata.maps): + if name.strip() != '': + rval.append( ' %s' % name) f = file(dataset.file_name,'w') f.write("\n".join( rval )) f.write('\n') @@ -53,7 +70,7 @@ def set_peek( self, dataset, is_multi_byte=False ): log.info( "GmapDB set_peek %s" % (dataset)) if not dataset.dataset.purged: - dataset.peek = "GMAPDB index %s\n cmet %s\n atoi %s\n maps %s" % ( dataset.metadata.db_name,dataset.metadata.cmet,dataset.metadata.atoi,dataset.metadata.maps ) + dataset.peek = "GMAPDB index %s\n chroms %s\n kmers %s cmet %s atoi %s\n maps %s" % ( dataset.metadata.db_name,dataset.metadata.chromosomes,dataset.metadata.kmers,dataset.metadata.cmet,dataset.metadata.atoi,dataset.metadata.maps ) dataset.blurb = "GMAPDB %s" % ( dataset.metadata.db_name ) else: dataset.peek = 'file does not exist' @@ -68,6 +85,26 @@ return False def set_meta( self, dataset, overwrite = True, **kwd ): """ + extra_files_path//GRCh37_19 + extra_files_path//GRCh37_19/GRCh37_19.a2iag12123offsetscomp + extra_files_path//GRCh37_19/GRCh37_19.a2iag123positions + extra_files_path//GRCh37_19/GRCh37_19.a2itc12123offsetscomp + extra_files_path//GRCh37_19/GRCh37_19.a2itc123positions + extra_files_path//GRCh37_19/GRCh37_19.chromosome + extra_files_path//GRCh37_19/GRCh37_19.chromosome.iit + extra_files_path//GRCh37_19/GRCh37_19.chrsubset + extra_files_path//GRCh37_19/GRCh37_19.contig + extra_files_path//GRCh37_19/GRCh37_19.contig.iit + extra_files_path//GRCh37_19/GRCh37_19.genomecomp + extra_files_path//GRCh37_19/GRCh37_19.maps + extra_files_path//GRCh37_19/GRCh37_19.metct12123offsetscomp + extra_files_path//GRCh37_19/GRCh37_19.metct123positions + extra_files_path//GRCh37_19/GRCh37_19.metga12123offsetscomp + extra_files_path//GRCh37_19/GRCh37_19.metga123positions + extra_files_path//GRCh37_19/GRCh37_19.ref12123offsetscomp + extra_files_path//GRCh37_19/GRCh37_19.ref123positions + extra_files_path//GRCh37_19/GRCh37_19.version + Expecting: extra_files_path//db_name>.ref3 extra_files_path/db_name/db_name.ref1[2345]1[2345]3offsetscomp @@ -77,7 +114,9 @@ extra_files_path/db_name/db_name.maps/*.iit """ log.info( "GmapDB set_meta %s %s" % (dataset,dataset.extra_files_path)) - pat = '(.*)\.((ref)|(met)[atgc][atgc]|(a2i)[atgc][atgc])((\d\d)(\d\d))?3positions(\.(.+))?' + chrom_pat = '^(.+).chromosome$' + #pat = '(.*)\.((ref)|(met)[atgc][atgc]|(a2i)[atgc][atgc])((\d\d)(\d\d))?3positions(\.(.+))?' + pat = '(.*)\.((ref)|(met)[atgc][atgc]|(a2i)[atgc][atgc])((\d\d)(\d\d))?(\d)(offsetscomp)' efp = dataset.extra_files_path flist = os.listdir(efp) for i,fname in enumerate(flist): @@ -85,24 +124,60 @@ fpath = os.path.join(efp,fname) if os.path.isdir(fpath): ilist = os.listdir(fpath) - kmers = {'':'default'} # HACK '' empty key added so user has default choice when selecting kmer from metadata + # kmers = {'':'default'} # HACK '' empty key added so user has default choice when selecting kmer from metadata + kmers = dict() for j,iname in enumerate(ilist): log.info( "GmapDB set_meta file %s %s" % (j,iname)) ipath = os.path.join(fpath,iname) + print >> sys.stderr, "GmapDB set_meta file %s %s %s" % (j,iname,ipath) if os.path.isdir(ipath): # find maps dataset.metadata.map_dir = iname + maps = [] + snps = [] for mapfile in os.listdir(ipath): mapname = mapfile.replace('.iit','') log.info( "GmapDB set_meta map %s %s" % (mapname,mapfile)) - dataset.metadata.maps.append(mapname) + print >> sys.stderr, "GmapDB set_meta map %s %s " % (mapname,mapfile) + maps.append(mapname) + if mapname.find('snp') >= 0: + snps.append(mapname) + if len(maps) > 0: + dataset.metadata.maps = maps + if len(snps) > 0: + dataset.metadata.snps = snps else: + m = re.match(chrom_pat,iname) + if m and len(m.groups()) == 1: + dataset.metadata.db_name = m.groups()[0] + print >> sys.stderr, "GmapDB set_meta file %s %s %s" % (j,iname,ipath) + try: + fh = open(ipath) + dataset.metadata.chromosomes = [] + dataset.metadata.circular = [] + dataset.metadata.chromlength = [] + for k,line in enumerate(fh): + fields = line.strip().split('\t') + print >> sys.stderr, "GmapDB set_meta chrom %s fields %s" % (line,fields) + if len(fields) > 2: + dataset.metadata.chromosomes.append(str(fields[0])) + dataset.metadata.chromlength.append(int(fields[2])) + if len(fields) > 3 and fields[3] == 'circular': + dataset.metadata.circular.append(str(fields[0])) + print >> sys.stderr, "GmapDB set_meta db_name %s chromosomes %s circular %s" % (dataset.metadata.db_name,dataset.metadata.chromosomes,dataset.metadata.circular) + except Exception, e: + log.info( "GmapDB set_meta error %s %s " % (iname, e)) + print >> sys.stderr, "GmapDB set_meta file %s Error %s" % (ipath,e) + finally: + if fh: + fh.close() + continue m = re.match(pat,iname) if m: log.info( "GmapDB set_meta m %s %s " % (iname, m)) + print >> sys.stderr, "GmapDB set_meta iname %s %s" % (iname,m.groups()) assert len(m.groups()) == 10 - dataset.metadata.db_name = fname if m.groups()[2] == 'ref': - if m.groups()[-1] != None: + if m.groups()[-1] != None and m.groups()[-1] != 'offsetscomp': dataset.metadata.snps.append(m.groups()[-1]) else: if m.groups()[-3] != None: @@ -115,6 +190,7 @@ elif m.groups()[4] == 'a2i': dataset.metadata.atoi = True dataset.metadata.kmers = kmers.keys() + self.regenerate_primary_file(dataset) class GmapSnpIndex( Text ): """ diff -r 74391fc6e3f2 -r f6ba0f12cca2 snpindex.xml --- a/snpindex.xml Fri Oct 05 13:08:43 2012 -0500 +++ b/snpindex.xml Wed Sep 28 10:43:44 2016 -0400 @@ -1,7 +1,7 @@ - + build index files for known SNPs - gmap + gmap snpindex --version /bin/bash $shscript 2>1 1> $output diff -r 74391fc6e3f2 -r f6ba0f12cca2 tool_dependencies.xml --- a/tool_dependencies.xml Fri Oct 05 13:08:43 2012 -0500 +++ b/tool_dependencies.xml Wed Sep 28 10:43:44 2016 -0400 @@ -1,21 +1,6 @@ - - - - wget http://research-pub.gene.com/gmap/src/gmap-gsnap-2011-11-30.tar.gz - ./configure --prefix=bin --with-gmapdb=../gmapdb - make - - bin - $INSTALL_DIR/bin - - - $INSTALL_DIR/bin - - - - - + +