# HG changeset patch # User Jim Johnson # Date 1421175260 21600 # Node ID 78bcf4ac437cd31fcf1ecb74dcf6b1a83273e999 # Parent 6a378d0f4856cfe9555b61e2e3b5df4ff28de230 Use tool_data_table with key and version columns added to allow for multiple versions in a .loc file diff -r 6a378d0f4856 -r 78bcf4ac437c data_manager/data_manager_snpEff_databases.py --- a/data_manager/data_manager_snpEff_databases.py Thu Oct 23 05:43:46 2014 -0500 +++ b/data_manager/data_manager_snpEff_databases.py Tue Jan 13 12:54:20 2015 -0600 @@ -18,6 +18,28 @@ sys.stderr.write(msg) sys.exit(1) +def getSnpeffVersion(jar_path): + snpeff_version = 'SnpEff ?.?' + (snpEff_dir,snpEff_jar) = os.path.split(jar_path) + stderr_path = 'snpeff.err' + stderr_fh = open(stderr_path,'w') + args = [ 'java','-jar', ] + args.append( snpEff_jar ) + args.append( '-h' ) + proc = subprocess.Popen( args=args, shell=False, cwd=snpEff_dir, stderr=stderr_fh.fileno() ) + return_code = proc.wait() + if return_code != 255: + sys.exit( return_code ) + stderr_fh.close() + fh = open(stderr_path,'r') + for line in fh: + m = re.match('^[Ss]npEff version (SnpEff)\s*(\d+\.\d+).*$',line) + if m: + snpeff_version = m.groups()[0] + m.groups()[1] + break + fh.close() + return snpeff_version + def fetch_databases(data_manager_dict, target_directory, jar_path): (snpEff_dir,snpEff_jar) = os.path.split(jar_path) if not os.path.exists(target_directory): @@ -35,9 +57,10 @@ if return_code: sys.exit( return_code ) databases_output.close() + snpeff_version = getSnpeffVersion(jar_path) try: data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) - data_manager_dict['data_tables']['snpeff4_databases'] = data_manager_dict['data_tables'].get( 'snpeff4_databases', [] ) + data_manager_dict['data_tables']['snpeffv_databases'] = data_manager_dict['data_tables'].get( 'snpeffv_databases', [] ) data_table_entries = [] fh = open(databases_path,'r') for i,line in enumerate(fh): @@ -50,8 +73,9 @@ if genome_version == '30c2c903' or fields[1].strip() == 'TestCase' or fields[1].strip().startswith('Test_'): continue description = fields[1].strip() + ' : ' + genome_version - data_table_entries.append(dict(value=genome_version, name=description)) - data_manager_dict['data_tables']['snpeff4_databases'] = data_table_entries + key = snpeff_version + '_' + genome_version + data_table_entries.append(dict(key=key, version=snpeff_version, value=genome_version, name=description)) + data_manager_dict['data_tables']['snpeffv_databases'] = data_table_entries except Exception, e: stop_err( 'Error parsing %s %s\n' % (config,str( e )) ) else: diff -r 6a378d0f4856 -r 78bcf4ac437c data_manager/data_manager_snpEff_download.py --- a/data_manager/data_manager_snpEff_download.py Thu Oct 23 05:43:46 2014 -0500 +++ b/data_manager/data_manager_snpEff_download.py Tue Jan 13 12:54:20 2015 -0600 @@ -9,6 +9,7 @@ import shutil import optparse import urllib2 +import gzip from ftplib import FTP import tarfile @@ -64,6 +65,49 @@ return ','.join(descriptions) return organisms +def getSnpeffVersion(jar_path): + snpeff_version = 'SnpEff ?.?' + (snpEff_dir,snpEff_jar) = os.path.split(jar_path) + stderr_path = 'snpeff.err' + stderr_fh = open(stderr_path,'w') + args = [ 'java','-jar', ] + args.append( snpEff_jar ) + args.append( '-h' ) + proc = subprocess.Popen( args=args, shell=False, cwd=snpEff_dir, stderr=stderr_fh.fileno() ) + return_code = proc.wait() + if return_code != 255: + sys.exit( return_code ) + stderr_fh.close() + fh = open(stderr_path,'r') + for line in fh: + m = re.match('^[Ss]npEff version (SnpEff)\s*(\d+\.\d+).*$',line) + if m: + snpeff_version = m.groups()[0] + m.groups()[1] + break + fh.close() + return snpeff_version + +# Starting with SnpEff 4.1 the .bin files contain the SnpEff version: +# Example - the first 3 line of GRCh37.75/snpEffectPredictor.bin (uncompressed): +""" +SnpEff 4.1 +CHROMOSOME 2 1 0 179197 GL000219.1 false +CHROMOSOME 3 1 0 81347269 HSCHR17_1 false +""" +def getSnpeffVersionFromFile(path): + snpeff_version = None + try: + fh = gzip.open(path, 'rb') + buf = fh.read(100) + lines = buf.splitlines() + m = re.match('^(SnpEff)\s+(\d+\.\d+).*$',lines[0].strip()) + if m: + snpeff_version = m.groups()[0] + m.groups()[1] + fh.close() + except Exception, e: + stop_err( 'Error parsing SnpEff version from: %s %s\n' % (path,str( e )) ) + return snpeff_version + """ # Download human database 'hg19' java -jar snpEff.jar download -v hg19 @@ -74,7 +118,7 @@ regulation_HeLa-S3.bin regulation_pattern = 'regulation_(.+).bin' """ -def download_database(data_manager_dict, target_directory, jar_path,config,genome_version,organism): +def download_database(data_manager_dict, target_directory, jar_path, config, genome_version, organism): ## get data_dir from config ##--- ## Databases are stored here @@ -103,25 +147,28 @@ # annotation files that are included in snpEff by a flag annotations_dict = {'nextProt.bin' : '-nextprot','motif.bin': '-motif'} genome_path = os.path.join(data_dir,genome_version) + snpeff_version = getSnpeffVersion(jar_path) + key = snpeff_version + '_' + genome_version if os.path.isdir(genome_path): for root, dirs, files in os.walk(genome_path): for fname in files: if fname.startswith('snpEffectPredictor'): # if snpEffectPredictor.bin download succeeded name = genome_version + (' : ' + organism if organism else '') - data_table_entry = dict(value=genome_version, name=name, path=data_dir) - _add_data_table_entry( data_manager_dict, 'snpeff4_genomedb', data_table_entry ) + # version = getSnpeffVersionFromFile(os.path.join(root,fname)) + data_table_entry = dict(key=key,version=snpeff_version,value=genome_version, name=name, path=data_dir) + _add_data_table_entry( data_manager_dict, 'snpeffv_genomedb', data_table_entry ) else: m = re.match(regulation_pattern,fname) if m: name = m.groups()[0] - data_table_entry = dict(genome=genome_version,value=name, name=name) - _add_data_table_entry( data_manager_dict, 'snpeff4_regulationdb', data_table_entry ) + data_table_entry = dict(key=key,version=snpeff_version,genome=genome_version,value=name, name=name) + _add_data_table_entry( data_manager_dict, 'snpeffv_regulationdb', data_table_entry ) elif fname in annotations_dict: value = annotations_dict[fname] name = value.lstrip('-') - data_table_entry = dict(genome=genome_version,value=value, name=name) - _add_data_table_entry( data_manager_dict, 'snpeff4_annotations', data_table_entry ) + data_table_entry = dict(key=key,version=snpeff_version,genome=genome_version,value=value, name=name) + _add_data_table_entry( data_manager_dict, 'snpeffv_annotations', data_table_entry ) return data_manager_dict def _add_data_table_entry( data_manager_dict, data_table, data_table_entry ): diff -r 6a378d0f4856 -r 78bcf4ac437c data_manager/data_manager_snpEff_download.xml --- a/data_manager/data_manager_snpEff_download.xml Thu Oct 23 05:43:46 2014 -0500 +++ b/data_manager/data_manager_snpEff_download.xml Tue Jan 13 12:54:20 2015 -0600 @@ -28,15 +28,15 @@ - - + + -This tool downloads a SnpEff database and populates data tables: snpeff4_genomedb, snpeff4_regulationdb, and snpeff4_annotations. +This tool downloads a SnpEff database and populates data tables: snpeffv_genomedb, snpeffv_regulationdb, and snpeffv_annotations. To see the list of available SnpEff genomes run the "SnpEff Databases" data manager which records the available genome databases in data table: snpeff4_databases diff -r 6a378d0f4856 -r 78bcf4ac437c data_manager_conf.xml --- a/data_manager_conf.xml Thu Oct 23 05:43:46 2014 -0500 +++ b/data_manager_conf.xml Tue Jan 13 12:54:20 2015 -0600 @@ -1,36 +1,44 @@ - - + + + + - - + + + + - snpEff/v4/data + snpEff/v4_0/data - ${GALAXY_DATA_MANAGER_DATA_PATH}/snpEff/v4/data + ${GALAXY_DATA_MANAGER_DATA_PATH}/snpEff/v4_0/data abspath - + + + - + + + diff -r 6a378d0f4856 -r 78bcf4ac437c repository_dependencies.xml --- a/repository_dependencies.xml Thu Oct 23 05:43:46 2014 -0500 +++ b/repository_dependencies.xml Tue Jan 13 12:54:20 2015 -0600 @@ -1,4 +1,4 @@ - + diff -r 6a378d0f4856 -r 78bcf4ac437c tool-data/snpeff4_annotations.loc.sample --- a/tool-data/snpeff4_annotations.loc.sample Thu Oct 23 05:43:46 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -## Regulation Databases for SnpEff -## These are from the list on: http://snpeff.sourceforge.net/download.html -#genome annotation_name description -#GRCh37.71 nextprot nextprot -#GRCh37.71 motif motif diff -r 6a378d0f4856 -r 78bcf4ac437c tool-data/snpeff4_databases.loc.sample --- a/tool-data/snpeff4_databases.loc.sample Thu Oct 23 05:43:46 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -## Available Databases for SnpEff -## These are from the list on: http://snpeff.sourceforge.net/download.html -## the Description field in this sample is "Genome : Version" -#Version Description -#GRCh37.68 Homo sapiens : GRCh37.68 diff -r 6a378d0f4856 -r 78bcf4ac437c tool-data/snpeff4_genomedb.loc.sample --- a/tool-data/snpeff4_genomedb.loc.sample Thu Oct 23 05:43:46 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -## Downloaded Databases for SnpEff -## These are from the list on: http://snpeff.sourceforge.net/download.html -## the Description field in this sample is "Genome : Version" -#Version Description data_dir path -#GRCh37.68 Homo sapiens : GRCh37.68 /home/galaxy/snpEff/data diff -r 6a378d0f4856 -r 78bcf4ac437c tool-data/snpeff4_regulationdb.loc.sample --- a/tool-data/snpeff4_regulationdb.loc.sample Thu Oct 23 05:43:46 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ -## Regulation Databases for SnpEff -## These are from the list on: http://snpeff.sourceforge.net/download.html -#genome regulation_name description -#GRCh37.70 CD4 CD4 diff -r 6a378d0f4856 -r 78bcf4ac437c tool-data/snpeffv_annotations.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/snpeffv_annotations.loc.sample Tue Jan 13 12:54:20 2015 -0600 @@ -0,0 +1,5 @@ +## Regulation Databases for SnpEff +## These are from the list on: http://snpeff.sourceforge.net/download.html +#key snpeff_version genome annotation_name description +#SnpEff4.0_GRCh37.75 SnpEff4.0 GRCh37.75 nextprot nextprot +#SnpEff4.0_GRCh38.76 SnpEff4.1 GRCh38.76 motif motif diff -r 6a378d0f4856 -r 78bcf4ac437c tool-data/snpeffv_databases.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/snpeffv_databases.loc.sample Tue Jan 13 12:54:20 2015 -0600 @@ -0,0 +1,5 @@ +## Available Databases for SnpEff +## These are from the list on: http://snpeff.sourceforge.net/download.html +## the Description field in this sample is "Genome : Version" +#key snpeff_version Version Description +#SnpEff4.0_GRCh37.75 SnpEff4.0 GRCh37.75 Homo sapiens : GRCh37.75 diff -r 6a378d0f4856 -r 78bcf4ac437c tool-data/snpeffv_genomedb.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/snpeffv_genomedb.loc.sample Tue Jan 13 12:54:20 2015 -0600 @@ -0,0 +1,6 @@ +## Downloaded Databases for SnpEff +## These are from the list on: http://snpeff.sourceforge.net/download.html +## the Description field in this sample is "Genome : Version" +#Key snpeff_version Version Description data_dir path +#SnpEff4.0_GRCh37.74 SnpEff4.0 GRCh37.74 Homo sapiens : GRCh37.74 /home/galaxy/snpEff/v4_0/data +#SnpEff4.1_GRCh38.76 SnpEff4.1 GRCh38.76 Homo sapiens : GRCh38.76 /home/galaxy/snpEff/v4_1/data diff -r 6a378d0f4856 -r 78bcf4ac437c tool-data/snpeffv_regulationdb.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/snpeffv_regulationdb.loc.sample Tue Jan 13 12:54:20 2015 -0600 @@ -0,0 +1,5 @@ +## Regulation Databases for SnpEff +## These are from the list on: http://snpeff.sourceforge.net/download.html +#Key snpeff_version genome regulation_name description +#SnpEff4.0_GRCh37.74 SnpEff4.0 GRCh37.74 CD4 CD4 +#SnpEff4.1_GRCh38.76 SnpEff4.1 GRCh38.76 CD4 CD4 diff -r 6a378d0f4856 -r 78bcf4ac437c tool_data_table_conf.xml.sample --- a/tool_data_table_conf.xml.sample Thu Oct 23 05:43:46 2014 -0500 +++ b/tool_data_table_conf.xml.sample Tue Jan 13 12:54:20 2015 -0600 @@ -1,19 +1,19 @@ - - value, name - +
+ key, version, value, name, path +
- - value, name, path - +
+ key, version, genome, value, name +
- - genome, value, name - +
+ key, version, genome, value, name +
- - genome, value, name - +
+ key, version, value, name +