Mercurial > repos > jjohnson > data_manager_snpeff
comparison data_manager/data_manager_snpEff_download.py @ 5:78bcf4ac437c
Use tool_data_table with key and version columns added to allow for multiple versions in a .loc file
| author | Jim Johnson <jj@umn.edu> | 
|---|---|
| date | Tue, 13 Jan 2015 12:54:20 -0600 | 
| parents | c6769a700e55 | 
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| 4:6a378d0f4856 | 5:78bcf4ac437c | 
|---|---|
| 7 import subprocess | 7 import subprocess | 
| 8 import fileinput | 8 import fileinput | 
| 9 import shutil | 9 import shutil | 
| 10 import optparse | 10 import optparse | 
| 11 import urllib2 | 11 import urllib2 | 
| | 12 import gzip |
| 12 from ftplib import FTP | 13 from ftplib import FTP | 
| 13 import tarfile | 14 import tarfile | 
| 14 | 15 | 
| 15 from galaxy.util.json import from_json_string, to_json_string | 16 from galaxy.util.json import from_json_string, to_json_string | 
| 16 | 17 | 
| 62 for genome in snpDBdict: | 63 for genome in snpDBdict: | 
| 63 descriptions.append(snpDBdict[genome] if genome in snpDBdict else genome) | 64 descriptions.append(snpDBdict[genome] if genome in snpDBdict else genome) | 
| 64 return ','.join(descriptions) | 65 return ','.join(descriptions) | 
| 65 return organisms | 66 return organisms | 
| 66 | 67 | 
| | 68 def getSnpeffVersion(jar_path): |
| | 69 snpeff_version = 'SnpEff ?.?' |
| | 70 (snpEff_dir,snpEff_jar) = os.path.split(jar_path) |
| | 71 stderr_path = 'snpeff.err' |
| | 72 stderr_fh = open(stderr_path,'w') |
| | 73 args = [ 'java','-jar', ] |
| | 74 args.append( snpEff_jar ) |
| | 75 args.append( '-h' ) |
| | 76 proc = subprocess.Popen( args=args, shell=False, cwd=snpEff_dir, stderr=stderr_fh.fileno() ) |
| | 77 return_code = proc.wait() |
| | 78 if return_code != 255: |
| | 79 sys.exit( return_code ) |
| | 80 stderr_fh.close() |
| | 81 fh = open(stderr_path,'r') |
| | 82 for line in fh: |
| | 83 m = re.match('^[Ss]npEff version (SnpEff)\s*(\d+\.\d+).*$',line) |
| | 84 if m: |
| | 85 snpeff_version = m.groups()[0] + m.groups()[1] |
| | 86 break |
| | 87 fh.close() |
| | 88 return snpeff_version |
| | 89 |
| | 90 # Starting with SnpEff 4.1 the .bin files contain the SnpEff version: |
| | 91 # Example - the first 3 line of GRCh37.75/snpEffectPredictor.bin (uncompressed): |
| | 92 """ |
| | 93 SnpEff 4.1 |
| | 94 CHROMOSOME 2 1 0 179197 GL000219.1 false |
| | 95 CHROMOSOME 3 1 0 81347269 HSCHR17_1 false |
| | 96 """ |
| | 97 def getSnpeffVersionFromFile(path): |
| | 98 snpeff_version = None |
| | 99 try: |
| | 100 fh = gzip.open(path, 'rb') |
| | 101 buf = fh.read(100) |
| | 102 lines = buf.splitlines() |
| | 103 m = re.match('^(SnpEff)\s+(\d+\.\d+).*$',lines[0].strip()) |
| | 104 if m: |
| | 105 snpeff_version = m.groups()[0] + m.groups()[1] |
| | 106 fh.close() |
| | 107 except Exception, e: |
| | 108 stop_err( 'Error parsing SnpEff version from: %s %s\n' % (path,str( e )) ) |
| | 109 return snpeff_version |
| | 110 |
| 67 """ | 111 """ | 
| 68 # Download human database 'hg19' | 112 # Download human database 'hg19' | 
| 69 java -jar snpEff.jar download -v hg19 | 113 java -jar snpEff.jar download -v hg19 | 
| 70 | 114 | 
| 71 <command>java -jar \$SNPEFF_JAR_PATH/snpEff.jar download -c \$JAVA_JAR_PATH/snpEff.config $genomeVersion > $logfile </command> | 115 <command>java -jar \$SNPEFF_JAR_PATH/snpEff.jar download -c \$JAVA_JAR_PATH/snpEff.config $genomeVersion > $logfile </command> | 
| 72 | 116 | 
| 73 snpEffectPredictor.bin | 117 snpEffectPredictor.bin | 
| 74 regulation_HeLa-S3.bin | 118 regulation_HeLa-S3.bin | 
| 75 regulation_pattern = 'regulation_(.+).bin' | 119 regulation_pattern = 'regulation_(.+).bin' | 
| 76 """ | 120 """ | 
| 77 def download_database(data_manager_dict, target_directory, jar_path,config,genome_version,organism): | 121 def download_database(data_manager_dict, target_directory, jar_path, config, genome_version, organism): | 
| 78 ## get data_dir from config | 122 ## get data_dir from config | 
| 79 ##--- | 123 ##--- | 
| 80 ## Databases are stored here | 124 ## Databases are stored here | 
| 81 ## E.g.: Information for 'hg19' is stored in data_dir/hg19/ | 125 ## E.g.: Information for 'hg19' is stored in data_dir/hg19/ | 
| 82 ## | 126 ## | 
| 101 ## search data_dir/genome_version for files | 145 ## search data_dir/genome_version for files | 
| 102 regulation_pattern = 'regulation_(.+).bin' | 146 regulation_pattern = 'regulation_(.+).bin' | 
| 103 # annotation files that are included in snpEff by a flag | 147 # annotation files that are included in snpEff by a flag | 
| 104 annotations_dict = {'nextProt.bin' : '-nextprot','motif.bin': '-motif'} | 148 annotations_dict = {'nextProt.bin' : '-nextprot','motif.bin': '-motif'} | 
| 105 genome_path = os.path.join(data_dir,genome_version) | 149 genome_path = os.path.join(data_dir,genome_version) | 
| | 150 snpeff_version = getSnpeffVersion(jar_path) |
| | 151 key = snpeff_version + '_' + genome_version |
| 106 if os.path.isdir(genome_path): | 152 if os.path.isdir(genome_path): | 
| 107 for root, dirs, files in os.walk(genome_path): | 153 for root, dirs, files in os.walk(genome_path): | 
| 108 for fname in files: | 154 for fname in files: | 
| 109 if fname.startswith('snpEffectPredictor'): | 155 if fname.startswith('snpEffectPredictor'): | 
| 110 # if snpEffectPredictor.bin download succeeded | 156 # if snpEffectPredictor.bin download succeeded | 
| 111 name = genome_version + (' : ' + organism if organism else '') | 157 name = genome_version + (' : ' + organism if organism else '') | 
| 112 data_table_entry = dict(value=genome_version, name=name, path=data_dir) | 158 # version = getSnpeffVersionFromFile(os.path.join(root,fname)) | 
| 113 _add_data_table_entry( data_manager_dict, 'snpeff4_genomedb', data_table_entry ) | 159 data_table_entry = dict(key=key,version=snpeff_version,value=genome_version, name=name, path=data_dir) | 
| | 160 _add_data_table_entry( data_manager_dict, 'snpeffv_genomedb', data_table_entry ) |
| 114 else: | 161 else: | 
| 115 m = re.match(regulation_pattern,fname) | 162 m = re.match(regulation_pattern,fname) | 
| 116 if m: | 163 if m: | 
| 117 name = m.groups()[0] | 164 name = m.groups()[0] | 
| 118 data_table_entry = dict(genome=genome_version,value=name, name=name) | 165 data_table_entry = dict(key=key,version=snpeff_version,genome=genome_version,value=name, name=name) | 
| 119 _add_data_table_entry( data_manager_dict, 'snpeff4_regulationdb', data_table_entry ) | 166 _add_data_table_entry( data_manager_dict, 'snpeffv_regulationdb', data_table_entry ) | 
| 120 elif fname in annotations_dict: | 167 elif fname in annotations_dict: | 
| 121 value = annotations_dict[fname] | 168 value = annotations_dict[fname] | 
| 122 name = value.lstrip('-') | 169 name = value.lstrip('-') | 
| 123 data_table_entry = dict(genome=genome_version,value=value, name=name) | 170 data_table_entry = dict(key=key,version=snpeff_version,genome=genome_version,value=value, name=name) | 
| 124 _add_data_table_entry( data_manager_dict, 'snpeff4_annotations', data_table_entry ) | 171 _add_data_table_entry( data_manager_dict, 'snpeffv_annotations', data_table_entry ) | 
| 125 return data_manager_dict | 172 return data_manager_dict | 
| 126 | 173 | 
| 127 def _add_data_table_entry( data_manager_dict, data_table, data_table_entry ): | 174 def _add_data_table_entry( data_manager_dict, data_table, data_table_entry ): | 
| 128 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) | 175 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) | 
| 129 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get( data_table, [] ) | 176 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get( data_table, [] ) | 
