Mercurial > repos > jjohnson > data_manager_snpeff
comparison data_manager/data_manager_snpEff_download.py @ 5:78bcf4ac437c
Use tool_data_table with key and version columns added to allow for multiple versions in a .loc file
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Tue, 13 Jan 2015 12:54:20 -0600 |
parents | c6769a700e55 |
children |
comparison
equal
deleted
inserted
replaced
4:6a378d0f4856 | 5:78bcf4ac437c |
---|---|
7 import subprocess | 7 import subprocess |
8 import fileinput | 8 import fileinput |
9 import shutil | 9 import shutil |
10 import optparse | 10 import optparse |
11 import urllib2 | 11 import urllib2 |
12 import gzip | |
12 from ftplib import FTP | 13 from ftplib import FTP |
13 import tarfile | 14 import tarfile |
14 | 15 |
15 from galaxy.util.json import from_json_string, to_json_string | 16 from galaxy.util.json import from_json_string, to_json_string |
16 | 17 |
62 for genome in snpDBdict: | 63 for genome in snpDBdict: |
63 descriptions.append(snpDBdict[genome] if genome in snpDBdict else genome) | 64 descriptions.append(snpDBdict[genome] if genome in snpDBdict else genome) |
64 return ','.join(descriptions) | 65 return ','.join(descriptions) |
65 return organisms | 66 return organisms |
66 | 67 |
68 def getSnpeffVersion(jar_path): | |
69 snpeff_version = 'SnpEff ?.?' | |
70 (snpEff_dir,snpEff_jar) = os.path.split(jar_path) | |
71 stderr_path = 'snpeff.err' | |
72 stderr_fh = open(stderr_path,'w') | |
73 args = [ 'java','-jar', ] | |
74 args.append( snpEff_jar ) | |
75 args.append( '-h' ) | |
76 proc = subprocess.Popen( args=args, shell=False, cwd=snpEff_dir, stderr=stderr_fh.fileno() ) | |
77 return_code = proc.wait() | |
78 if return_code != 255: | |
79 sys.exit( return_code ) | |
80 stderr_fh.close() | |
81 fh = open(stderr_path,'r') | |
82 for line in fh: | |
83 m = re.match('^[Ss]npEff version (SnpEff)\s*(\d+\.\d+).*$',line) | |
84 if m: | |
85 snpeff_version = m.groups()[0] + m.groups()[1] | |
86 break | |
87 fh.close() | |
88 return snpeff_version | |
89 | |
90 # Starting with SnpEff 4.1 the .bin files contain the SnpEff version: | |
91 # Example - the first 3 lines of GRCh37.75/snpEffectPredictor.bin (uncompressed): | |
92 """ | |
93 SnpEff 4.1 | |
94 CHROMOSOME 2 1 0 179197 GL000219.1 false | |
95 CHROMOSOME 3 1 0 81347269 HSCHR17_1 false | |
96 """ | |
97 def getSnpeffVersionFromFile(path): | |
98 snpeff_version = None | |
99 try: | |
100 fh = gzip.open(path, 'rb') | |
101 buf = fh.read(100) | |
102 lines = buf.splitlines() | |
103 m = re.match('^(SnpEff)\s+(\d+\.\d+).*$',lines[0].strip()) | |
104 if m: | |
105 snpeff_version = m.groups()[0] + m.groups()[1] | |
106 fh.close() | |
107 except Exception, e: | |
108 stop_err( 'Error parsing SnpEff version from: %s %s\n' % (path,str( e )) ) | |
109 return snpeff_version | |
110 | |
67 """ | 111 """ |
68 # Download human database 'hg19' | 112 # Download human database 'hg19' |
69 java -jar snpEff.jar download -v hg19 | 113 java -jar snpEff.jar download -v hg19 |
70 | 114 |
71 <command>java -jar \$SNPEFF_JAR_PATH/snpEff.jar download -c \$JAVA_JAR_PATH/snpEff.config $genomeVersion > $logfile </command> | 115 <command>java -jar \$SNPEFF_JAR_PATH/snpEff.jar download -c \$JAVA_JAR_PATH/snpEff.config $genomeVersion > $logfile </command> |
72 | 116 |
73 snpEffectPredictor.bin | 117 snpEffectPredictor.bin |
74 regulation_HeLa-S3.bin | 118 regulation_HeLa-S3.bin |
75 regulation_pattern = 'regulation_(.+).bin' | 119 regulation_pattern = 'regulation_(.+).bin' |
76 """ | 120 """ |
77 def download_database(data_manager_dict, target_directory, jar_path,config,genome_version,organism): | 121 def download_database(data_manager_dict, target_directory, jar_path, config, genome_version, organism): |
78 ## get data_dir from config | 122 ## get data_dir from config |
79 ##--- | 123 ##--- |
80 ## Databases are stored here | 124 ## Databases are stored here |
81 ## E.g.: Information for 'hg19' is stored in data_dir/hg19/ | 125 ## E.g.: Information for 'hg19' is stored in data_dir/hg19/ |
82 ## | 126 ## |
101 ## search data_dir/genome_version for files | 145 ## search data_dir/genome_version for files |
102 regulation_pattern = 'regulation_(.+).bin' | 146 regulation_pattern = 'regulation_(.+).bin' |
103 # annotation files that are included in snpEff by a flag | 147 # annotation files that are included in snpEff by a flag |
104 annotations_dict = {'nextProt.bin' : '-nextprot','motif.bin': '-motif'} | 148 annotations_dict = {'nextProt.bin' : '-nextprot','motif.bin': '-motif'} |
105 genome_path = os.path.join(data_dir,genome_version) | 149 genome_path = os.path.join(data_dir,genome_version) |
150 snpeff_version = getSnpeffVersion(jar_path) | |
151 key = snpeff_version + '_' + genome_version | |
106 if os.path.isdir(genome_path): | 152 if os.path.isdir(genome_path): |
107 for root, dirs, files in os.walk(genome_path): | 153 for root, dirs, files in os.walk(genome_path): |
108 for fname in files: | 154 for fname in files: |
109 if fname.startswith('snpEffectPredictor'): | 155 if fname.startswith('snpEffectPredictor'): |
110 # if snpEffectPredictor.bin download succeeded | 156 # if snpEffectPredictor.bin download succeeded |
111 name = genome_version + (' : ' + organism if organism else '') | 157 name = genome_version + (' : ' + organism if organism else '') |
112 data_table_entry = dict(value=genome_version, name=name, path=data_dir) | 158 # version = getSnpeffVersionFromFile(os.path.join(root,fname)) |
113 _add_data_table_entry( data_manager_dict, 'snpeff4_genomedb', data_table_entry ) | 159 data_table_entry = dict(key=key,version=snpeff_version,value=genome_version, name=name, path=data_dir) |
160 _add_data_table_entry( data_manager_dict, 'snpeffv_genomedb', data_table_entry ) | |
114 else: | 161 else: |
115 m = re.match(regulation_pattern,fname) | 162 m = re.match(regulation_pattern,fname) |
116 if m: | 163 if m: |
117 name = m.groups()[0] | 164 name = m.groups()[0] |
118 data_table_entry = dict(genome=genome_version,value=name, name=name) | 165 data_table_entry = dict(key=key,version=snpeff_version,genome=genome_version,value=name, name=name) |
119 _add_data_table_entry( data_manager_dict, 'snpeff4_regulationdb', data_table_entry ) | 166 _add_data_table_entry( data_manager_dict, 'snpeffv_regulationdb', data_table_entry ) |
120 elif fname in annotations_dict: | 167 elif fname in annotations_dict: |
121 value = annotations_dict[fname] | 168 value = annotations_dict[fname] |
122 name = value.lstrip('-') | 169 name = value.lstrip('-') |
123 data_table_entry = dict(genome=genome_version,value=value, name=name) | 170 data_table_entry = dict(key=key,version=snpeff_version,genome=genome_version,value=value, name=name) |
124 _add_data_table_entry( data_manager_dict, 'snpeff4_annotations', data_table_entry ) | 171 _add_data_table_entry( data_manager_dict, 'snpeffv_annotations', data_table_entry ) |
125 return data_manager_dict | 172 return data_manager_dict |
126 | 173 |
127 def _add_data_table_entry( data_manager_dict, data_table, data_table_entry ): | 174 def _add_data_table_entry( data_manager_dict, data_table, data_table_entry ): |
128 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) | 175 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) |
129 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get( data_table, [] ) | 176 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get( data_table, [] ) |