Mercurial > repos > jjohnson > defuse
annotate datamanager_create_reference.py @ 26:8f0775c43739
Fix metadata_source in defuse_results_to_vcf.xml
| author | Jim Johnson <jj@umn.edu> | 
|---|---|
| date | Fri, 09 Aug 2013 11:36:24 -0500 | 
| parents | b649c729be4c | 
| children | 
| rev | line source | 
|---|---|
| 19 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 1 #!/usr/bin/env python | 
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 2 | 
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 3 import sys | 
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 4 import os | 
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 5 import re | 
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 6 import tempfile | 
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 7 import subprocess | 
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 8 import fileinput | 
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 9 import shutil | 
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 10 import optparse | 
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 11 import urllib2 | 
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 12 from ftplib import FTP | 
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 13 import tarfile | 
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 14 | 
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 15 from galaxy.util.json import from_json_string, to_json_string | 
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 16 | 
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 17 | 
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
def stop_err(msg):
    """Write *msg* to standard error, then terminate with exit status 1."""
    err_stream = sys.stderr
    err_stream.write(msg)
    # Equivalent to sys.exit(1): both raise SystemExit with code 1.
    raise SystemExit(1)
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 21 | 
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
def get_config_dict(config, dataset_directory=None):
    """Read the recognised ``key = value`` settings from a config file.

    Each line is stripped of surrounding whitespace; lines that then start
    with ``#`` are skipped.  A line of the form ``key = value`` (whitespace
    around ``=`` is optional) is kept only when ``key`` is one of the names
    in ``keys`` below.  If a key appears more than once, the last occurrence
    wins.

    Args:
        config: Path of the config file to read.
        dataset_directory: When truthy, stored under ``'dataset_directory'``
            in the result, replacing any value read from the file.

    Returns:
        A dict mapping each recognised key found in the file to its value
        as a string.

    Any exception raised while opening or reading the file is reported
    through ``stop_err``, which terminates the process.
    """
    keys = frozenset([
        'dataset_directory', 'ensembl_organism', 'ensembl_prefix',
        'ensembl_version', 'ensembl_genome_version', 'ucsc_genome_version',
        'ncbi_organism', 'ncbi_prefix', 'chromosomes', 'mt_chromosome',
        'gene_sources', 'ig_gene_sources', 'rrna_gene_sources',
    ])
    # Raw string so that ``\s`` is a regex escape, not a string escape.
    # The lazy first group leaves whitespace before ``=`` out of the key.
    pat = re.compile(r'^([^=]+?)\s*=\s*(.*)$')
    config_dict = {}
    try:
        # ``with`` closes the file on the error path too; the original only
        # closed it when no exception occurred.
        with open(config) as fh:
            for raw_line in fh:
                line = raw_line.strip()
                if line.startswith('#'):
                    continue
                m = pat.match(line)
                if m:
                    k, v = m.groups()
                    if k in keys:
                        config_dict[k] = v
    except Exception as e:
        stop_err('Error parsing %s %s\n' % (config, str(e)))
    if dataset_directory:
        config_dict['dataset_directory'] = dataset_directory
    return config_dict
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 44 | 
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
def run_defuse_script(data_manager_dict, params, target_directory, dbkey, description, config, script):
    """Run the reference-building script and register the result.

    Copies *config* into *target_directory* as ``<dbkey>.config``, runs
    *script* with ``/bin/bash`` passing *target_directory* as its single
    argument, and then appends a data table entry to *data_manager_dict*.

    Args:
        data_manager_dict: Dict that receives the new data table entry
            (modified in place via ``_add_data_table_entry``).
        params: Not used by this function; kept so the signature is unchanged.
        target_directory: Directory to populate; created if it does not exist.
        dbkey: Used as the entry's ``value`` and ``dbkey`` and to name the
            copied config file.
        description: Used as the entry's ``name``.
        config: Path of the config file to copy.
        script: Path of the bash script to run.

    If the script cannot be started or exits with a non-zero status,
    ``stop_err`` is called (with the script's stderr output in the latter
    case), which terminates the process.
    """
    if not os.path.isdir(target_directory):
        os.makedirs(target_directory)
    ## Name the config consistently with data_manager_conf.xml
    # copy the config file to the target_directory
    # when DataManager moves files to their tool-data location, the config will get moved as well,
    # and the value_translation in data_manager_conf.xml will tell us the new location
    # defuse.xml will use the path to this config file to set the dataset_directory
    config_name = '%s.config' % dbkey
    defuse_config = os.path.join(target_directory, config_name)
    shutil.copyfile(config, defuse_config)
    # Argument list instead of an interpolated shell string: paths containing
    # spaces or shell metacharacters are passed through to bash verbatim.
    cmd = ['/bin/bash', script, target_directory]
    # Run
    try:
        # Anonymous temp files (rather than pipes) so a very large amount of
        # output cannot block the child; they are removed when closed.
        with tempfile.TemporaryFile() as tmp_stdout:
            with tempfile.TemporaryFile() as tmp_stderr:
                proc = subprocess.Popen(args=cmd, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr)
                returncode = proc.wait()
                # A single read to EOF; the original chunked loop never
                # terminated when the stderr size was an exact non-zero
                # multiple of its buffer size.
                tmp_stderr.seek(0)
                stderr = tmp_stderr.read()
        if not isinstance(stderr, str):
            # The file is binary; decode where bytes and str are distinct types.
            stderr = stderr.decode('utf-8', 'replace')
        if returncode != 0:
            raise Exception(stderr)

        # TODO: look for errors in program output.
    except Exception as e:
        stop_err('Error creating defuse reference:\n' + str(e))
    # The parsed values are not used here; the call is kept so that its
    # error handling (stop_err on a failure to read the config) still runs.
    get_config_dict(config, dataset_directory=target_directory)
    data_table_entry = dict(value=dbkey, dbkey=dbkey, name=description, path=config_name)
    _add_data_table_entry(data_manager_dict, data_table_entry)
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 88 def _add_data_table_entry( data_manager_dict, data_table_entry ): | 
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 89 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) | 
| 20 
b649c729be4c
Change tool data table to defuse_reference
 Jim Johnson <jj@umn.edu> parents: 
19diff
changeset | 90 data_manager_dict['data_tables']['defuse_reference'] = data_manager_dict['data_tables'].get( 'defuse_reference', [] ) | 
| 
b649c729be4c
Change tool data table to defuse_reference
 Jim Johnson <jj@umn.edu> parents: 
19diff
changeset | 91 data_manager_dict['data_tables']['defuse_reference'].append( data_table_entry ) | 
| 19 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 92 return data_manager_dict | 
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 93 | 
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
def main():
    """Command-line entry point.

    Options ``-k/--dbkey``, ``-d/--description``, ``-c/--defuse_config`` and
    ``-s/--defuse_script`` are forwarded to ``run_defuse_script``.  The first
    positional argument is the path of a JSON file: it is read to obtain
    ``output_data[0]['extra_files_path']`` (the target directory) and is
    overwritten at the end with the resulting data manager dict.
    """
    #Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option('-k', '--dbkey', dest='dbkey', action='store', type="string", default=None, help='dbkey')
    parser.add_option('-d', '--description', dest='description', action='store', type="string", default=None, help='description')
    parser.add_option('-c', '--defuse_config', dest='defuse_config', action='store', type="string", default=None, help='defuse_config')
    parser.add_option('-s', '--defuse_script', dest='defuse_script', action='store', type="string", default=None, help='defuse_script')
    (options, args) = parser.parse_args()

    if not args:
        # Report a usage error instead of failing with a bare IndexError.
        parser.error('missing path to the JSON parameter file')
    filename = args[0]

    # ``with`` so the handle is closed as soon as the JSON has been read.
    with open(filename) as fh:
        params = from_json_string(fh.read())
    target_directory = params['output_data'][0]['extra_files_path']
    # Tolerate an existing directory and create missing parents, matching
    # the check run_defuse_script performs on the same path.
    if not os.path.isdir(target_directory):
        os.makedirs(target_directory)
    data_manager_dict = {}

    #Create Defuse Reference Data
    run_defuse_script(data_manager_dict, params, target_directory, options.dbkey, options.description, options.defuse_config, options.defuse_script)

    #save info to json file
    # Text mode, and closed explicitly so the data is flushed before exit.
    with open(filename, 'w') as fh:
        fh.write(to_json_string(data_manager_dict))
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 116 | 
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
| 
1af6f32ff592
Add datamanager, move to defuse_reference.loc
 Jim Johnson <jj@umn.edu> parents: diff
changeset | 118 | 
