annotate datamanager_create_reference.py @ 22:68494d6aabeb

Update datamanager and defuse.xml config file generation
author Jim Johnson <jj@umn.edu>
date Thu, 27 Jun 2013 13:11:50 -0500
parents b649c729be4c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
19
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
1 #!/usr/bin/env python
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
2
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
3 import sys
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
4 import os
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
5 import re
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
6 import tempfile
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
7 import subprocess
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
8 import fileinput
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
9 import shutil
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
10 import optparse
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
11 import urllib2
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
12 from ftplib import FTP
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
13 import tarfile
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
14
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
15 from galaxy.util.json import from_json_string, to_json_string
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
16
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
17
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
def stop_err(msg):
    """Report a fatal error and terminate.

    Writes ``msg`` verbatim (no newline is appended) to standard error and
    then ends the process with exit status 1.
    """
    sys.stderr.write(msg)
    raise SystemExit(1)
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
21
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
def get_config_dict(config, dataset_directory=None):
    """Read the recognised ``key = value`` settings from a config file.

    Lines starting with ``#`` (after stripping whitespace) are skipped, as
    are lines that do not look like ``key = value`` and settings whose key
    is not in the recognised list below.

    Args:
        config: Path of the config file to read.
        dataset_directory: If truthy, replaces whatever ``dataset_directory``
            value was read from the file.

    Returns:
        A dict mapping each recognised key found in the file to its value
        (a string with surrounding whitespace removed).

    If the file cannot be opened or read, the error is reported through
    ``stop_err``, which terminates the process.
    """
    keys = frozenset([
        'dataset_directory', 'ensembl_organism', 'ensembl_prefix',
        'ensembl_version', 'ensembl_genome_version', 'ucsc_genome_version',
        'ncbi_organism', 'ncbi_prefix', 'chromosomes', 'mt_chromosome',
        'gene_sources', 'ig_gene_sources', 'rrna_gene_sources',
    ])
    # Non-greedy key so that whitespace before '=' is not captured in it.
    pat = re.compile(r'^([^=]+?)\s*=\s*(.*)$')
    config_dict = {}
    try:
        # 'with' guarantees the handle is closed even if reading fails.
        with open(config) as fh:
            for raw_line in fh:
                line = raw_line.strip()
                if line.startswith('#'):
                    continue
                m = pat.match(line)
                if m is None:
                    continue
                k, v = m.groups()
                if k in keys:
                    config_dict[k] = v
    except (IOError, OSError, UnicodeError) as e:
        stop_err('Error parsing %s %s\n' % (config, str(e)))
    if dataset_directory:
        config_dict['dataset_directory'] = dataset_directory
    return config_dict
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
44
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
def run_defuse_script(data_manager_dict, params, target_directory, dbkey, description, config, script):
    """Run the reference-building script and record the result.

    Copies ``config`` into ``target_directory`` as ``<dbkey>.config``, runs
    ``script`` with ``/bin/bash`` passing ``target_directory`` as its only
    argument, and on success appends an entry for the new reference to
    ``data_manager_dict`` via ``_add_data_table_entry``.

    Args:
        data_manager_dict: Dict that receives the new data table entry.
        params: Not used by this function; kept so existing callers work.
        target_directory: Directory to build into; created if missing.
        dbkey: Used as the entry's ``value`` and ``dbkey`` and to name the
            copied config file.
        description: Used as the entry's ``name``.
        config: Path of the config file to copy.
        script: Path of the bash script to run.

    If the script cannot be started or exits with a non-zero status, the
    error (the script's stderr in the latter case) is reported through
    ``stop_err``, which terminates the process.
    """
    if not os.path.isdir(target_directory):
        os.makedirs(target_directory)
    ## Name the config consistently with data_manager_conf.xml
    # copy the config file to the target_directory
    # when DataManager moves files to their tool-data location, the config will get moved as well,
    # and the value_translation in data_manager_conf.xml will tell us the new location
    # defuse.xml will use the path to this config file to set the dataset_directory
    config_name = '%s.config' % dbkey
    defuse_config = os.path.join(target_directory, config_name)
    shutil.copyfile(config, defuse_config)
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact and avoids command injection.
    cmd = ['/bin/bash', script, target_directory]
    returncode = None
    stderr = ''
    try:
        # stdout is not inspected, so discard it; stderr goes to an anonymous
        # temporary file (removed automatically on close) so that very large
        # output cannot fill a pipe and block the child.
        with open(os.devnull, 'wb') as discard_stdout:
            with tempfile.TemporaryFile() as tmp_stderr:
                returncode = subprocess.call(cmd, stdout=discard_stdout, stderr=tmp_stderr)
                tmp_stderr.seek(0)
                stderr = tmp_stderr.read()
    except (IOError, OSError) as e:
        stop_err('Error creating defuse reference:\n' + str(e))
    if returncode != 0:
        # On Python 3 the captured output is bytes; make it text for the message.
        if not isinstance(stderr, str):
            stderr = stderr.decode('utf-8', 'replace')
        # TODO: look for errors in program output.
        stop_err('Error creating defuse reference:\n' + stderr)
    data_table_entry = dict(value=dbkey, dbkey=dbkey, name=description, path=config_name)
    _add_data_table_entry(data_manager_dict, data_table_entry)
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
88 def _add_data_table_entry( data_manager_dict, data_table_entry ):
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
89 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} )
20
b649c729be4c Change tool data table to defuse_reference
Jim Johnson <jj@umn.edu>
parents: 19
diff changeset
90 data_manager_dict['data_tables']['defuse_reference'] = data_manager_dict['data_tables'].get( 'defuse_reference', [] )
b649c729be4c Change tool data table to defuse_reference
Jim Johnson <jj@umn.edu>
parents: 19
diff changeset
91 data_manager_dict['data_tables']['defuse_reference'].append( data_table_entry )
19
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
92 return data_manager_dict
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
93
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
def main():
    """Command-line entry point.

    Expects one positional argument: the path of a JSON parameter file. The
    build directory is read from ``params['output_data'][0]['extra_files_path']``,
    the reference is built with ``run_defuse_script``, and the resulting data
    manager dict is then written back, as JSON, over the same parameter file.

    Exits with a usage error when the positional argument or any of
    ``--dbkey``, ``--defuse_config`` or ``--defuse_script`` is missing.
    """
    #Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option( '-k', '--dbkey', dest='dbkey', action='store', type="string", default=None, help='dbkey' )
    parser.add_option( '-d', '--description', dest='description', action='store', type="string", default=None, help='description' )
    parser.add_option( '-c', '--defuse_config', dest='defuse_config', action='store', type="string", default=None, help='defuse_config' )
    parser.add_option( '-s', '--defuse_script', dest='defuse_script', action='store', type="string", default=None, help='defuse_script' )
    (options, args) = parser.parse_args()

    if not args:
        parser.error('missing path to the JSON parameter file')
    # Without these the build would run with the literal value None.
    for required in ('dbkey', 'defuse_config', 'defuse_script'):
        if getattr(options, required) is None:
            parser.error('--%s is required' % required)

    filename = args[0]

    with open(filename) as params_file:
        params = from_json_string(params_file.read())
    target_directory = params['output_data'][0]['extra_files_path']
    # Tolerate a pre-existing directory and create missing parents.
    if not os.path.isdir(target_directory):
        os.makedirs(target_directory)
    data_manager_dict = {}

    #Create Defuse Reference Data
    run_defuse_script(data_manager_dict, params, target_directory, options.dbkey, options.description, options.defuse_config, options.defuse_script)

    #save info to json file
    with open(filename, 'w') as out_file:
        out_file.write(to_json_string(data_manager_dict))


if __name__ == "__main__":
    main()
1af6f32ff592 Add datamanager, move to defuse_reference.loc
Jim Johnson <jj@umn.edu>
parents:
diff changeset
118