hubArchiveCreator.py @ 0:3e0c61b52a06 draft

planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
author yating-l
date Mon, 31 Oct 2016 16:36:25 -0400
parents
children 4ced8f116509
#!/usr/bin/python
# -*- coding: utf8 -*-

"""
This Galaxy tool prepares your files for Assembly Hub visualization.

Example test invocation:
hubArchiveCreator.py -g test-data/augustusDbia3.gff3 -f test-data/dbia3.fa -d . -u ./tools -o output.html
"""

import argparse
import collections
import json
import logging
import os
import sys

# Internal dependencies
from Bam import Bam
from BedSimpleRepeats import BedSimpleRepeats
from Bed import Bed
from BigWig import BigWig
from util.Fasta import Fasta
from util.Filters import TraceBackFormatter
from Gff3 import Gff3
from Gtf import Gtf
from Psl import Psl
from TrackHub import TrackHub

# TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort]


def main(argv):
    # Command-line parsing init
    parser = argparse.ArgumentParser(description='Prepare the input files and build a Track Hub Archive for Assembly Hub visualization.')

    # Reference genome mandatory
    parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome')

    # GFF3 Management
    parser.add_argument('--gff3', action='append', help='GFF3 format')

    # GTF Management
    parser.add_argument('--gtf', action='append', help='GTF format')

    # Bed4+12 (TrfBig)
    parser.add_argument('--bedSimpleRepeats', action='append', help='Bed4+12 format, using simpleRepeats.as')

    # Generic Bed (Blastx transformed to bed)
    parser.add_argument('--bed', action='append', help='Bed generic format')

    # BigWig Management
    parser.add_argument('--bigwig', action='append', help='BigWig format')

    # Bam Management
    parser.add_argument('--bam', action='append', help='Bam format')

    # Psl Management
    parser.add_argument('--psl', action='append', help='Psl format')

    # TODO: Check if the running directory can have issues if we run the tool outside
    parser.add_argument('-d', '--directory',
                        help='Tool directory, where the templates are found. Defaults to the running directory')
    parser.add_argument('-u', '--ucsc_tools_path',
                        help='Directory containing the executables needed to run this tool')
    parser.add_argument('-e', '--extra_files_path',
                        help='Name, in Galaxy, of the output folder where the Track Hub Archive will be built')
    parser.add_argument('-o', '--output', help='Name of the HTML summarizing the content of the Track Hub Archive')

    parser.add_argument('-j', '--data_json', help='JSON containing the metadata of the inputs')

    parser.add_argument('--user_email', help='Email of the user who launched the Hub Archive creation')

    parser.add_argument('--genome_name', help='UCSC Genome Browser assembly ID')

    parser.add_argument('--debug_mode', action='store_true', help='Show more details about errors')

    # Begin init variables

    toolDirectory = '.'
    extra_files_path = '.'

    # Get the args passed as parameters
    args = parser.parse_args()

    extra_files_path = args.extra_files_path
    toolDirectory = args.directory

    #### Logging management ####
    # If we are in debug mode, also print the debug dump to stdout

    configure_logger(extra_files_path=extra_files_path, debug=args.debug_mode)

    #### END Logging management ####

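    # args.fasta is expected to be a small JSON object with "false_path" and "name" keys;
    # a hypothetical example (illustrative values only, not taken from an actual Galaxy run):
    # {"false_path": "/galaxy/files/dataset_1.dat", "name": "dbia3.fa"}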
    array_inputs_reference_genome = json.loads(args.fasta)

    # TODO: Replace these with the object Fasta
    input_fasta_file = array_inputs_reference_genome["false_path"]
    input_fasta_file_name = sanitize_name_input(array_inputs_reference_genome["name"])
    genome_name = sanitize_name_input(args.genome_name)

    reference_genome = Fasta(input_fasta_file,
                             input_fasta_file_name, genome_name)

    user_email = args.user_email

    # TODO: Use a class for better management of the structure of these inputs
    # These inputs are populated in the Galaxy wrapper XML and are in this format:
    # ARRAY[DICT{FILE_PATH: DICT{NAME: NAME_VALUE, EXTRA_DATA: EXTRA_DATA_VALUE}}]
    # EXTRA_DATA could be anything, for example the index of a BAM => {"index": FILE_PATH}
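    # A hypothetical --data_json payload matching that shape (illustrative values only,
    # not taken from an actual Galaxy run) could look like:
    # {
    #     "/galaxy/files/dataset_42.dat": {"name": "alignments", "order_index": 1,
    #                                      "extra_data": {"index": "/galaxy/files/dataset_42.bai"}},
    #     "/galaxy/files/dataset_43.dat": {"name": "augustus_genes", "order_index": 2}
    # }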
    array_inputs_bam = args.bam
    array_inputs_bed_generic = args.bed
    array_inputs_bed_simple_repeats = args.bedSimpleRepeats
    array_inputs_bigwig = args.bigwig
    array_inputs_gff3 = args.gff3
    array_inputs_gtf = args.gtf
    array_inputs_psl = args.psl

    outputFile = args.output

    json_inputs_data = args.data_json

    inputs_data = json.loads(json_inputs_data)
    # We remove the spaces and slashes in the "name" field of each inputs_data entry
    sanitize_name_inputs(inputs_data)

    # TODO: Check here all the binaries / tools we need. Exception if missing

    # Create the Track Hub folder
    trackHub = TrackHub(reference_genome, user_email, outputFile, extra_files_path, toolDirectory)

    all_datatype_dictionary = {}

    for (inputs, datatype_class) in [
            (array_inputs_bam, Bam),
            (array_inputs_bed_generic, Bed),
            (array_inputs_bigwig, BigWig),
            (array_inputs_bed_simple_repeats, BedSimpleRepeats),
            (array_inputs_gff3, Gff3),
            (array_inputs_gtf, Gtf),
            (array_inputs_psl, Psl)]:
        if inputs:
            all_datatype_dictionary.update(create_ordered_datatype_objects(datatype_class, inputs, inputs_data))
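    # At this point all_datatype_dictionary maps each input's order_index to its datatype
    # object; a hypothetical (illustrative) result: {2: <Gff3 object>, 1: <Bam object>}, not yet ordered.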

    # Create an ordered dictionary to add the tracks in the tool form order
    all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary)

    logging.debug("----- End of all_datatype_dictionary processing -----")
    logging.debug("all_datatype_ordered_dictionary keys are: {0}".format(all_datatype_ordered_dictionary.keys()))

    logging.debug("----- Beginning of Track adding processing -----")
    for index, datatypeObject in all_datatype_ordered_dictionary.iteritems():
        trackHub.addTrack(datatypeObject.track.trackDb)
    logging.debug("----- End of Track adding processing -----")

    # We process all the modifications to create the zip file
    # trackHub.createZip()

    # We terminate the process and create an HTML file summarizing all the files
    trackHub.terminate()

    logging.debug('#### End of HubArchiveCreator Debug Mode: Bye! ####')

    sys.exit(0)


def sanitize_name_input(string_to_sanitize):
    return string_to_sanitize \
        .replace("/", "_") \
        .replace(" ", "_")
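# Illustrative example (hypothetical input): sanitize_name_input("dbia 3/scaffold 1") returns "dbia_3_scaffold_1"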


def sanitize_name_inputs(inputs_data):
    """
    Sometimes the output from Galaxy, or even just a file name from the user, contains spaces.
    It can also contain the '/' character, which could break the use of os.path functions.
    :param inputs_data: dict[string, dict[string, string]]
    :return:
    """
    for key in inputs_data:
        inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"])


def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data):
    """
    Create all the necessary files / folders for a given datatype, for the TrackHub,
    and update the dictionary of datatypes.
    :param ExtensionClass: T <= Datatype
    :param array_inputs: list[string]
    :param inputs_data:
    """

    datatype_dictionary = {}

    # TODO: Optimize this double loop
    for input_false_path in array_inputs:
        for key, data_value in inputs_data.items():
            if key == input_false_path:
                logging.debug("input_false_path: " + input_false_path)
                logging.debug("data_value: " + str(data_value))
                extensionObject = ExtensionClass(input_false_path, data_value)
                datatype_dictionary.update({data_value["order_index"]: extensionObject})
    return datatype_dictionary


def configure_logger(extra_files_path=None, debug=False):
    if not extra_files_path:
        raise Exception("Extra files path is not set. Stopping the application")

    # All-case log: log everything in a .log file
    logger_file_name = ''.join([__name__, '.log'])
    logging_file_path = os.path.join(extra_files_path, logger_file_name)

    logging.basicConfig(filename=logging_file_path, level=logging.DEBUG)

    log_stdout = logging.StreamHandler(sys.stdout)
    if not debug:
        configure_logger_user(log_stdout)
    else:
        configure_logger_dev(log_stdout)

    # stderr configuration
    configure_logger_stderr()

    logging.debug('#### Welcome to HubArchiveCreator Debug Mode ####\n')

def configure_logger_user(log_stdout=None):
    """
    The user logger is defined as follows:
    - The user gets WARN, ERROR and CRITICAL, well formatted / without traceback,
      in STDOUT
    - Full, raw output including tracebacks for errors stays available
      in STDERR
    - And further access to debug output, if needed,
      in the .log file
    :return:
    """
    if not log_stdout:
        raise Exception("No log_stdout given. Stopping the application")

    # stdout for INFO / WARN / ERROR / CRITICAL
    log_stdout.setLevel(logging.INFO)

    formatter = TraceBackFormatter('%(message)s')

    log_stdout.setFormatter(formatter)

    logging.getLogger().addHandler(log_stdout)

def configure_logger_dev(log_stdout=None):
    """
    The dev logger is defined as follows:
    - The dev gets WARN, ERROR and CRITICAL, well formatted / without traceback, in stdout
    - Full, raw output including tracebacks for errors stays available in stderr
    - And further access to debug output, if needed
    :return:
    """
    if not log_stdout:
        raise Exception("No log_stdout given. Stopping the application")
    log_format = '%(message)s'

    # stdout and stderr are both identical for INFO / WARN / ERROR / CRITICAL
    log_stdout.setLevel(logging.DEBUG)

    formatter = logging.Formatter(log_format)

    log_stdout.setFormatter(formatter)

    logging.getLogger().addHandler(log_stdout)

def configure_logger_stderr():
    """
    Configure what should be logged in stderr
    :return:
    """
    log_error = logging.StreamHandler(sys.stderr)
    log_error.setLevel(logging.ERROR)
    log_error_format = '%(message)s'

    formatter_error = logging.Formatter(log_error_format)

    log_error.setFormatter(formatter_error)

    logging.getLogger().addHandler(log_error)


if __name__ == "__main__":
    logging.getLogger(__name__)
    main(sys.argv)