comparison util/subtools.py @ 1:85195e0d4b71 draft

planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit b1ae7349f118a0fe7923d765020dfc684cf84116-dirty
author yating-l
date Fri, 29 Sep 2017 13:32:23 -0400
parents
children
comparison
equal deleted inserted replaced
0:f9ccc5ad1713 1:85195e0d4b71
1 #!/usr/bin/python
2 # -*- coding: utf8 -*-
3
4 """
5 This class handles the subprocess calls of the different tools used
6 in HubArchiveCreator
7 """
8
9 import logging
10 import os
11 import subprocess
12 import sys
13 import string
14 import tempfile
15
16
class PopenError(Exception):
    """
    Error describing a subprocess that finished with a non-zero return code.

    Attributes:
        cmd: name of the command that was run
        error: what the subprocess wrote on its error stream
        return_code: return code of the subprocess
    """

    def __init__(self, cmd, error, return_code):
        self.cmd = cmd
        self.error = error
        self.return_code = return_code

    def __str__(self):
        # Two sentences glued with a comma; the result is passed through
        # repr(), so the returned text includes the surrounding quotes.
        summary = "The subprocess {0} has returned the error: {1}.".format(
            self.cmd, self.return_code)
        detail = "Its error message is: {0}".format(self.error)
        return repr(summary + ',' + detail)
29
30
def _handleExceptionAndCheckCall(array_call, **kwargs):
    """
    Run an external tool, log what happened and stop the program on failure.

    Only the ``stdout``, ``stderr`` and ``shell`` keyword arguments are
    honoured; they are forwarded to subprocess.Popen. Any other keyword
    argument is ignored.
    See https://docs.python.org/2/library/subprocess.html#subprocess.check_call

    :param array_call: the command to run; its first element is used as the
        tool name in log and error messages
    :param kwargs: optional ``stdout`` / ``stderr`` (default subprocess.PIPE)
        and ``shell`` (default False)
    :return: the finished subprocess.Popen object
    :raises SystemExit: with the tool's return code when the tool fails and
        stderr was piped; with -1 for an OSError while launching, for a
        failure with stderr redirected elsewhere, or for any unexpected error
    """
    stdout = kwargs.get('stdout', subprocess.PIPE)
    stderr = kwargs.get('stderr', subprocess.PIPE)
    shell = kwargs.get('shell', False)

    cmd = array_call[0]

    # TODO: Check the value of array_call and <=[0]
    logging.debug("Calling {0}:".format(cmd))
    logging.debug("%s", array_call)
    logging.debug("---------")

    # TODO: Use universal_newlines option from Popen?
    try:
        p = subprocess.Popen(array_call, stdout=stdout,
                             stderr=stderr, shell=shell)

        # TODO: Change this because of possible memory issues => https://docs.python.org/2/library/subprocess.html#subprocess.Popen.communicate
        output, error = p.communicate()

        if stdout == subprocess.PIPE:
            logging.debug("\t{0}".format(output))
        else:
            # stdout may be a file object, but also a raw file descriptor or
            # None: only file objects have a ``name``, so fall back to the
            # value itself instead of failing on a successful call.
            logging.debug("\tOutput in file {0}".format(
                getattr(stdout, 'name', stdout)))

        # If we detect an error from the subprocess, then we raise an exception
        # TODO: Manage if we raise an exception for everything, or use CRITICAL etc... but not stop process
        # TODO: The responsability of returning a sys.exit() should not be there, but up in the app.
        if p.returncode:
            if stderr == subprocess.PIPE:
                raise PopenError(cmd, error, p.returncode)
            else:
                # TODO: To Handle properly with a design behind, if we received a option as a file for the error
                # Same fallback as for stdout: stderr is not necessarily a
                # file object carrying a ``name``.
                raise Exception("Error when calling {0}. Error as been logged in your file {1}. Error code: {2}"
                                .format(cmd, getattr(stderr, 'name', stderr), p.returncode))

    except OSError as e:
        # Raised by Popen itself, e.g. when the executable cannot be launched.
        message = "The subprocess {0} has encountered an OSError: {1}".format(
            cmd, e.strerror)
        if e.filename:
            message = '\n'.join(
                (message, ", against this file: {0}".format(e.filename)))
        logging.error(message)
        sys.exit(-1)
    except PopenError as popen_error:
        # Named differently from ``p`` so the Popen object is not shadowed.
        message = "The subprocess {0} has returned the error: {1}.".format(
            popen_error.cmd, popen_error.return_code)
        message = '\n'.join(
            (message, "Its error message is: {0}".format(popen_error.error)))

        logging.exception(message)

        # Propagate the tool's own return code to the caller of the program.
        sys.exit(popen_error.return_code)
    except Exception as e:
        message = "The subprocess {0} has encountered an unknown error: {1}".format(
            cmd, e)
        logging.exception(message)

        sys.exit(-1)
    return p
99
100
def twoBitInfo(two_bit_file_name, two_bit_info_file):
    """
    Run the ``twoBitInfo`` tool on two_bit_file_name.

    :param two_bit_file_name: first argument given to twoBitInfo
    :param two_bit_info_file: second argument given to twoBitInfo, the file
        the result is written into
    :return: whatever _handleExceptionAndCheckCall returns for the call
    """
    command = ['twoBitInfo']
    command.extend((two_bit_file_name, two_bit_info_file))
    return _handleExceptionAndCheckCall(command)
111
112
def faToTwoBit(fasta_file_name, twoBitFile):
    """
    Run the UCSC ``faToTwoBit`` tool on fasta_file_name.

    :param fasta_file_name: first argument given to faToTwoBit
    :param twoBitFile: second argument given to faToTwoBit
    :return: twoBitFile, exactly as it was passed in
    """
    command = ['faToTwoBit'] + [fasta_file_name, twoBitFile]
    # The result of the call itself is deliberately not returned.
    _handleExceptionAndCheckCall(command)
    return twoBitFile
125
126
def gtfToGenePred(input_gtf_file_name, gene_pred_file_name):
    """
    Run the ``gtfToGenePred`` tool.

    :param input_gtf_file_name: first argument given to gtfToGenePred
    :param gene_pred_file_name: second argument given to gtfToGenePred, the
        file the result is written into
    :return: whatever _handleExceptionAndCheckCall returns for the call
    """
    command = ['gtfToGenePred']
    command.extend((input_gtf_file_name, gene_pred_file_name))
    return _handleExceptionAndCheckCall(command)
137
138
def gff3ToGenePred(input_gff3_file_name, gene_pred_file_name):
    """
    Run the ``gff3ToGenePred`` tool.

    :param input_gff3_file_name: first argument given to gff3ToGenePred
    :param gene_pred_file_name: second argument given to gff3ToGenePred, the
        file the result is written into
    :return: whatever _handleExceptionAndCheckCall returns for the call
    """
    command = ['gff3ToGenePred']
    command.extend((input_gff3_file_name, gene_pred_file_name))
    return _handleExceptionAndCheckCall(command)
149
150
def genePredToBigGenePred(gene_pred_file_name, unsorted_bigGenePred_file_name):
    """
    Run the ``genePredToBigGenePred`` tool.

    :param gene_pred_file_name: first argument given to genePredToBigGenePred
    :param unsorted_bigGenePred_file_name: second argument given to
        genePredToBigGenePred, the file the result is written into
    :return: whatever _handleExceptionAndCheckCall returns for the call
    """
    tool_arguments = [gene_pred_file_name, unsorted_bigGenePred_file_name]
    return _handleExceptionAndCheckCall(
        ['genePredToBigGenePred'] + tool_arguments)
163
164
def genePredToBed(gene_pred_file_name, unsorted_bed_file_name):
    """
    Run the ``genePredToBed`` tool.

    :param gene_pred_file_name: first argument given to genePredToBed
    :param unsorted_bed_file_name: second argument given to genePredToBed,
        the file the result is written into
    :return: whatever _handleExceptionAndCheckCall returns for the call
    """
    tool_arguments = [gene_pred_file_name, unsorted_bed_file_name]
    return _handleExceptionAndCheckCall(['genePredToBed'] + tool_arguments)
175
176
def sort(unsorted_bed_file_name, sorted_bed_file_name):
    """
    Run ``sort -k 1,1 -k 2,2n`` on unsorted_bed_file_name.

    :param unsorted_bed_file_name: file given to sort as input
    :param sorted_bed_file_name: file given to sort through its -o option
    :return: whatever _handleExceptionAndCheckCall returns for the call
    """
    sort_keys = ['-k', '1,1', '-k', '2,2n']
    destination = ['-o', sorted_bed_file_name]
    command = ['sort'] + sort_keys + [unsorted_bed_file_name] + destination
    return _handleExceptionAndCheckCall(command)
188
189
def sortChromSizes(two_bit_info_file_name, chrom_sizes_file_name):
    """
    Run ``sort -k2rn`` on two_bit_info_file_name.

    :param two_bit_info_file_name: file given to sort as input
    :param chrom_sizes_file_name: file given to sort through its -o option
    :return: whatever _handleExceptionAndCheckCall returns for the call
    """
    destination = ['-o', chrom_sizes_file_name]
    command = ['sort', '-k2rn', two_bit_info_file_name] + destination
    return _handleExceptionAndCheckCall(command)
201
202
def bedToBigBed(sorted_bed_file_name, chrom_sizes_file_name, big_bed_file_name, options=None):
    """
    Run the ``bedToBigBed`` tool, optionally with extra command-line flags.

    :param sorted_bed_file_name: first argument given to bedToBigBed
    :param chrom_sizes_file_name: second argument given to bedToBigBed
    :param big_bed_file_name: third argument given to bedToBigBed, the file
        the result is written into
    :param options: optional mapping; the keys read are "typeOption"
        (added as -type=<value>), "autoSql" (added as -as=<value>), "tab"
        (adds -tab when truthy) and "extraIndex" (added as
        -extraIndex=<value>)
    :return: whatever _handleExceptionAndCheckCall returns for the call
    """
    command = ['bedToBigBed']
    command.extend(
        (sorted_bed_file_name, chrom_sizes_file_name, big_bed_file_name))

    if options:
        type_value = options.get("typeOption")
        auto_sql_value = options.get("autoSql")
        use_tab = options.get("tab")
        extra_index_value = options.get("extraIndex")

        # Flags are appended in a fixed order; falsy values add nothing.
        if type_value:
            command.append('-type=' + type_value)
        if auto_sql_value:
            command.append('-as=' + auto_sql_value)
        if use_tab:
            command.append('-tab')
        if extra_index_value:
            command.append('-extraIndex=' + extra_index_value)

    return _handleExceptionAndCheckCall(command)
232
def sortBam(input_bam_file_name, output_sorted_bam_name):
    """
    Run ``samtools sort`` on input_bam_file_name.

    :param input_bam_file_name: file given to samtools sort as input
    :param output_sorted_bam_name: file given to samtools sort through its
        -o option
    :return: whatever _handleExceptionAndCheckCall returns for the call
    """
    command = ['samtools', 'sort']
    command.extend((input_bam_file_name, '-o', output_sorted_bam_name))
    return _handleExceptionAndCheckCall(command)
244
245
def createBamIndex(input_sorted_bam_file_name, output_name_index_name):
    """
    Run ``samtools index`` on input_sorted_bam_file_name.

    :param input_sorted_bam_file_name: first argument given to samtools index
    :param output_name_index_name: second argument given to samtools index,
        the file the result is written into
    :return: whatever _handleExceptionAndCheckCall returns for the call
    """
    command = ['samtools', 'index']
    command.extend((input_sorted_bam_file_name, output_name_index_name))
    return _handleExceptionAndCheckCall(command)
257
258
def pslToBigPsl(input_psl_file_name, output_bed12_file_name):
    """
    Run the ``pslToBigPsl`` tool on input_psl_file_name.

    :param input_psl_file_name: name of the psl input file
    :param output_bed12_file_name: name of the file where the result of the
        command is stored
    :return: whatever _handleExceptionAndCheckCall returns for the call
    """
    tool_arguments = [input_psl_file_name, output_bed12_file_name]
    return _handleExceptionAndCheckCall(['pslToBigPsl'] + tool_arguments)
271
272 # Sanitize trackName, because a track name must begin with a letter and
273 # contain only the following characters: [a-zA-Z0-9_].
274 # See the "track" Common settings at:
275 # https://genome.ucsc.edu/goldenpath/help/trackDb/trackDbHub.html#bigPsl_-_Pairwise_Alignments
276
def validateFiles(input_file, chrom_sizes_file_name, file_type, options=None):
    """
    Run the ``validateFiles`` tool on input_file.

    :param input_file: file to validate, given as the last argument
    :param chrom_sizes_file_name: value of the -chromInfo= flag
    :param file_type: value of the -type= flag
    :param options: optional mapping; the keys read are "autoSql" (added as
        -as=<value>) and "tab" (adds -tab when truthy)
    :return: whatever _handleExceptionAndCheckCall returns for the call
    """
    chrom_info_flag = ''.join(['-chromInfo=', chrom_sizes_file_name])
    type_flag = ''.join(['-type=', file_type])
    command = ['validateFiles', chrom_info_flag, type_flag, input_file]

    if options:
        use_tab = options.get("tab")
        auto_sql_value = options.get("autoSql")
        logging.debug("tab: {0}".format(use_tab))
        logging.debug("autoSql: {0}".format(auto_sql_value))
        # -as comes before -tab on the command line; falsy values add nothing.
        if auto_sql_value:
            command.append('-as=' + auto_sql_value)
        if use_tab:
            command.append('-tab')

    return _handleExceptionAndCheckCall(command)
299
def pslCheck(input_file, options=None):
    """
    Run the ``pslCheck`` tool on input_file.

    :param input_file: the only argument given to pslCheck
    :param options: accepted but not used by this function
    :return: whatever _handleExceptionAndCheckCall returns for the call
    """
    return _handleExceptionAndCheckCall(['pslCheck', input_file])
310
311
312