6
|
1 #!/usr/bin/python
|
|
2 # -*- coding: utf8 -*-
|
|
3
|
|
4 """
|
|
5 This module handles the subprocess calls of the different tools used
|
|
6 in HubArchiveCreator
|
|
7 """
|
|
8
|
|
9 import logging
|
|
10 import os
|
|
11 import subprocess
|
|
12 import sys
|
|
13
|
|
class PopenError(Exception):
    """
    Error describing a subprocess that finished with a non-zero return code.

    :param cmd: name of the command that was executed
    :param error: error output collected from the subprocess
    :param return_code: return code reported by the subprocess
    """

    def __init__(self, cmd, error, return_code):
        # Hand the values to Exception so that ``args`` is populated; with an
        # empty ``args`` the default repr() carries no detail and the instance
        # cannot be rebuilt from ``args`` (e.g. when unpickled).
        super(PopenError, self).__init__(cmd, error, return_code)
        self.cmd = cmd
        self.error = error
        self.return_code = return_code

    def __str__(self):
        message = "The subprocess {0} has returned the error: {1}.".format(self.cmd, self.return_code)
        message = ','.join((message, "Its error message is: {0}".format(self.error)))
        # repr() is kept on purpose: the rendered text stays exactly as before.
        return repr(message)
|
|
24
|
|
def _handleExceptionAndCheckCall(array_call, **kwargs):
    """
    Run a tool through subprocess.Popen, log its output and handle failures.

    Only the ``stdout``, ``stderr`` and ``shell`` keyword arguments are read;
    any other keyword argument is ignored.
    See https://docs.python.org/2/library/subprocess.html#subprocess.Popen

    On failure (OSError while launching, non-zero return code, or any other
    exception) the problem is logged and the interpreter is stopped with
    sys.exit(), so this function only returns when the call succeeded.

    :param array_call: command to run; its first element is used as the
        command name in log and error messages
    :param stdout: forwarded to Popen, defaults to subprocess.PIPE
    :param stderr: forwarded to Popen, defaults to subprocess.PIPE
    :param shell: forwarded to Popen, defaults to False
    :return: the finished subprocess.Popen object
    """
    stdout = kwargs.get('stdout', subprocess.PIPE)
    stderr = kwargs.get('stderr', subprocess.PIPE)
    shell = kwargs.get('shell', False)

    # TODO: Check the value of array_call and array_call[0]
    cmd = array_call[0]

    logging.debug("Calling {0}:".format(cmd))
    logging.debug("---------")

    # TODO: Use universal_newlines option from Popen?
    try:
        process = subprocess.Popen(array_call, stdout=stdout, stderr=stderr, shell=shell)

        # TODO: Change this because of possible memory issues =>
        # https://docs.python.org/2/library/subprocess.html#subprocess.Popen.communicate
        output, error = process.communicate()

        if stdout == subprocess.PIPE:
            logging.debug("\t{0}".format(output))
        else:
            # stdout may be None or a file descriptor, which have no ``name``;
            # fall back to the value itself instead of failing on a call
            # that actually succeeded.
            logging.debug("\tOutput in file {0}".format(getattr(stdout, 'name', stdout)))

        # If we detect an error from the subprocess, then we raise an exception
        # TODO: Manage if we raise an exception for everything, or use CRITICAL etc... but not stop process
        # TODO: The responsibility of calling sys.exit() should not be here, but up in the app.
        if process.returncode:
            if stderr == subprocess.PIPE:
                raise PopenError(cmd, error, process.returncode)
            # TODO: To handle properly with a design behind, if we received an option as a file for the error
            raise Exception("Error when calling {0}. Error as been logged in your file {1}. Error code: {2}"
                            .format(cmd, getattr(stderr, 'name', stderr), process.returncode))

    except OSError as os_error:
        # Typically raised by Popen itself, e.g. when the executable is missing.
        message = "The subprocess {0} has encountered an OSError: {1}".format(cmd, os_error.strerror)
        if os_error.filename:
            message = '\n'.join((message, ", against this file: {0}".format(os_error.filename)))
        logging.error(message)
        sys.exit(-1)
    except PopenError as popen_error:
        # Distinct name so the Popen object above is not rebound.
        message = "The subprocess {0} has returned the error: {1}.".format(popen_error.cmd,
                                                                             popen_error.return_code)
        message = '\n'.join((message, "Its error message is: {0}".format(popen_error.error)))

        logging.exception(message)

        # Propagate the tool's own return code as the exit status.
        sys.exit(popen_error.return_code)
    except Exception as unknown_error:
        message = "The subprocess {0} has encountered an unknown error: {1}".format(cmd, unknown_error)
        logging.exception(message)

        sys.exit(-1)
    return process
|
|
87
|
|
def twoBitInfo(two_bit_file_name, two_bit_info_file):
    """
    Run the `twoBitInfo` tool on two_bit_file_name, passing two_bit_info_file
    as its second argument.

    :param two_bit_file_name: first argument given to twoBitInfo
    :param two_bit_info_file: second argument given to twoBitInfo
    :return: the value returned by _handleExceptionAndCheckCall
    """
    return _handleExceptionAndCheckCall(
        ['twoBitInfo', two_bit_file_name, two_bit_info_file]
    )
|
|
98
|
|
def faToTwoBit(fasta_file_name, twoBitFile):
    """
    Run the `faToTwoBit` tool with fasta_file_name and twoBitFile as arguments.

    :param fasta_file_name: first argument given to faToTwoBit
    :param twoBitFile: second argument given to faToTwoBit
    :return: twoBitFile, unchanged
    """
    command = ['faToTwoBit']
    command.extend((fasta_file_name, twoBitFile))
    # The result of the call itself is not needed; only the name is returned.
    _handleExceptionAndCheckCall(command)
    return twoBitFile
|
|
111
|
|
def gtfToGenePred(input_gtf_file_name, gene_pred_file_name):
    """
    Run the `gtfToGenePred` tool with input_gtf_file_name and
    gene_pred_file_name as arguments.

    :param input_gtf_file_name: first argument given to gtfToGenePred
    :param gene_pred_file_name: second argument given to gtfToGenePred
    :return: the value returned by _handleExceptionAndCheckCall
    """
    return _handleExceptionAndCheckCall(
        ['gtfToGenePred', input_gtf_file_name, gene_pred_file_name]
    )
|
|
122
|
|
def gff3ToGenePred(input_gff3_file_name, gene_pred_file_name):
    """
    Run the `gff3ToGenePred` tool with input_gff3_file_name and
    gene_pred_file_name as arguments.

    :param input_gff3_file_name: first argument given to gff3ToGenePred
    :param gene_pred_file_name: second argument given to gff3ToGenePred
    :return: the value returned by _handleExceptionAndCheckCall
    """
    return _handleExceptionAndCheckCall(
        ['gff3ToGenePred', input_gff3_file_name, gene_pred_file_name]
    )
|
|
133
|
|
def genePredToBigGenePred(gene_pred_file_name, unsorted_bigGenePred_file_name):
    """
    Run the `genePredToBigGenePred` tool with gene_pred_file_name and
    unsorted_bigGenePred_file_name as arguments.

    :param gene_pred_file_name: first argument given to genePredToBigGenePred
    :param unsorted_bigGenePred_file_name: second argument given to genePredToBigGenePred
    :return: the value returned by _handleExceptionAndCheckCall
    """
    command = ['genePredToBigGenePred']
    command.extend((gene_pred_file_name, unsorted_bigGenePred_file_name))
    return _handleExceptionAndCheckCall(command)
|
|
146
|
|
def genePredToBed(gene_pred_file_name, unsorted_bed_file_name):
    """
    Run the `genePredToBed` tool with gene_pred_file_name and
    unsorted_bed_file_name as arguments.

    :param gene_pred_file_name: first argument given to genePredToBed
    :param unsorted_bed_file_name: second argument given to genePredToBed
    :return: the value returned by _handleExceptionAndCheckCall
    """
    return _handleExceptionAndCheckCall(
        ['genePredToBed', gene_pred_file_name, unsorted_bed_file_name]
    )
|
|
157
|
|
def sort(unsorted_bed_file_name, sorted_bed_file_name):
    """
    Run `sort -k 1,1 -k 2,2n` on unsorted_bed_file_name, with
    sorted_bed_file_name given to the `-o` option.

    :param unsorted_bed_file_name: file passed to sort as input
    :param sorted_bed_file_name: value of the `-o` option
    :return: the value returned by _handleExceptionAndCheckCall
    """
    sort_keys = ['-k', '1,1', '-k', '2,2n']
    command = ['sort'] + sort_keys + [unsorted_bed_file_name, '-o', sorted_bed_file_name]
    return _handleExceptionAndCheckCall(command)
|
|
168
|
|
def sortChromSizes(two_bit_info_file_name, chrom_sizes_file_name):
    """
    Run `sort -k2rn` on two_bit_info_file_name, with chrom_sizes_file_name
    given to the `-o` option.

    :param two_bit_info_file_name: file passed to sort as input
    :param chrom_sizes_file_name: value of the `-o` option
    :return: the value returned by _handleExceptionAndCheckCall
    """
    return _handleExceptionAndCheckCall(
        ['sort', '-k2rn', two_bit_info_file_name, '-o', chrom_sizes_file_name]
    )
|
|
179
|
|
def bedToBigBed(sorted_bed_file_name, chrom_sizes_file_name, big_bed_file_name,
                typeOption=None, autoSql=None, tab=False):
    """
    Run the `bedToBigBed` tool with sorted_bed_file_name, chrom_sizes_file_name
    and big_bed_file_name as positional arguments, plus the optional flags.

    :param sorted_bed_file_name: first argument given to bedToBigBed
    :param chrom_sizes_file_name: second argument given to bedToBigBed
    :param big_bed_file_name: third argument given to bedToBigBed
    :param typeOption: when truthy, appended as `-type=<typeOption>`
    :param autoSql: when truthy, appended as `-as=<autoSql>`
    :param tab: when truthy, the `-tab` flag is appended
    :return: the value returned by _handleExceptionAndCheckCall
    """
    # TODO: Move this into the _handleExceptionAndCheckCall function
    # Trace every received argument before building the command.
    traced_arguments = (
        ("sorted_bed_file_name: {0}", sorted_bed_file_name),
        ("chrom_sizes_file_name: {0}", chrom_sizes_file_name),
        ("big_bed_file_name: {0}", big_bed_file_name),
        ("typeOption: {0}", typeOption),
        ("autoSql: {0}", autoSql),
        ("tab option: {0}", tab),
    )
    for template, value in traced_arguments:
        logging.debug(template.format(value))

    command = ['bedToBigBed', sorted_bed_file_name, chrom_sizes_file_name, big_bed_file_name]
    # Optional flags are appended in a fixed order: type, autoSql, tab.
    if typeOption:
        command.append('-type=' + typeOption)
    if autoSql:
        command.append('-as=' + autoSql)
    if tab:
        command.append('-tab')

    return _handleExceptionAndCheckCall(command)
|
|
211
|
|
def sortBam(input_bam_file_name, output_sorted_bam_name):
    """
    Run `samtools sort` on input_bam_file_name, with output_sorted_bam_name
    given to the `-o` option.

    :param input_bam_file_name: file passed to samtools sort as input
    :param output_sorted_bam_name: value of the `-o` option
    :return: the value returned by _handleExceptionAndCheckCall
    """
    return _handleExceptionAndCheckCall(
        ['samtools', 'sort', input_bam_file_name, '-o', output_sorted_bam_name]
    )
|
|
222
|
|
def createBamIndex(input_sorted_bam_file_name, output_name_index_name):
    """
    Run `samtools index` with input_sorted_bam_file_name and
    output_name_index_name as arguments.

    :param input_sorted_bam_file_name: first argument given to samtools index
    :param output_name_index_name: second argument given to samtools index
    :return: the value returned by _handleExceptionAndCheckCall
    """
    return _handleExceptionAndCheckCall(
        ['samtools', 'index', input_sorted_bam_file_name, output_name_index_name]
    )
|
|
233
|
|
def pslToBigPsl(input_psl_file_name, output_bed12_file_name):
    """
    Run the `pslToBigPsl` tool with input_psl_file_name and
    output_bed12_file_name as arguments.

    :param input_psl_file_name: Name of the psl input file
    :param output_bed12_file_name: Name of the output file where to store the result of the cmd
    :return: the value returned by _handleExceptionAndCheckCall
    """
    return _handleExceptionAndCheckCall(
        ['pslToBigPsl', input_psl_file_name, output_bed12_file_name]
    )
|