Mercurial > repos > yating-l > hubarchivecreatortest
comparison util/subtools.py @ 1:85195e0d4b71 draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit b1ae7349f118a0fe7923d765020dfc684cf84116-dirty
| author | yating-l |
|---|---|
| date | Fri, 29 Sep 2017 13:32:23 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:f9ccc5ad1713 | 1:85195e0d4b71 |
|---|---|
| 1 #!/usr/bin/python | |
| 2 # -*- coding: utf8 -*- | |
| 3 | |
| 4 """ | |
| 5 This class handles the subprocess calls of the different tools used | |
| 6 in HubArchiveCreator | |
| 7 """ | |
| 8 | |
| 9 import logging | |
| 10 import os | |
| 11 import subprocess | |
| 12 import sys | |
| 13 import string | |
| 14 import tempfile | |
| 15 | |
| 16 | |
class PopenError(Exception):
    """
    Error describing a subprocess that ended with a non-zero return code.

    Attributes:
        cmd: the command that was run
        error: the error output captured from the subprocess
        return_code: the return code of the subprocess
    """

    def __init__(self, cmd, error, return_code):
        # Initialise the base Exception so that ``args`` is populated:
        # without it ``args`` is empty and the exception cannot be
        # pickled/unpickled or rebuilt from its arguments.
        super(PopenError, self).__init__(cmd, error, return_code)
        self.cmd = cmd
        self.error = error
        self.return_code = return_code

    def __str__(self):
        message = "The subprocess {0} has returned the error: {1}.".format(
            self.cmd, self.return_code)
        message = ','.join(
            (message, "Its error message is: {0}".format(self.error)))
        # The quoted (repr) form is kept on purpose: it is the text this
        # exception has always produced.
        return repr(message)
| 29 | |
| 30 | |
def _handleExceptionAndCheckCall(array_call, **kwargs):
    """
    Run an external tool through subprocess.Popen and exit on any failure.

    Only the ``stdout``, ``stderr`` and ``shell`` keyword arguments are read;
    they are forwarded to subprocess.Popen, see
    https://docs.python.org/2/library/subprocess.html#subprocess.Popen

    :param array_call: the command to run, as a list of arguments (or a
        single command string, typically together with ``shell=True``)
    :param kwargs: optional ``stdout`` and ``stderr`` (both default to
        subprocess.PIPE) and ``shell`` (defaults to False)
    :return: the finished subprocess.Popen object
    :raise SystemExit: with the tool's return code when it fails while
        stderr is piped, and with -1 for every other failure
    """
    stdout = kwargs.get('stdout', subprocess.PIPE)
    stderr = kwargs.get('stderr', subprocess.PIPE)
    shell = kwargs.get('shell', False)

    # The tool name is only used in log and error messages. A command given
    # as one string must not be indexed: that would yield its first character.
    if isinstance(array_call, (list, tuple)):
        cmd = array_call[0]
    else:
        cmd = array_call

    logging.debug("Calling %s:", cmd)
    logging.debug("%s", array_call)
    logging.debug("---------")

    # TODO: Use universal_newlines option from Popen?
    try:
        process = subprocess.Popen(array_call, stdout=stdout,
                                   stderr=stderr, shell=shell)

        # TODO: Change this because of possible memory issues =>
        # https://docs.python.org/2/library/subprocess.html#subprocess.Popen.communicate
        output, error = process.communicate()

        if stdout == subprocess.PIPE:
            logging.debug("\t%s", output)
        else:
            # stdout may be a file descriptor or any object without a
            # ``name`` attribute; fall back to the object itself instead of
            # failing a successful call with an AttributeError.
            logging.debug("\tOutput in file %s",
                          getattr(stdout, 'name', stdout))

        # If we detect an error from the subprocess, then we raise an exception
        # TODO: Manage if we raise an exception for everything, or use CRITICAL etc... but not stop process
        # TODO: The responsibility of returning a sys.exit() should not be there, but up in the app.
        if process.returncode:
            if stderr == subprocess.PIPE:
                raise PopenError(cmd, error, process.returncode)
            # TODO: To Handle properly with a design behind, if we received a option as a file for the error
            raise Exception("Error when calling {0}. Error as been logged in your file {1}. Error code: {2}"
                            .format(cmd, getattr(stderr, 'name', stderr),
                                    process.returncode))

    except OSError as os_error:
        message = "The subprocess {0} has encountered an OSError: {1}".format(
            cmd, os_error.strerror)
        if os_error.filename:
            message = '\n'.join(
                (message, ", against this file: {0}".format(os_error.filename)))
        logging.error(message)
        sys.exit(-1)
    except PopenError as popen_error:
        # Bound to its own name so the Popen object is never shadowed.
        message = "The subprocess {0} has returned the error: {1}.".format(
            popen_error.cmd, popen_error.return_code)
        message = '\n'.join(
            (message, "Its error message is: {0}".format(popen_error.error)))

        logging.exception(message)

        sys.exit(popen_error.return_code)
    except Exception as unknown_error:
        message = "The subprocess {0} has encountered an unknown error: {1}".format(
            cmd, unknown_error)
        logging.exception(message)

        sys.exit(-1)
    return process
| 99 | |
| 100 | |
def twoBitInfo(two_bit_file_name, two_bit_info_file):
    """
    Run the `twoBitInfo` tool, which writes its result into two_bit_info_file.

    :param two_bit_file_name: first argument given to twoBitInfo
    :param two_bit_info_file: second argument given to twoBitInfo
    :return: the object returned by _handleExceptionAndCheckCall
    """
    command = ['twoBitInfo']
    command.extend((two_bit_file_name, two_bit_info_file))
    return _handleExceptionAndCheckCall(command)
| 111 | |
| 112 | |
def faToTwoBit(fasta_file_name, twoBitFile):
    """
    Run the UCSC `faToTwoBit` tool and hand back the twoBit file argument.

    :param fasta_file_name: first argument given to faToTwoBit
    :param twoBitFile: second argument given to faToTwoBit
    :return: twoBitFile, exactly as it was received
    """
    command = ['faToTwoBit']
    command += [fasta_file_name, twoBitFile]
    _handleExceptionAndCheckCall(command)
    return twoBitFile
| 125 | |
| 126 | |
def gtfToGenePred(input_gtf_file_name, gene_pred_file_name):
    """
    Run `gtfToGenePred`, which writes its result into gene_pred_file_name.

    :param input_gtf_file_name: first argument given to gtfToGenePred
    :param gene_pred_file_name: second argument given to gtfToGenePred
    :return: the object returned by _handleExceptionAndCheckCall
    """
    command = ['gtfToGenePred'] + [input_gtf_file_name, gene_pred_file_name]
    return _handleExceptionAndCheckCall(command)
| 137 | |
| 138 | |
def gff3ToGenePred(input_gff3_file_name, gene_pred_file_name):
    """
    Run `gff3ToGenePred`, which writes its result into gene_pred_file_name.

    :param input_gff3_file_name: first argument given to gff3ToGenePred
    :param gene_pred_file_name: second argument given to gff3ToGenePred
    :return: the object returned by _handleExceptionAndCheckCall
    """
    command = ['gff3ToGenePred'] + [input_gff3_file_name, gene_pred_file_name]
    return _handleExceptionAndCheckCall(command)
| 149 | |
| 150 | |
def genePredToBigGenePred(gene_pred_file_name, unsorted_bigGenePred_file_name):
    """
    Run `genePredToBigGenePred`, which writes its result into
    unsorted_bigGenePred_file_name.

    :param gene_pred_file_name: first argument given to the tool
    :param unsorted_bigGenePred_file_name: second argument given to the tool
    :return: the object returned by _handleExceptionAndCheckCall
    """
    command = ['genePredToBigGenePred']
    for argument in (gene_pred_file_name, unsorted_bigGenePred_file_name):
        command.append(argument)
    return _handleExceptionAndCheckCall(command)
| 163 | |
| 164 | |
def genePredToBed(gene_pred_file_name, unsorted_bed_file_name):
    """
    Run `genePredToBed`, which writes its result into unsorted_bed_file_name.

    :param gene_pred_file_name: first argument given to genePredToBed
    :param unsorted_bed_file_name: second argument given to genePredToBed
    :return: the object returned by _handleExceptionAndCheckCall
    """
    command = ['genePredToBed']
    command.extend([gene_pred_file_name, unsorted_bed_file_name])
    return _handleExceptionAndCheckCall(command)
| 175 | |
| 176 | |
def sort(unsorted_bed_file_name, sorted_bed_file_name):
    """
    Run `sort -k 1,1 -k 2,2n` on unsorted_bed_file_name, with the output
    sent to sorted_bed_file_name through the `-o` option.

    :param unsorted_bed_file_name: the file handed to sort as input
    :param sorted_bed_file_name: the value given to the `-o` option
    :return: the object returned by _handleExceptionAndCheckCall
    """
    sort_keys = ['-k', '1,1', '-k', '2,2n']
    command = ['sort'] + sort_keys + [unsorted_bed_file_name,
                                      '-o', sorted_bed_file_name]
    return _handleExceptionAndCheckCall(command)
| 188 | |
| 189 | |
def sortChromSizes(two_bit_info_file_name, chrom_sizes_file_name):
    """
    Run `sort -k2rn` on two_bit_info_file_name, with the output sent to
    chrom_sizes_file_name through the `-o` option.

    :param two_bit_info_file_name: the file handed to sort as input
    :param chrom_sizes_file_name: the value given to the `-o` option
    :return: the object returned by _handleExceptionAndCheckCall
    """
    command = ['sort', '-k2rn']
    command.append(two_bit_info_file_name)
    command.extend(['-o', chrom_sizes_file_name])
    return _handleExceptionAndCheckCall(command)
| 201 | |
| 202 | |
def bedToBigBed(sorted_bed_file_name, chrom_sizes_file_name, big_bed_file_name, options=None):
    """
    Run `bedToBigBed` on sorted_bed_file_name with chrom_sizes_file_name,
    the result going to big_bed_file_name.

    :param sorted_bed_file_name: first argument given to bedToBigBed
    :param chrom_sizes_file_name: second argument given to bedToBigBed
    :param big_bed_file_name: third argument given to bedToBigBed
    :param options: optional mapping; the keys read are "typeOption"
        (-type=), "autoSql" (-as=), "tab" (-tab flag) and "extraIndex"
        (-extraIndex=). Falsy values are ignored.
    :return: the object returned by _handleExceptionAndCheckCall
    """
    command = ['bedToBigBed', sorted_bed_file_name,
               chrom_sizes_file_name, big_bed_file_name]

    # Without options the bare command is run as is.
    if not options:
        return _handleExceptionAndCheckCall(command)

    bed_type = options.get("typeOption")
    auto_sql = options.get("autoSql")
    tab_separated = options.get("tab")
    extra_index = options.get("extraIndex")

    # Flags are appended in a fixed order: -type, -as, -tab, -extraIndex.
    if bed_type:
        command.append('-type=' + bed_type)
    if auto_sql:
        command.append('-as=' + auto_sql)
    if tab_separated:
        command.append('-tab')
    if extra_index:
        command.append('-extraIndex=' + extra_index)

    return _handleExceptionAndCheckCall(command)
| 232 | |
def sortBam(input_bam_file_name, output_sorted_bam_name):
    """
    Run `samtools sort` on input_bam_file_name, with the output sent to
    output_sorted_bam_name through the `-o` option.

    :param input_bam_file_name: the file handed to samtools sort as input
    :param output_sorted_bam_name: the value given to the `-o` option
    :return: the object returned by _handleExceptionAndCheckCall
    """
    command = ['samtools', 'sort']
    command.append(input_bam_file_name)
    command.extend(['-o', output_sorted_bam_name])
    return _handleExceptionAndCheckCall(command)
| 244 | |
| 245 | |
def createBamIndex(input_sorted_bam_file_name, output_name_index_name):
    """
    Run `samtools index` on input_sorted_bam_file_name, the result going to
    output_name_index_name.

    :param input_sorted_bam_file_name: first argument given to samtools index
    :param output_name_index_name: second argument given to samtools index
    :return: the object returned by _handleExceptionAndCheckCall
    """
    command = ['samtools', 'index']
    command += [input_sorted_bam_file_name, output_name_index_name]
    return _handleExceptionAndCheckCall(command)
| 257 | |
| 258 | |
def pslToBigPsl(input_psl_file_name, output_bed12_file_name):
    """
    Run `pslToBigPsl` on input_psl_file_name, the result going to
    output_bed12_file_name.

    :param input_psl_file_name: Name of the psl input file
    :param output_bed12_file_name: Name of the file that stores the output
        of the command
    :return: the object returned by _handleExceptionAndCheckCall
    """
    # Tool name first, then its two positional arguments.
    command = ['pslToBigPsl']
    command.extend((input_psl_file_name, output_bed12_file_name))
    return _handleExceptionAndCheckCall(command)
| 271 | |
| 272 # Sanitize trackName, because a track name must begin with a letter and | |
| 273 # contain only the following characters: [a-zA-Z0-9_]. | |
| 274 # See the "track" Common settings at: | |
| 275 # https://genome.ucsc.edu/goldenpath/help/trackDb/trackDbHub.html#bigPsl_-_Pairwise_Alignments | |
| 276 | |
def validateFiles(input_file, chrom_sizes_file_name, file_type, options=None):
    """
    Run `validateFiles` on input_file with chrom_sizes_file_name and
    file_type.

    :param input_file: the file handed to validateFiles
    :param chrom_sizes_file_name: the value given to `-chromInfo=`
    :param file_type: the value given to `-type=`
    :param options: optional mapping; the keys read are "autoSql" (-as=)
        and "tab" (-tab flag). Falsy values are ignored.
    :return: the object returned by _handleExceptionAndCheckCall
    """
    chrom_info_flag = '-chromInfo=' + chrom_sizes_file_name
    type_flag = '-type=' + file_type
    command = ['validateFiles', chrom_info_flag, type_flag, input_file]

    if not options:
        return _handleExceptionAndCheckCall(command)

    tab_separated = options.get("tab")
    auto_sql = options.get("autoSql")
    logging.debug("tab: {0}".format(tab_separated))
    logging.debug("autoSql: {0}".format(auto_sql))

    # -as is appended before -tab.
    if auto_sql:
        command.append('-as=' + auto_sql)
    if tab_separated:
        command.append('-tab')

    return _handleExceptionAndCheckCall(command)
| 299 | |
def pslCheck(input_file, options=None):
    """
    Run `pslCheck` on input_file.

    :param input_file: the only argument given to pslCheck
    :param options: accepted but not read by this function
    :return: the object returned by _handleExceptionAndCheckCall
    """
    return _handleExceptionAndCheckCall(['pslCheck', input_file])
| 310 | |
| 311 | |
| 312 |
