diff util/subtools.py @ 38:d17f629f5486 draft

planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit e4c1d387db160a3bf4a1e8abc288bdffbbbe2818-dirty
author yating-l
date Fri, 06 Apr 2018 13:44:56 -0400
parents 68466f5064ce
children 4a69515eed63
line wrap: on
line diff
--- a/util/subtools.py	Thu Feb 15 16:54:36 2018 -0500
+++ b/util/subtools.py	Fri Apr 06 13:44:56 2018 -0400
@@ -9,7 +9,7 @@
 import os
 import sys
 import tempfile
-import string
+import shutil
 import logging
 
 class PopenError(Exception):
@@ -229,6 +229,58 @@
     else:
         raise ValueError('Did not find bai file')
 
+def createFastaIndex(fastaFile):
+    """
+    Build a samtools index for a FASTA file.
+
+    Runs ``samtools faidx`` on ``fastaFile``. The exit status of the call is
+    not inspected; success is judged only by the presence of the index file.
+
+    :param fastaFile: path of the FASTA file to index
+    :return: ``fastaFile`` with ``.fai`` appended
+    :raises ValueError: if that ``.fai`` path does not exist after the call
+    """
+    subprocess.call(['samtools', 'faidx', fastaFile])
+    faiPath = fastaFile + '.fai'
+    if not os.path.exists(faiPath):
+        raise ValueError('Did not find fai file')
+    return faiPath
+
+def gff3sort(inputFile, outputFile, precise=False):
+    """
+    Invoke ``gff3sort.pl`` on a GFF3 file through _handleExceptionAndCheckCall.
+
+    :param inputFile: path of the GFF3 file to sort
+    :param outputFile: path placed after a '>' token in the argument list
+    :param precise: when true, '--precise' is appended to the argument list
+    :return: whatever _handleExceptionAndCheckCall returns for the command
+    """
+    # NOTE(review): '>' is handed over as an ordinary list element; it only
+    # acts as an output redirection if the helper runs the command through a
+    # shell. Confirm against _handleExceptionAndCheckCall.
+    sortCommand = ['gff3sort.pl', inputFile, '>', outputFile]
+    if precise:
+        sortCommand += ['--precise']
+    return _handleExceptionAndCheckCall(sortCommand)
+
+def bedSort(inputFile, outputFile):
+    """
+    Invoke ``sort`` on a BED file through _handleExceptionAndCheckCall.
+
+    The sort keys passed are column 1, then column 2 numerically, then
+    column 6.
+
+    :param inputFile: path of the BED file to sort
+    :param outputFile: path placed after a '>' token in the argument list
+    :return: whatever _handleExceptionAndCheckCall returns for the command
+    """
+    # NOTE(review): '>' is handed over as an ordinary list element; it only
+    # acts as an output redirection if the helper runs the command through a
+    # shell. Confirm against _handleExceptionAndCheckCall.
+    sortKeys = ['-k1,1', '-k2,2n', '-k6,6']
+    sortCommand = ['sort'] + sortKeys + [inputFile, '>', outputFile]
+    return _handleExceptionAndCheckCall(sortCommand)
+
+def bgzip(inputFile):
+    """
+    Compress a file with the ``bgzip`` command-line tool.
+
+    The exit status of the call is not inspected; success is judged only by
+    the presence of the compressed file.
+
+    :param inputFile: path of the file to compress
+    :return: ``inputFile`` with ``.gz`` appended
+    :raises ValueError: if that ``.gz`` path does not exist after the call
+    """
+    subprocess.call(['bgzip', inputFile])
+    gzPath = inputFile + '.gz'
+    if not os.path.exists(gzPath):
+        raise ValueError('Did not find gz file')
+    return gzPath
+
+def createTabix(inputFile, dataType):
+    """
+    Create a tabix index for a file with the ``tabix`` command-line tool.
+
+    The exit status of the call is not inspected; success is judged only by
+    the presence of the index file.
+
+    :param inputFile: path of the file to index
+    :param dataType: value handed to tabix's ``-p`` option
+    :return: ``inputFile`` with ``.tbi`` appended
+    :raises ValueError: if that ``.tbi`` path does not exist after the call
+    """
+    subprocess.call(['tabix', '-p', dataType, inputFile])
+    tbiPath = inputFile + '.tbi'
+    if not os.path.exists(tbiPath):
+        raise ValueError('Did not find tbi file')
+    return tbiPath
+
+def generate_tabix_indexed_track(inputFile, dataType, outputFolder):
+    """
+    Sort, bgzip-compress and tabix-index a BED or GFF track, then copy the
+    compressed track and its index into ``outputFolder``.
+
+    The compressed track is copied under the name ``inputFile`` and the index
+    under ``inputFile + '.tbi'``.
+
+    :param inputFile: path of the track file; also used to name the copies
+    :param dataType: format string; must contain "bed" or "gff"
+    :param outputFolder: directory receiving the track and its index
+    :raises ValueError: if ``dataType`` contains neither "bed" nor "gff", or
+        if bgzip / tabix did not produce their expected output file
+    """
+    if "bed" in dataType:
+        fileType = 'bed'
+        sortTrack = bedSort
+    elif "gff" in dataType:
+        fileType = 'gff'
+        sortTrack = gff3sort
+    else:
+        # Previously this case fell through and failed on an unbound variable.
+        raise ValueError('Unsupported data type for tabix indexing: %s' % dataType)
+    # Work in a private scratch directory: bgzip writes "<input>.gz" (and by
+    # default removes its input) and tabix adds "<input>.gz.tbi", so a single
+    # temporary file handle cannot track or clean up everything created.
+    workDir = tempfile.mkdtemp()
+    try:
+        sortedPath = os.path.join(workDir, 'sorted.' + fileType)
+        sortTrack(inputFile, sortedPath)
+        # bgzip needs a path string; passing a file object raised TypeError.
+        compressedFile = bgzip(sortedPath)
+        tabixFile = createTabix(compressedFile, fileType)
+        # NOTE(review): os.path.join ignores outputFolder when inputFile is an
+        # absolute path -- confirm callers pass a bare file name here.
+        trackPath = os.path.join(outputFolder, inputFile)
+        trackIndexPath = os.path.join(outputFolder, inputFile + '.tbi')
+        shutil.copy(compressedFile, trackPath)
+        shutil.copy(tabixFile, trackIndexPath)
+    finally:
+        # Remove the sorted/compressed/index intermediates in every case.
+        shutil.rmtree(workDir, ignore_errors=True)
+
 def flatfile_to_json(inputFile, dataType, trackType, trackLabel, outputFolder, options=None, compress=True):
     if "bed" in dataType:
         fileType = "--bed"