diff Bed.py @ 0:3e0c61b52a06 draft

planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
author yating-l
date Mon, 31 Oct 2016 16:36:25 -0400
parents
children 4ced8f116509
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Bed.py	Mon Oct 31 16:36:25 2016 -0400
@@ -0,0 +1,110 @@
+#!/usr/bin/python
+
+import os
+import tempfile
+
+# Internal dependencies
+from Datatype import Datatype
+from Track import Track
+from TrackDb import TrackDb
+from util import subtools
+
+
+class Bed( Datatype ):
+    """
+    Datatype that turns a generic BED file into a bigBed track.
+
+    All the work happens in the constructor: the input BED is cleaned
+    (see _checkAndFixBed), sorted, converted with bedToBigBed and finally
+    registered as a track through createTrack.
+
+    NOTE(review): myTrackFolderPath, chromSizesFile and createTrack are not
+    defined in this class; presumably they are provided by Datatype.
+    """
+
+    def __init__( self, inputBedGeneric, data_bed_generic):
+        """
+        Build the bigBed track for one BED input.
+
+        :param inputBedGeneric: path of the BED file to convert
+        :param data_bed_generic: mapping holding the track settings; the keys
+            "name", "order_index", "track_color" and "group_name" are read
+        """
+        super(Bed, self).__init__()
+
+        self.track = None
+
+        self.inputBedGeneric = inputBedGeneric
+
+        # Receives the sorted BED; kept on self so the temporary file
+        # lives as long as the object does
+        self.sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
+
+        self.data_bed_generic = data_bed_generic
+        self.name_bed_generic = self.data_bed_generic["name"]
+        self.priority = self.data_bed_generic["order_index"]
+        self.track_color = self.data_bed_generic["track_color"]
+        # TODO: Think about how to avoid repetition of the group_name everywhere
+        self.group_name = self.data_bed_generic["group_name"]
+
+        # Cleaning repoints self.inputBedGeneric to the fixed copy, so it
+        # has to run before the sort
+        modified = self._checkAndFixBed()
+
+        # Sort processing
+        subtools.sort(self.inputBedGeneric, self.sortedBedFile.name)
+
+        # bedToBigBed processing
+        # TODO: Change the name of the bb, to tool + genome + possible adding if multiple +  .bb
+        trackName = "".join( ( self.name_bed_generic, ".bb") )
+
+        myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName)
+        with open(myBigBedFilePath, 'w') as self.bigBedFile:
+            # NOTE(review): the "+1" in the type presumably accounts for the
+            # extra score column appended by _checkAndFixBed - confirm
+            subtools.bedToBigBed(self.sortedBedFile.name,
+                                 self.chromSizesFile.name,
+                                 self.bigBedFile.name,
+                                 typeOption='bed12+1',
+                                 tab=True)
+
+        # Create the Track Object
+        self.createTrack(file_path=trackName,
+                         track_name=trackName,
+                         long_label=self.name_bed_generic, track_type='bigBed', visibility='dense',
+                         priority=self.priority,
+                         track_file=myBigBedFilePath,
+                         track_color=self.track_color,
+                         group_name=self.group_name,
+                         )
+
+        print("- Bed %s created" % self.name_bed_generic)
+        if modified:
+            print("The lines that were removed: " + str(len(modified)))
+
+    def _checkAndFixBed(self):
+        """
+        Filter and normalize the input BED file.
+
+        Every line is split on whitespace. A line is kept only when it has
+        at least 6 columns and its strand (6th column) is "+" or "-". For a
+        kept line the original score (5th column) is appended as an extra
+        last column, the score column itself is set to "1000", and the
+        fields are written back joined by tabs. Any other non-blank line is
+        dropped and reported; blank lines are skipped silently.
+
+        The result is written to a new temporary ".bed" file, which is not
+        deleted automatically, and self.inputBedGeneric is repointed to it.
+
+        :return: list of the original lines that were dropped
+        """
+        # Store the lines that have been removed
+        removedLines = []
+
+        # Only the path of the temporary file is needed: close the handle
+        # right away so no descriptor is leaked, then reopen it by name
+        temp_bed = tempfile.NamedTemporaryFile(suffix=".bed", delete=False)
+        temp_bed.close()
+
+        with open(temp_bed.name, 'w') as tmp:
+            with open(self.inputBedGeneric, 'r') as f:
+                # Stream the input instead of loading it whole in memory
+                for line in f:
+                    fields = line.split()
+                    if not fields:
+                        # Blank line: nothing to keep, nothing to report
+                        continue
+                    # Too few columns means there is no strand to check, so
+                    # such lines are handled like an invalid strand
+                    if len(fields) < 6 or fields[5] not in ('+', '-'):
+                        removedLines.append(line)
+                        continue
+                    # Preserve the real score in an extra last column
+                    # NOTE(review): the fixed '1000' is presumably there to
+                    # satisfy the BED score range expected by bedToBigBed
+                    fields.append(fields[4])
+                    fields[4] = '1000'
+                    tmp.write('\t'.join(fields))
+                    tmp.write("\n")
+        self.inputBedGeneric = temp_bed.name
+
+        return removedLines
\ No newline at end of file