Mercurial > repos > yating-l > hubarchivecreator
diff Bed.py @ 0:3e0c61b52a06 draft
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
author | yating-l |
---|---|
date | Mon, 31 Oct 2016 16:36:25 -0400 |
parents | |
children | 4ced8f116509 |
line wrap: on
line diff
#!/usr/bin/python

import os
import tempfile

# Internal dependencies
from Datatype import Datatype
from Track import Track
from TrackDb import TrackDb
from util import subtools


class Bed( Datatype ):
    """Generic BED input that is cleaned, sorted and converted to a bigBed track.

    Constructing an instance performs the whole pipeline:

    1. ``_checkAndFixBed`` rewrites the input into a temporary BED file,
       dropping rows whose strand is not "+" or "-".
    2. ``subtools.sort`` sorts that file into ``self.sortedBedFile``.
    3. ``subtools.bedToBigBed`` converts the sorted file to ``<name>.bb``
       inside ``self.myTrackFolderPath``.
    4. ``self.createTrack`` registers the resulting bigBed file.

    NOTE(review): ``myTrackFolderPath``, ``chromSizesFile`` and ``createTrack``
    are not defined in this file; presumably they are provided by ``Datatype``
    -- confirm against that class.
    """

    def __init__( self, inputBedGeneric, data_bed_generic):
        """Build the bigBed track for one BED input.

        :param inputBedGeneric: path of the BED file to process.
        :param data_bed_generic: mapping that must contain the keys
            ``"name"``, ``"order_index"``, ``"track_color"`` and
            ``"group_name"``.
        :raises KeyError: if one of those keys is missing.
        """
        super(Bed, self).__init__()

        self.track = None

        self.inputBedGeneric = inputBedGeneric

        # Kept on the instance so the temporary file lives as long as the object.
        self.sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")

        self.data_bed_generic = data_bed_generic
        self.name_bed_generic = self.data_bed_generic["name"]
        self.priority = self.data_bed_generic["order_index"]
        self.track_color = self.data_bed_generic["track_color"]
        # TODO: Think about how to avoid repetition of the group_name everywhere
        self.group_name = self.data_bed_generic["group_name"]

        # Replaces self.inputBedGeneric with the path of the cleaned copy.
        modified = self._checkAndFixBed()

        # Sort processing
        subtools.sort(self.inputBedGeneric, self.sortedBedFile.name)

        # bedToBigBed processing
        # TODO: Change the name of the bb, to tool + genome + possible adding if multiple + .bb
        trackName = "".join( ( self.name_bed_generic, ".bb") )

        myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName)
        # The handle is only used to create the file and expose its name;
        # bedToBigBed writes the content through the path.
        with open(myBigBedFilePath, 'w') as self.bigBedFile:
            subtools.bedToBigBed(self.sortedBedFile.name,
                                 self.chromSizesFile.name,
                                 self.bigBedFile.name,
                                 typeOption='bed12+1',
                                 tab=True)

        # Create the Track Object
        # NOTE: an earlier, commented-out implementation built TrackDb/Track
        # directly (with thickDrawItem='on'); createTrack replaces it.
        self.createTrack(file_path=trackName,
                         track_name=trackName,
                         long_label=self.name_bed_generic, track_type='bigBed', visibility='dense',
                         priority=self.priority,
                         track_file=myBigBedFilePath,
                         track_color=self.track_color,
                         group_name=self.group_name,
                         )

        print("- Bed %s created" % self.name_bed_generic)
        if modified:
            print("The lines that were removed: " + str(len(modified)))
        #print("- %s created in %s" % (trackName, myBigBedFilePath))

    def _checkAndFixBed(self):
        """Write a cleaned copy of the input BED file and point the instance at it.

        For every row of ``self.inputBedGeneric`` (split on whitespace):

        * blank rows are skipped;
        * rows with fewer than 6 columns, or whose strand (column 6) is not
          "+" or "-", are dropped and collected for reporting;
        * for the remaining rows the score (column 5) is replaced by
          ``'1000'`` and the original score is appended as an extra last
          column; the row is written tab-separated.

        Side effect: ``self.inputBedGeneric`` is set to the path of the new
        temporary ``.bed`` file. The file is created with ``delete=False``
        and is not removed here.

        :return: list of the raw lines that were dropped.
        """
        # Store the lines that have been removed
        removedLines = []
        # mode='w' lets text be written on both Python 2 and Python 3;
        # delete=False because the path is consumed after this handle closes.
        with tempfile.NamedTemporaryFile(mode='w', suffix=".bed", delete=False) as tmp:
            with open(self.inputBedGeneric, 'r') as f:
                # Stream the file instead of loading every line in memory.
                for line in f:
                    fields = line.split()
                    if not fields:
                        # Blank line: nothing to keep, nothing to report.
                        continue
                    # Rows too short to carry a strand would otherwise raise
                    # IndexError; treat them like rows with an invalid strand.
                    if len(fields) < 6 or fields[5] not in ('+', '-'):
                        removedLines.append(line)
                        continue
                    # Move the original score to a trailing extra column
                    # (the "+1" of the bed12+1 type used for the conversion)
                    # and store a fixed '1000' in the score column.
                    # NOTE(review): presumably done because the original
                    # scores may fall outside what bedToBigBed accepts -- confirm.
                    score = fields[4]
                    fields[4] = '1000'
                    fields.append(score)
                    tmp.write('\t'.join(fields))
                    tmp.write("\n")
        self.inputBedGeneric = tmp.name

        return removedLines