Mercurial > repos > yating-l > hubarchivecreator
comparison Bed.py @ 0:3e0c61b52a06 draft
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
author | yating-l |
---|---|
date | Mon, 31 Oct 2016 16:36:25 -0400 |
parents | |
children | 4ced8f116509 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:3e0c61b52a06 |
---|---|
1 #!/usr/bin/python | |
2 | |
3 import os | |
4 import tempfile | |
5 | |
6 # Internal dependencies | |
7 from Datatype import Datatype | |
8 from Track import Track | |
9 from TrackDb import TrackDb | |
10 from util import subtools | |
11 | |
12 | |
class Bed(Datatype):
    """Datatype that turns a generic BED file into a bigBed track.

    Construction performs the whole conversion: the input is filtered and
    rewritten by ``_checkAndFixBed``, handed to ``subtools.sort``, converted
    with ``subtools.bedToBigBed`` and finally registered through
    ``self.createTrack``.

    ``self.myTrackFolderPath``, ``self.chromSizesFile`` and
    ``self.createTrack`` are not defined in this class; they are presumably
    provided by ``Datatype`` -- verify against the base class.
    """

    def __init__(self, inputBedGeneric, data_bed_generic):
        """Build the bigBed file and its track from a BED file.

        :param inputBedGeneric: path of the BED file to convert.
        :param data_bed_generic: mapping providing the keys ``"name"``,
            ``"order_index"``, ``"track_color"`` and ``"group_name"``.
        """
        super(Bed, self).__init__()

        self.track = None

        self.inputBedGeneric = inputBedGeneric

        # Kept on the instance so the temporary file outlives the sort step
        # and is still on disk when bedToBigBed reads it.
        self.sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")

        self.data_bed_generic = data_bed_generic
        self.name_bed_generic = self.data_bed_generic["name"]
        self.priority = self.data_bed_generic["order_index"]
        self.track_color = self.data_bed_generic["track_color"]
        # TODO: Think about how to avoid repetition of the group_name everywhere
        self.group_name = self.data_bed_generic["group_name"]

        # Must run before sorting: it repoints self.inputBedGeneric at the
        # cleaned-up copy of the input.
        removed_lines = self._checkAndFixBed()

        # Sort processing
        subtools.sort(self.inputBedGeneric, self.sortedBedFile.name)

        # bedToBigBed processing
        # TODO: Change the name of the bb, to tool + genome + possible adding if multiple + .bb
        trackName = "".join((self.name_bed_generic, ".bb"))

        myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName)
        with open(myBigBedFilePath, 'w') as self.bigBedFile:
            # 'bed12+1': the extra trailing column is the original score
            # appended by _checkAndFixBed (assumes 12-column input --
            # TODO confirm with callers).
            subtools.bedToBigBed(self.sortedBedFile.name,
                                 self.chromSizesFile.name,
                                 self.bigBedFile.name,
                                 typeOption='bed12+1',
                                 tab=True)

        # Create the Track Object
        self.createTrack(file_path=trackName,
                         track_name=trackName,
                         long_label=self.name_bed_generic,
                         track_type='bigBed',
                         visibility='dense',
                         priority=self.priority,
                         track_file=myBigBedFilePath,
                         track_color=self.track_color,
                         group_name=self.group_name,
                         )

        print("- Bed %s created" % self.name_bed_generic)
        if removed_lines:
            print("The lines that were removed: " + str(len(removed_lines)))

    # TODO: bed verifier, check if there are invalid strands: "."
    def _checkAndFixBed(self):
        """Write a cleaned copy of the input BED file and switch to it.

        For every line of ``self.inputBedGeneric``:

        * blank lines are skipped;
        * lines with fewer than six fields, or whose strand (6th field) is
          neither ``"+"`` nor ``"-"``, are dropped and reported;
        * all other lines are rewritten tab-separated with the score
          (5th field) replaced by ``'1000'`` and the original score appended
          as an extra last column.

        ``self.inputBedGeneric`` is then set to the path of the cleaned copy.

        :return: list of the original lines that were dropped.
        """
        # Store the lines that have been removed
        removedLines = []

        # delete=False: the file must survive closing so that later steps can
        # read it by name. Only keywords accepted by both Python 2 and 3 are
        # used here.
        # NOTE(review): nothing in this class removes this file afterwards.
        with tempfile.NamedTemporaryFile(mode='w', suffix=".bed",
                                         delete=False) as tmp:
            with open(self.inputBedGeneric, 'r') as bed:
                for line in bed:
                    fields = line.split()
                    if not fields:
                        # Blank line: carries no record, nothing to report.
                        continue
                    if len(fields) < 6 or fields[5] not in ('+', '-'):
                        # No usable strand column: drop and report the line.
                        removedLines.append(line)
                        continue
                    # Keep the original score as a trailing column and put a
                    # fixed score in its place.
                    score = fields[4]
                    fields[4] = '1000'
                    fields.append(score)
                    tmp.write('\t'.join(fields))
                    tmp.write("\n")
            fixed_bed_path = tmp.name

        self.inputBedGeneric = fixed_bed_path

        return removedLines