annotate bedClass.py @ 0:f5d5eed73180 draft default tip

Imported from capsule None
author devteam
date Thu, 23 Jan 2014 12:31:34 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f5d5eed73180 Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/python
f5d5eed73180 Imported from capsule None
devteam
parents:
diff changeset
2
f5d5eed73180 Imported from capsule None
devteam
parents:
diff changeset
3 import os.path
f5d5eed73180 Imported from capsule None
devteam
parents:
diff changeset
4 import sys
f5d5eed73180 Imported from capsule None
devteam
parents:
diff changeset
5
f5d5eed73180 Imported from capsule None
devteam
parents:
diff changeset
class bed:
    """Sequential reader for tab-delimited BED interval records.

    Records are read one at a time from a file (or standard input). After
    each successful read the current interval is exposed through the
    attributes ``referenceSequence``, ``start`` and ``end``.

    Attributes:
        numberTargets: number of records read so far.
        referenceSequences: dict keyed by every reference sequence seen.
        referenceSequenceList: reference sequences in order of first
            appearance in the file.
        filehandle: the open input stream, or None before openBed().
        record: the raw text of the most recently read line.
        referenceSequence: first column of the current record.
        start: second column of the current record plus one.
        end: third column of the current record.
    """

    def __init__(self):
        self.numberTargets = 0
        self.referenceSequences = {}
        self.referenceSequenceList = []

        # Define the per-record state up front so that parseBed() and
        # closeBed() can be called before any record has been read.
        self.filehandle = None
        self.record = ""
        self.referenceSequence = ""
        self.ref = ""
        self.start = 0
        self.end = 0

    def openBed(self, filename):
        """Open `filename` for reading; the name "stdin" selects sys.stdin.

        Writes a message to stderr and exits with status 1 if the file
        cannot be opened.
        """
        if filename == "stdin":
            self.filehandle = sys.stdin
            return

        try:
            self.filehandle = open(filename, "r")
        except IOError:
            sys.stderr.write("Failed to find file:  %s\n" % (filename,))
            sys.exit(1)

    # Get a bed record.
    def getRecord(self):
        """Read the next record and update the current interval.

        Returns:
            True if a record was read, False at end of input.

        Exits with status 1 (after writing a diagnostic to stderr) if the
        record has fewer than three tab-delimited fields, if the start or
        end field is not an integer, or if the interval is invalid.
        """
        self.record = self.filehandle.readline()
        if not self.record:
            return False

        self.numberTargets = self.numberTargets + 1
        self.ref = ""
        self.start = 0
        self.end = 0

        entries = self.record.rstrip("\n").split("\t")

        # Reject short records explicitly; indexing a missing column would
        # otherwise escape as an uncaught IndexError.
        if len(entries) < 3:
            text = "bed record must contain at least three tab-delimited fields"
            self.generalError(text, "record", self.record.rstrip("\n"))

        self.referenceSequence = entries[0]

        # Add the reference sequence to the dictionary. If it didn't
        # previously exist, append it to the end of the list as well. This
        # preserves the order in which the reference sequences first
        # appeared in the file.
        if self.referenceSequence not in self.referenceSequences:
            self.referenceSequences[self.referenceSequence] = True
            self.referenceSequenceList.append(self.referenceSequence)

        # bed file should be 0-based, half-open, so the start coordinate
        # must be that in the bed file plus one.
        try:
            self.start = int(entries[1]) + 1
        except ValueError:
            text = "start position need is not an integer"
            self.generalError(text, "start", entries[1])

        try:
            self.end = int(entries[2])
        except ValueError:
            text = "end position need is not an integer"
            self.generalError(text, "end", entries[2])

        # Check that the record is a valid interval.
        if self.end < self.start:
            sys.stderr.write(
                "Invalid target interval:\n\t%s\n" % self.record.rstrip("\n")
            )
            sys.exit(1)

        return True

    # Parse through the bed file until the correct reference sequence is
    # encountered and the end position is greater than or equal to that
    # requested.
    def parseBed(self, referenceSequence, position):
        """Advance to the first record that can contain `position`.

        Records are consumed until the current record is on
        `referenceSequence`, then further records on that reference
        sequence are consumed while their end lies before `position`.

        Returns:
            True if the reader stopped on a record, False if the end of
            the input was reached while searching.
        """
        success = True

        # Skip records belonging to other reference sequences.
        while success and self.referenceSequence != referenceSequence:
            success = self.getRecord()

        # Skip records on this reference sequence that end before position.
        while (success
               and self.referenceSequence == referenceSequence
               and self.end < position):
            success = self.getRecord()

        return success

    # Close the bed file.
    def closeBed(self, filename):
        """Close the input stream, if one was opened.

        `filename` is accepted for interface compatibility and is not used.
        """
        if self.filehandle is not None:
            self.filehandle.close()

    # Define error messages for different handled errors.
    def generalError(self, text, field, fieldValue):
        """Write a diagnostic to stderr and exit with status 1.

        Args:
            text: description of the problem.
            field: name of the offending field; "" suppresses the
                field/value line.
            fieldValue: the offending value as read from the record.
        """
        sys.stderr.write("\nError encountered when attempting to read:\n")
        if field != "":
            sys.stderr.write("\t%s :  %s\n" % (field, fieldValue))
        sys.stderr.write("\n%s\n" % (text,))
        sys.exit(1)