Mercurial > repos > devteam > vcf_extract
comparison bedClass.py @ 0:c6fb674dfda3 draft default tip
Imported from capsule None
author | devteam |
---|---|
date | Thu, 23 Jan 2014 12:31:12 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c6fb674dfda3 |
---|---|
1 #!/usr/bin/python | |
2 | |
3 import os.path | |
4 import sys | |
5 | |
class bed:
    """Sequential reader for tab-delimited bed target files.

    Records are read one line at a time with getRecord(). The first three
    tab-separated fields of each line are used: reference sequence name,
    start and end. The start read from the file is stored plus one (see
    getRecord), the end is stored unchanged.

    Attributes set by this class:
        numberTargets         -- number of records read so far.
        referenceSequences    -- dict whose keys are the reference sequence
                                 names seen so far.
        referenceSequenceList -- the same names, in order of first appearance.
        filehandle            -- the open input (set by openBed).
        record                -- the raw text of the most recent line read.
        referenceSequence     -- reference sequence of the current record.
        start, end            -- coordinates of the current record.
        ref                   -- reset to "" for every record; not otherwise
                                 used inside this class.
    """

    def __init__(self):
        self.numberTargets = 0
        self.referenceSequences = {}
        self.referenceSequenceList = []

        # Per-record state is defined up front so that parseBed() can be
        # called before the first getRecord() without an AttributeError.
        self.filehandle = None
        self.record = ""
        self.referenceSequence = ""
        self.ref = ""
        self.start = 0
        self.end = 0

    def openBed(self, filename):
        """Open `filename` for reading; the literal name "stdin" selects sys.stdin.

        Writes a message to stderr and exits with status 1 if the file
        cannot be opened.
        """
        if filename == "stdin":
            self.filehandle = sys.stdin
            return

        try:
            self.filehandle = open(filename, "r")
        except IOError:
            sys.stderr.write("Failed to find file:  %s\n" % (filename,))
            sys.exit(1)

    # Get a bed record.
    def getRecord(self):
        """Read the next line and load it as the current record.

        Returns False at end of input (the current record is left as it
        was), True once a record has been parsed. Exits with status 1,
        after writing to stderr, when the line has fewer than three
        tab-separated fields, when start or end is not an integer, or when
        the resulting interval is empty.
        """
        self.record = self.filehandle.readline()
        if not self.record:
            return False

        self.numberTargets += 1
        self.ref = ""
        self.start = 0
        self.end = 0

        entries = self.record.rstrip("\n").split("\t")

        # A blank or truncated line has no start/end field. Report it as a
        # handled error rather than failing with an IndexError traceback.
        if len(entries) < 3:
            self.generalError(
                "bed record must contain at least three tab-delimited fields",
                "record",
                self.record.rstrip("\n"),
            )

        self.referenceSequence = entries[0]

        # Add the reference sequence to the dictionary. If it didn't
        # previously exist, append it to the list as well so that the order
        # in which the reference sequences first appear in the bed file is
        # preserved.
        if self.referenceSequence not in self.referenceSequences:
            self.referenceSequences[self.referenceSequence] = True
            self.referenceSequenceList.append(self.referenceSequence)

        # bed file should be 0-based, half-open, so the start coordinate
        # must be that in the bed file plus one.
        try:
            self.start = int(entries[1]) + 1
        except ValueError:
            self.generalError("start position is not an integer", "start", entries[1])

        try:
            self.end = int(entries[2])
        except ValueError:
            self.generalError("end position is not an integer", "end", entries[2])

        # Check that the record is a valid interval.
        if self.end < self.start:
            sys.stderr.write("Invalid target interval:\n\t%s\n" % (self.record,))
            sys.exit(1)

        return True

    # Parse through the bed file until the correct reference sequence is
    # encountered and the end position is greater than or equal to that
    # requested.
    def parseBed(self, referenceSequence, position):
        """Advance to the first record on `referenceSequence` with end >= `position`.

        Records are only ever read forwards. Returns True if reading stopped
        on a record, False if the end of the input was reached while
        searching. When True is returned the current record may belong to a
        later reference sequence if no remaining record on the requested one
        reaches `position`; callers should check self.referenceSequence.
        """
        success = True

        # Skip records until the requested reference sequence is reached.
        while success and self.referenceSequence != referenceSequence:
            success = self.getRecord()

        # Skip records on that reference sequence that end before position.
        while (
            success
            and self.referenceSequence == referenceSequence
            and self.end < position
        ):
            success = self.getRecord()

        return success

    # Close the bed file.
    def closeBed(self, filename=None):
        """Close the current input. `filename` is accepted but not used."""
        self.filehandle.close()

    # Define error messages for different handled errors.
    def generalError(self, text, field, fieldValue):
        """Write a formatted error report to stderr and exit with status 1.

        `text` is the explanation; `field` and `fieldValue` name the
        offending field and its value. The field line is omitted when
        `field` is the empty string.
        """
        sys.stderr.write("\nError encountered when attempting to read:\n")
        if field != "":
            sys.stderr.write("\t%s :  %s\n" % (field, fieldValue))
        sys.stderr.write("\n%s\n" % (text,))
        sys.exit(1)