annotate gff2Togff3.py @ 21:211c9b3a5c15 draft

planemo upload commit 6e3286c6569d531846474dcd6959378af0317ce3-dirty
author yating-l
date Fri, 12 Aug 2016 18:26:59 -0400
parents a7f57cf408e8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
21
211c9b3a5c15 planemo upload commit 6e3286c6569d531846474dcd6959378af0317ce3-dirty
yating-l
parents: 19
diff changeset
1 import argparse
211c9b3a5c15 planemo upload commit 6e3286c6569d531846474dcd6959378af0317ce3-dirty
yating-l
parents: 19
diff changeset
2 import sys
211c9b3a5c15 planemo upload commit 6e3286c6569d531846474dcd6959378af0317ce3-dirty
yating-l
parents: 19
diff changeset
3 import fileinput
19
a7f57cf408e8 planemo upload commit ff6c810cf2b46e59b45738700e68431743e4b83d
yating-l
parents:
diff changeset
4 from Group import Group
a7f57cf408e8 planemo upload commit ff6c810cf2b46e59b45738700e68431743e4b83d
yating-l
parents:
diff changeset
5
21
211c9b3a5c15 planemo upload commit 6e3286c6569d531846474dcd6959378af0317ce3-dirty
yating-l
parents: 19
diff changeset
def main():
    """Parse the command line and convert a gff file to gff3.

    The input is the file named by ``--input`` when that option is given;
    otherwise it is read from standard input when something is piped in.
    The result is written to the file named by ``--output`` (required).

    Exits through ``parser.error`` (usage message, status 2) when there is
    no ``--input`` and standard input is an interactive terminal, because
    there is then nothing to convert.
    """
    parser = argparse.ArgumentParser(description='Get a gff file and the output gff3 file')
    parser.add_argument('--input', help='input gff file')
    parser.add_argument('--output', help='output gff3 file', required=True)
    args = parser.parse_args()

    if args.input is not None:
        # An explicit path wins over stdin: under cron, CI or a subprocess
        # stdin is not a TTY even though nothing useful is piped in, and
        # the requested file must not be silently ignored in that case.
        source = args.input
    elif not sys.stdin.isatty():
        source = sys.stdin
    else:
        parser.error('no input: pass --input or pipe a gff file on stdin')

    Convertor(source, args.output).convert()
211c9b3a5c15 planemo upload commit 6e3286c6569d531846474dcd6959378af0317ce3-dirty
yating-l
parents: 19
diff changeset
18
211c9b3a5c15 planemo upload commit 6e3286c6569d531846474dcd6959378af0317ce3-dirty
yating-l
parents: 19
diff changeset
class Convertor:
    """Regroup tab-separated gff records and write them to a gff3 file.

    Consecutive records whose 9th column is identical form one group.
    Each group is wrapped in a ``Group`` object, whose ``writer`` method
    is called with the open output file.

    Attributes:
        li: list of records, each a list of the tab-separated column
            strings of one input line (trailing whitespace removed).
        gff3: output file object, opened by ``__init__`` and closed by
            ``convert``.
    """

    def __init__(self, input, output):
        """Load every record from *input* and start the output file.

        Args:
            input: path of the input file (a ``str``), or any iterable of
                text lines such as ``sys.stdin``.
            output: path of the file to create. It is opened for writing
                and the ``##gff-version 3`` header is written right away.
        """
        if isinstance(input, str):
            with open(input) as handle:
                self.li = self._parse(handle)
        else:
            self.li = self._parse(input)
        self.gff3 = open(output, "w")
        self.gff3.write("##gff-version 3\n")

    @staticmethod
    def _parse(lines):
        """Return the records found in *lines* as lists of columns.

        Blank lines and lines starting with ``#`` are skipped: they have
        no 9th column, so grouping would fail on them with IndexError.
        """
        records = []
        for line in lines:
            text = line.rstrip()
            if not text or text.startswith("#"):
                continue
            records.append(text.split("\t"))
        return records

    def convert(self):
        """Write all groups to the output file, then close it.

        The file is closed even when writing a group raises, so the
        handle opened by ``__init__`` is never left dangling.
        """
        try:
            index = 0
            total = len(self.li)
            while index < total:
                index = self.groupAsgene(index)
        finally:
            self.gff3.close()

    def groupAsgene(self, start=0):
        """Write the group of records that begins at index *start*.

        The group extends over the following records for as long as their
        9th column equals that of the record at *start*.

        Returns:
            The index of the first record after the group, which is
            ``len(self.li)`` when the group runs to the end.

        Raises:
            IndexError: if *start* is out of range or a record examined
                has fewer than nine columns.
        """
        gene = self.li[start][8]
        total = len(self.li)
        end = start + 1
        while end < total and self.li[end][8] == gene:
            end += 1
        # A slice up to `end` covers both "next group found" and "ran to
        # the end of the list", so no special case is needed.
        Group(self.li[start:end]).writer(self.gff3)
        return end
a7f57cf408e8 planemo upload commit ff6c810cf2b46e59b45738700e68431743e4b83d
yating-l
parents:
diff changeset
51
a7f57cf408e8 planemo upload commit ff6c810cf2b46e59b45738700e68431743e4b83d
yating-l
parents:
diff changeset
52
a7f57cf408e8 planemo upload commit ff6c810cf2b46e59b45738700e68431743e4b83d
yating-l
parents:
diff changeset
53
a7f57cf408e8 planemo upload commit ff6c810cf2b46e59b45738700e68431743e4b83d
yating-l
parents:
diff changeset
54
a7f57cf408e8 planemo upload commit ff6c810cf2b46e59b45738700e68431743e4b83d
yating-l
parents:
diff changeset
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
19
a7f57cf408e8 planemo upload commit ff6c810cf2b46e59b45738700e68431743e4b83d
yating-l
parents:
diff changeset
57
a7f57cf408e8 planemo upload commit ff6c810cf2b46e59b45738700e68431743e4b83d
yating-l
parents:
diff changeset
58
a7f57cf408e8 planemo upload commit ff6c810cf2b46e59b45738700e68431743e4b83d
yating-l
parents:
diff changeset
59