annotate bedToGff3.py @ 15:671231da45f9 draft

planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 594c5fa1e3cdb378ecf6b5490ec2cbac5fa8a61e-dirty
author yating-l
date Wed, 05 Jul 2017 15:47:15 -0400
parents 8d1cf7ce65cd
children 466d52f83079
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8d1cf7ce65cd planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit d583ac16a6c6942730ea536eb59cc37941816030-dirty
yating-l
parents:
diff changeset
1 #!/usr/bin/env python
8d1cf7ce65cd planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit d583ac16a6c6942730ea536eb59cc37941816030-dirty
yating-l
parents:
diff changeset
2
8d1cf7ce65cd planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit d583ac16a6c6942730ea536eb59cc37941816030-dirty
yating-l
parents:
diff changeset
3 '''
8d1cf7ce65cd planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit d583ac16a6c6942730ea536eb59cc37941816030-dirty
yating-l
parents:
diff changeset
4 Convert BED format to gff3
15
671231da45f9 planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit 594c5fa1e3cdb378ecf6b5490ec2cbac5fa8a61e-dirty
yating-l
parents: 0
diff changeset
5 reference for gff3: https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md
0
8d1cf7ce65cd planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit d583ac16a6c6942730ea536eb59cc37941816030-dirty
yating-l
parents:
diff changeset
6 '''
8d1cf7ce65cd planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit d583ac16a6c6942730ea536eb59cc37941816030-dirty
yating-l
parents:
diff changeset
7 import os
8d1cf7ce65cd planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit d583ac16a6c6942730ea536eb59cc37941816030-dirty
yating-l
parents:
diff changeset
8 from collections import OrderedDict
8d1cf7ce65cd planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit d583ac16a6c6942730ea536eb59cc37941816030-dirty
yating-l
parents:
diff changeset
9 import utils
8d1cf7ce65cd planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit d583ac16a6c6942730ea536eb59cc37941816030-dirty
yating-l
parents:
diff changeset
10
8d1cf7ce65cd planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit d583ac16a6c6942730ea536eb59cc37941816030-dirty
yating-l
parents:
diff changeset
class bedToGff3():
    """Convert a tab-separated BED-style file to GFF3 on construction.

    The conversion that runs is selected by ``bed_type``:

    * ``"trfbig"``   -> :meth:`trfbig_to_gff3` (``tandem_repeat`` features)
    * ``"regtools"`` -> :meth:`splicejunctions_to_gff3` (``junction``
      features, then ``utils.child_blocks`` with ``exon_junction``)
    * ``"blat"``     -> :meth:`bigpsl_to_gff3` (``match`` features, then
      ``utils.child_blocks`` with ``match_part``)

    Any other ``bed_type`` only stores the arguments; no file is written.
    """

    def __init__(self, inputBedFile, chrom_sizes, bed_type, output):
        """Store the arguments and immediately run the matching conversion.

        inputBedFile -- path of the tab-separated input file
        chrom_sizes  -- passed unchanged to ``utils.sequence_region``; the
                        result is indexed by sequence id to obtain the end
                        coordinate of each ``##sequence-region`` line
        bed_type     -- ``"trfbig"``, ``"regtools"`` or ``"blat"``
        output       -- path of the GFF3 file to create (overwritten)
        """
        self.input = inputBedFile
        self.output = output
        self.chrom_sizes = chrom_sizes
        self.type = bed_type
        if self.type == "trfbig":
            self.trfbig_to_gff3()
        elif self.type == "regtools":
            self.splicejunctions_to_gff3()
        elif self.type == "blat":
            self.bigpsl_to_gff3()

    def trfbig_to_gff3(self):
        """Write every input row as a ``tandem_repeat`` GFF3 feature."""
        self._convert(self._trfbig_record)

    def splicejunctions_to_gff3(self):
        """Write every input row as a ``junction`` feature, then hand it to
        ``utils.child_blocks`` with child type ``exon_junction``."""
        self._convert(self._splicejunction_record, 'exon_junction')

    def bigpsl_to_gff3(self):
        """Write every input row as a ``match`` feature, then hand it to
        ``utils.child_blocks`` with child type ``match_part``."""
        self._convert(self._bigpsl_record, 'match_part')

    def _convert(self, build_record, child_type=None):
        """Shared driver: stream ``self.input`` into ``self.output``.

        build_record -- callable taking the list of columns of one row and
                        returning ``(field, attribute)`` ordered dicts
        child_type   -- when given, ``utils.child_blocks`` is called after
                        each feature with this type

        Raises KeyError if a row names a sequence that is missing from the
        sizes mapping, and IndexError/ValueError on rows with too few or
        non-numeric columns.
        """
        # Both files are managed by ``with`` so they are closed even when a
        # malformed row raises half-way through the input.
        with open(self.output, 'w') as gff3:
            gff3.write("##gff-version 3\n")
            sizes_dict = utils.sequence_region(self.chrom_sizes)
            declared = set()
            with open(self.input, 'r') as bed:
                for line in bed:
                    stripped = line.rstrip()
                    if not stripped:
                        # Blank lines (e.g. a trailing newline) carry no record.
                        continue
                    li = stripped.split("\t")
                    seqid = li[0]
                    # Emit the ##sequence-region pragma once per sequence,
                    # right before its first feature.
                    if seqid not in declared:
                        end_region = sizes_dict[seqid]
                        gff3.write("##sequence-region " + seqid + ' 1 ' + str(end_region) + '\n')
                        declared.add(seqid)
                    field, attribute = build_record(li)
                    utils.write_features(field, attribute, gff3)
                    if child_type is not None:
                        utils.child_blocks(field, attribute, gff3, child_type)

    def _block_attributes(self, li):
        """Return the ID/Name/block attributes shared by the junction and
        match converters, in output order."""
        return OrderedDict([
            ('ID', li[0] + '_' + li[3]),
            ('Name', li[3]),
            ('blockcount', li[9]),
            ('blocksizes', li[10]),
            ('chromstarts', li[11]),
        ])

    def _trfbig_record(self, li):
        """Build ``(field, attribute)`` for one tandem-repeat row."""
        field = OrderedDict([
            ('seqid', li[0]),
            ('source', li[3]),
            ('type', 'tandem_repeat'),
            # The first base in a chromosome is numbered 0 in BED format,
            # GFF3 starts at 1. The BED end is exclusive, so it is already
            # the 1-based inclusive end.
            ('start', str(int(li[1]) + 1)),
            ('end', li[2]),
            ('score', li[9]),
            ('strand', '+'),
            ('phase', '.'),
        ])
        attribute = OrderedDict([
            ('length of repeat unit', li[4]),
            ('mean number of copies of repeat', li[5]),
            ('length of consensus sequence', li[6]),
            ('percentage match', li[7]),
            ('percentage indel', li[8]),
            ('percent of a\'s in repeat unit', li[10]),
            ('percent of c\'s in repeat unit', li[11]),
            ('percent of g\'s in repeat unit', li[12]),
            ('percent of t\'s in repeat unit', li[13]),
            ('entropy', li[14]),
            ('sequence of repeat unit element', li[15]),
        ])
        return field, attribute

    def _splicejunction_record(self, li):
        """Build ``(field, attribute)`` for one splice-junction row."""
        field = OrderedDict([
            ('seqid', li[0]),
            ('source', li[3]),
            ('type', 'junction'),
            # 0-based BED start -> 1-based GFF3 start. Deliberately left as
            # an int here (the other converters pass a str).
            # NOTE(review): confirm the utils helpers accept both before
            # unifying the type.
            ('start', int(li[1]) + 1),
            ('end', li[2]),
            ('score', li[12]),
            ('strand', li[5]),
            ('phase', '.'),
        ])
        return field, self._block_attributes(li)

    def _bigpsl_record(self, li):
        """Build ``(field, attribute)`` for one BLAT alignment row."""
        field = OrderedDict([
            ('seqid', li[0]),
            ('source', 'UCSC BLAT alignment tool'),
            ('type', 'match'),
            # 0-based BED start -> 1-based GFF3 start.
            ('start', str(int(li[1]) + 1)),
            ('end', li[2]),
            ('score', li[4]),
            ('strand', li[5]),
            ('phase', '.'),
        ])
        attribute = self._block_attributes(li)
        attribute['sequence on other chromosome'] = li[17]
        attribute['cds in ncbi format'] = li[18]
        attribute['size of target chromosome'] = li[19]
        attribute['number of bases matched'] = li[20]
        attribute['number of bases that don\'t match'] = li[21]
        attribute['number of bases that match but are part of repeats'] = li[22]
        attribute['number of \'N\' bases'] = li[23]
        return field, attribute
8d1cf7ce65cd planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit d583ac16a6c6942730ea536eb59cc37941816030-dirty
yating-l
parents:
diff changeset
134