annotate Gtf.py @ 57:b39dd0b5a166 draft

planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit ce28781f52a4f84039de300cb41e3982f2e8bf51-dirty
author yating-l
date Fri, 30 Jun 2017 14:49:14 -0400
parents c66803bff0cc
children a0fc8379223c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
3e0c61b52a06 planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff changeset
1 #!/usr/bin/python
3e0c61b52a06 planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff changeset
2
3e0c61b52a06 planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff changeset
3 import os
3e0c61b52a06 planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff changeset
4 import tempfile
3e0c61b52a06 planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff changeset
5
3e0c61b52a06 planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff changeset
6 # Internal dependencies
3e0c61b52a06 planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff changeset
7 from Datatype import Datatype
3e0c61b52a06 planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff changeset
8 from util import subtools
3e0c61b52a06 planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff changeset
9
3e0c61b52a06 planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff changeset
class InfoModifiedGtf(object):
    """Report of what had to be changed while checking a GTF file.

    Attributes:
        is_modified: True when at least one line of the GTF was altered
            or dropped.
        array_modified_lines: 1-based numbers of the affected lines, in the
            order they were encountered.
    """

    def __init__(self, is_modified=False, array_modified_lines=None):
        """
        :param is_modified: initial value of the "file was modified" flag.
        :param array_modified_lines: list of affected line numbers. When
            omitted, every instance gets its own fresh empty list (a shared
            mutable default would leak line numbers between instances).
        """
        self.is_modified = is_modified
        if array_modified_lines is None:
            array_modified_lines = []
        self.array_modified_lines = array_modified_lines

    def get_str_modified_lines(self):
        """Return the affected line numbers as a comma-separated string."""
        return ','.join(map(str, self.array_modified_lines))
3e0c61b52a06 planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff changeset
17
3e0c61b52a06 planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff changeset
class Gtf(Datatype):
    """Datatype that converts a GTF annotation file into a bigGenePred track.

    The conversion chain run by the constructor is:
    GTF -> genePred -> bigGenePred (text) -> sorted -> bigBed.

    NOTE(review): ``tool_directory``, ``myTrackFolderPath``,
    ``chromSizesFile`` and ``createTrack`` are not defined in this class;
    they are presumably provided by ``Datatype`` - confirm there.
    """

    def __init__(self, input_gtf_false_path, data_gtf):
        """
        Build the bigBed file for the GTF and register the matching track.

        :param input_gtf_false_path: path of the GTF file to convert.
        :param data_gtf: mapping with the keys "name", "order_index",
            "track_color", "group_name", "database" and "long_label"
            (an empty "long_label" falls back to the name).
        """
        super(Gtf, self).__init__()

        self.track = None

        self.input_gtf_false_path = input_gtf_false_path
        self.name_gtf = data_gtf["name"]
        self.priority = data_gtf["order_index"]
        self.track_color = data_gtf["track_color"]
        # TODO: Think about how to avoid repetition of the group_name everywhere
        self.group_name = data_gtf["group_name"]
        self.database = data_gtf["database"]
        if data_gtf["long_label"]:
            self.long_label = data_gtf["long_label"]
        else:
            self.long_label = self.name_gtf

        # TODO: See if we need these temporary files as part of the generated files
        # These are removed automatically once the file objects are closed.
        # `bufsize` is the Python 2 spelling of the buffering argument.
        genePredFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".genePred")
        unsorted_bigGenePred_file = tempfile.NamedTemporaryFile(bufsize=0, suffix=".unsorted.bigGenePred")
        sorted_bigGenePred_file = tempfile.NamedTemporaryFile(suffix=".sortedBed.bigGenePred")

        # GtfToGenePred
        ## Checking the integrity of the inputs
        # Side effect: self.input_gtf_false_path now points to the cleaned copy.
        modified_gtf = self._checkAndFixGtf()

        ## Processing the gtf
        subtools.gtfToGenePred(self.input_gtf_false_path, genePredFile.name)

        # TODO: From there, refactor because common use with Gff3.py
        # genePredToBigGenePred processing
        subtools.genePredToBigGenePred(genePredFile.name, unsorted_bigGenePred_file.name)

        # Sort processing
        subtools.sort(unsorted_bigGenePred_file.name, sorted_bigGenePred_file.name)

        # bedToBigBed processing
        trackName = "".join((self.name_gtf, ".bb"))

        auto_sql_option = os.path.join(self.tool_directory, 'bigGenePred.as')

        myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName)

        # Opening the target creates/truncates it; only its name is handed
        # to the external tool.
        with open(myBigBedFilePath, 'w') as bigBedFile:
            subtools.bedToBigBed(sorted_bigGenePred_file.name,
                                 self.chromSizesFile.name,
                                 bigBedFile.name,
                                 autoSql=auto_sql_option,
                                 typeOption='bed12+8',
                                 tab=True)

        # Create the Track Object
        self.createTrack(file_path=trackName,
                         track_name=trackName,
                         long_label=self.long_label, track_type='bigGenePred',
                         visibility='dense', priority=self.priority,
                         track_file=myBigBedFilePath,
                         track_color=self.track_color,
                         group_name=self.group_name,
                         database=self.database)

        # TODO: Use Logging instead of print
        if modified_gtf.is_modified:
            print("- Warning: Gtf %s created with a modified version of your Gtf because of start/end coordinates issues."
                  % self.name_gtf)
            print("Here are the lines removed: " + modified_gtf.get_str_modified_lines())
        else:
            print("- Gtf %s created" % self.name_gtf)

    def _checkAndFixGtf(self):
        """
        Check the integrity of the GTF file and write a cleaned copy of it.

        A feature line is kept only when its scaffold is listed in the
        chrom.sizes file, its start is > 0 and its end does not exceed the
        scaffold size; coordinates are compared as integers. Any other
        feature line (including one that cannot be parsed) is removed and
        its 1-based line number recorded. Comment lines (starting with '#')
        and blank lines are copied unchanged.

        Side effect: ``self.input_gtf_false_path`` is replaced by the path
        of the cleaned temporary copy.

        :return: an InfoModifiedGtf describing the removed lines.
        """
        # TODO: Get the user choice (remove vs. truncate) and use it;
        # for now offending lines are always removed.
        # An explicit fresh list keeps the reports of separate files apart.
        modified_gtf = InfoModifiedGtf(False, [])

        dict_chrom_sizes = self._loadChromSizes()

        # mkstemp gives a single handle that is closed by the `with` below;
        # the file itself intentionally survives so later steps can read it.
        temp_fd, temp_gtf_path = tempfile.mkstemp(suffix=".gtf")
        with os.fdopen(temp_fd, 'w') as tmp:
            with open(self.input_gtf_false_path, 'r') as gtf:
                # Stream the file instead of loading it entirely in memory.
                for index, line in enumerate(gtf):
                    is_passthrough = line.startswith('#') or not line.strip()
                    if is_passthrough or self._isValidGtfLine(line, dict_chrom_sizes):
                        # Lines already carry their newline; only the last
                        # line of the file may need one added.
                        tmp.write(line if line.endswith('\n') else line + '\n')
                    else:
                        # We save the line number for the feedback to the user
                        modified_gtf.array_modified_lines.append(index + 1)
                        modified_gtf.is_modified = True

        # Once the process is completed, we just replace the path of the gtf
        self.input_gtf_false_path = temp_gtf_path

        # TODO: Manage the fact that the cleaned copy stays on disk
        # (nothing removes it once the conversion is done).

        return modified_gtf

    def _loadChromSizes(self):
        """
        Read the chrom.sizes file into a dictionary for fast lookups.

        :return: dict mapping scaffold name -> scaffold size (int).
        :raises ValueError: if a size column is not an integer.
        """
        # TODO: Think about doing this in Datatype.py, so everywhere we have
        # access to this read-only dictionary
        dict_chrom_sizes = {}
        with open(self.chromSizesFile.name, 'r') as chromSizes:
            for line in chromSizes:
                fields = line.split()
                # fields[0] is the name of the scaffold,
                # fields[1] is the size of the scaffold.
                if len(fields) < 2:
                    # Blank or incomplete line: nothing to record.
                    continue
                dict_chrom_sizes[fields[0]] = int(fields[1])
        return dict_chrom_sizes

    @staticmethod
    def _isValidGtfLine(line, dict_chrom_sizes):
        """
        Tell whether a GTF feature line fits inside its scaffold.

        :param line: one non-comment, non-blank line of the GTF.
        :param dict_chrom_sizes: scaffold name -> size (int).
        :return: True when the scaffold is known, start > 0 and
            end <= scaffold size; False otherwise (also for lines whose
            coordinates are missing or not integers).
        """
        # Only the first five columns matter: seqname is fields[0],
        # start is fields[3] and end is fields[4].
        fields = line.split(None, 5)
        if len(fields) < 5:
            return False

        scaffold_size = dict_chrom_sizes.get(fields[0])
        if scaffold_size is None:
            return False

        try:
            start_position = int(fields[3])
            end_position = int(fields[4])
        except ValueError:
            return False

        return start_position > 0 and end_position <= scaffold_size