Mercurial > repos > yating-l > hubarchivecreator
annotate Gtf.py @ 52:c66803bff0cc draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit adc1ac50269e02570e7ce12c732637bdd3f9a547-dirty
author | yating-l |
---|---|
date | Thu, 11 May 2017 17:21:15 -0400 |
parents | 3e0c61b52a06 |
children | b39dd0b5a166 |
rev | line source |
---|---|
0
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff
changeset
|
1 #!/usr/bin/python |
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff
changeset
|
2 |
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff
changeset
|
3 import os |
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff
changeset
|
4 import tempfile |
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff
changeset
|
5 |
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff
changeset
|
6 # Internal dependencies |
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff
changeset
|
7 from Datatype import Datatype |
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff
changeset
|
8 from util import subtools |
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff
changeset
|
9 |
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff
changeset
|
class InfoModifiedGtf(object):
    """Record of what happened to a GTF while its integrity was checked.

    Attributes:
        is_modified: True when at least one line was dropped from the GTF.
        array_modified_lines: 1-based numbers of the lines that were dropped.
    """

    def __init__(self, is_modified=False, array_modified_lines=None):
        """Create the record.

        :param is_modified: whether the GTF has already been altered.
        :param array_modified_lines: list of affected line numbers. When
            omitted, a fresh empty list is created for this instance (a
            shared ``[]`` default would leak line numbers from one
            instance to the next, since callers append to it).
        """
        self.is_modified = is_modified
        if array_modified_lines is None:
            array_modified_lines = []
        # A list supplied by the caller is stored by reference, as before
        self.array_modified_lines = array_modified_lines

    def get_str_modified_lines(self):
        """Return the affected line numbers as a comma-separated string."""
        return ','.join(map(str, self.array_modified_lines))
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff
changeset
|
17 |
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
yating-l
parents:
diff
changeset
|
class Gtf(Datatype):
    """Datatype converting a GTF annotation file into a bigGenePred track.

    The whole conversion runs from the constructor:
    GTF -> genePred -> bigGenePred (text) -> sorted bigGenePred -> bigBed.

    NOTE(review): ``tool_directory``, ``myTrackFolderPath``, ``chromSizesFile``
    and ``createTrack`` are not defined in this file; they are presumably
    provided by ``Datatype`` -- confirm against the base class.
    """

    def __init__(self, input_gtf_false_path, data_gtf):
        """Convert the GTF and register the resulting track.

        :param input_gtf_false_path: path of the GTF file to convert.
        :param data_gtf: mapping providing the "name", "order_index",
            "track_color", "group_name" and "long_label" entries.
        """
        super(Gtf, self).__init__()

        self.track = None

        self.input_gtf_false_path = input_gtf_false_path
        self.name_gtf = data_gtf["name"]
        self.priority = data_gtf["order_index"]
        self.track_color = data_gtf["track_color"]
        # TODO: Think about how to avoid repetition of the group_name everywhere
        self.group_name = data_gtf["group_name"]
        # Fall back on the track name when the long label is empty
        self.long_label = data_gtf["long_label"] or self.name_gtf

        # TODO: See if we need these temporary files as part of the generated files
        # Only the *names* of these files are used (they are handed to the
        # conversion tools), so no buffering option is needed. The Python 2-only
        # ``bufsize`` keyword is therefore not passed.
        genePredFile = tempfile.NamedTemporaryFile(suffix=".genePred")
        unsorted_bigGenePred_file = tempfile.NamedTemporaryFile(suffix=".unsorted.bigGenePred")
        sorted_bigGenePred_file = tempfile.NamedTemporaryFile(suffix=".sortedBed.bigGenePred")

        # GtfToGenePred
        ## Checking the integrity of the inputs
        # Side effect: self.input_gtf_false_path now points to the checked copy
        modified_gtf = self._checkAndFixGtf()

        ## Processing the gtf
        subtools.gtfToGenePred(self.input_gtf_false_path, genePredFile.name)

        # TODO: From there, refactor because common use with Gff3.py
        # genePredToBigGenePred processing
        subtools.genePredToBigGenePred(genePredFile.name, unsorted_bigGenePred_file.name)

        # Sort processing
        subtools.sort(unsorted_bigGenePred_file.name, sorted_bigGenePred_file.name)

        # bedToBigBed processing
        trackName = "".join((self.name_gtf, ".bb"))

        auto_sql_option = os.path.join(self.tool_directory, 'bigGenePred.as')

        myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName)

        # The output file is created (or truncated) here; only its name is
        # passed on to bedToBigBed.
        with open(myBigBedFilePath, 'w') as bigBedFile:
            subtools.bedToBigBed(sorted_bigGenePred_file.name,
                                 self.chromSizesFile.name,
                                 bigBedFile.name,
                                 autoSql=auto_sql_option,
                                 typeOption='bed12+8',
                                 tab=True)

        # Create the Track Object
        self.createTrack(file_path=trackName,
                         track_name=trackName,
                         long_label=self.long_label, track_type='bigGenePred',
                         visibility='dense', priority=self.priority,
                         track_file=myBigBedFilePath,
                         track_color=self.track_color,
                         group_name=self.group_name)

        # TODO: Use Logging instead of print
        if modified_gtf.is_modified:
            print("- Warning: Gtf %s created with a modified version of your Gtf because of start/end coordinates issues."
                  % self.name_gtf)
            print("Here are the lines removed: " + modified_gtf.get_str_modified_lines())
        else:
            print("- Gtf %s created" % self.name_gtf)

    @staticmethod
    def _isWithinScaffold(fields, dict_chrom_sizes):
        """Tell whether the feature described by ``fields`` fits on its scaffold.

        :param fields: whitespace-split columns of one GTF line; column 0 is
            the scaffold name, columns 3 and 4 the start and end positions.
        :param dict_chrom_sizes: mapping scaffold name -> size (int).
        :return: True when start > 0 and end <= scaffold size. False when the
            line has too few columns, its coordinates are not integers, the
            scaffold is unknown, or the coordinates are out of range.
        """
        try:
            scaffold_size = dict_chrom_sizes[fields[0]]
            # Coordinates are read as text: compare them as integers, not strings
            start_position = int(fields[3])
            end_position = int(fields[4])
        except (IndexError, KeyError, ValueError):
            return False
        return start_position > 0 and end_position <= scaffold_size

    def _checkAndFixGtf(self):
        """Check the integrity of the GTF file against the chromosome sizes.

        Every feature line whose coordinates do not fit on its scaffold
        (start <= 0, end > scaffold size, unknown scaffold or unparsable
        line) is left out of a temporary copy of the GTF. Comment lines and
        blank lines are copied unchanged. Truncating instead of removing,
        depending on a user choice, is not implemented yet.

        Side effect: ``self.input_gtf_false_path`` is replaced by the path of
        the temporary copy.

        :return: an ``InfoModifiedGtf`` telling whether lines were removed
            and listing their 1-based line numbers.
        :raises ValueError: if a size in the chrom.sizes file is not an integer.
        """
        # Pass a fresh list explicitly so the removed line numbers of this
        # file never accumulate with those of another instance.
        modified_gtf = InfoModifiedGtf(is_modified=False, array_modified_lines=[])

        # TODO: Get the user choice and use it
        # Get the chrom.sizes into a dictionary to have a faster access
        # TODO: Think about doing this in Datatype.py, so everywhere we have access to this read-only dictionary
        dict_chrom_sizes = {}
        with open(self.chromSizesFile.name, 'r') as chromSizes:
            for line in chromSizes:
                fields = line.split()
                if len(fields) < 2:
                    # Blank or incomplete line: nothing to record
                    continue
                # fields[0] is the name of the scaffold
                # fields[1] is the size of the scaffold
                dict_chrom_sizes[fields[0]] = int(fields[1])

        # Create the temp gtf receiving the checked lines. delete=False keeps
        # it on disk after it is closed, because its path is used afterwards.
        temp_gtf = tempfile.NamedTemporaryFile(mode='w', suffix=".gtf", delete=False)

        # Parse the GTF and check each line using the chrom sizes dictionary
        with temp_gtf as tmp:
            with open(self.input_gtf_false_path, 'r') as gtf:
                for line_number, line in enumerate(gtf, 1):
                    is_feature = bool(line.strip()) and not line.startswith('#')
                    if is_feature and not self._isWithinScaffold(line.split(), dict_chrom_sizes):
                        # TODO: Process the user choice
                        # By default the line is removed: we don't copy it,
                        # and we save its number for the feedback to the user
                        modified_gtf.array_modified_lines.append(line_number)
                        modified_gtf.is_modified = True
                        continue

                    # Comment, blank or valid line: copy it as it is. The line
                    # already carries its line break, except possibly the
                    # last one of the file.
                    tmp.write(line)
                    if not line.endswith('\n'):
                        tmp.write('\n')

        # Once the process is completed, we just replace the path of the gtf
        self.input_gtf_false_path = temp_gtf.name

        # TODO: Manage the issue with the fact the dataset is going to still exist on the disk because of delete=False

        return modified_gtf