Mercurial > repos > yating-l > hubarchivecreator
annotate Gtf.py @ 52:c66803bff0cc draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit adc1ac50269e02570e7ce12c732637bdd3f9a547-dirty
| author | yating-l | 
|---|---|
| date | Thu, 11 May 2017 17:21:15 -0400 | 
| parents | 3e0c61b52a06 | 
| children | b39dd0b5a166 | 
| rev | line source | 
|---|---|
| 0 
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
 yating-l parents: diff
changeset | 1 #!/usr/bin/python | 
| 
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
 yating-l parents: diff
changeset | 2 | 
| 
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
 yating-l parents: diff
changeset | 3 import os | 
| 
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
 yating-l parents: diff
changeset | 4 import tempfile | 
| 
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
 yating-l parents: diff
changeset | 5 | 
| 
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
 yating-l parents: diff
changeset | 6 # Internal dependencies | 
| 
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
 yating-l parents: diff
changeset | 7 from Datatype import Datatype | 
| 
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
 yating-l parents: diff
changeset | 8 from util import subtools | 
| 
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
 yating-l parents: diff
changeset | 9 | 
| 
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
 yating-l parents: diff
class InfoModifiedGtf(object):
    """Report describing whether a GTF had to be altered while checking it.

    Attributes:
        is_modified: True when at least one line of the GTF was altered.
        array_modified_lines: identifiers of the affected lines
            (Gtf._checkAndFixGtf stores 1-based line numbers here).
    """

    def __init__(self, is_modified=False, array_modified_lines=None):
        """Build a report.

        Args:
            is_modified: initial value of the "file was modified" flag.
            array_modified_lines: list collecting the affected lines. When
                omitted, a new empty list is created for this instance.
        """
        self.is_modified = is_modified
        # A literal ``[]`` default is evaluated once and shared by every
        # instance created without the argument, so line numbers appended
        # for one GTF would show up in the report of the next one. Use a
        # None sentinel and allocate a fresh list per instance instead.
        if array_modified_lines is None:
            array_modified_lines = []
        self.array_modified_lines = array_modified_lines

    def get_str_modified_lines(self):
        """Return the recorded lines as a single comma-separated string."""
        return ','.join(map(str, self.array_modified_lines))
| 
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
 yating-l parents: diff
changeset | 17 | 
| 
3e0c61b52a06
planemo upload for repository https://github.com/Yating-L/hub-archive-creator commit a77635b40ebd29baafb3bea57f8cbfb3f252e3b0-dirty
 yating-l parents: diff
class Gtf(Datatype):
    """Convert a GTF annotation file into a bigGenePred (bigBed) track.

    The constructor runs the whole chain:
    GTF -> genePred -> bigGenePred -> sorted bigGenePred -> bigBed,
    then registers the track through ``self.createTrack``.

    NOTE(review): ``tool_directory``, ``myTrackFolderPath``,
    ``chromSizesFile`` and ``createTrack`` are not defined in this file;
    they are presumably provided by ``Datatype`` -- confirm.
    """

    def __init__(self, input_gtf_false_path, data_gtf):
        """Check the GTF, convert it to bigBed and create the track.

        Args:
            input_gtf_false_path: path of the GTF file to convert.
            data_gtf: mapping providing the "name", "order_index",
                "track_color", "group_name" and "long_label" entries.
        """
        super(Gtf, self).__init__()

        self.track = None

        self.input_gtf_false_path = input_gtf_false_path
        self.name_gtf = data_gtf["name"]
        self.priority = data_gtf["order_index"]
        self.track_color = data_gtf["track_color"]
        # TODO: Think about how to avoid repetition of the group_name everywhere
        self.group_name = data_gtf["group_name"]
        # Fall back on the track name when no (or an empty) long label is given.
        self.long_label = data_gtf["long_label"] or self.name_gtf

        # TODO: See if we need these temporary files as part of the generated files
        # Only the *names* of these files are used (handed to the external
        # tools), so no buffering option is needed on the handles. The local
        # variables keep the files alive until the end of the constructor.
        genePredFile = tempfile.NamedTemporaryFile(suffix=".genePred")
        unsorted_bigGenePred_file = tempfile.NamedTemporaryFile(suffix=".unsorted.bigGenePred")
        sorted_bigGenePred_file = tempfile.NamedTemporaryFile(suffix=".sortedBed.bigGenePred")

        # GtfToGenePred
        ## Checking the integrity of the inputs
        # This also redirects self.input_gtf_false_path to the checked copy.
        modified_gtf = self._checkAndFixGtf()

        ## Processing the gtf
        subtools.gtfToGenePred(self.input_gtf_false_path, genePredFile.name)

        # TODO: From there, refactor because common use with Gff3.py
        # genePredToBigGenePred processing
        subtools.genePredToBigGenePred(genePredFile.name, unsorted_bigGenePred_file.name)

        # Sort processing
        subtools.sort(unsorted_bigGenePred_file.name, sorted_bigGenePred_file.name)

        # bedToBigBed processing
        trackName = "".join((self.name_gtf, ".bb"))

        auto_sql_option = os.path.join(self.tool_directory, 'bigGenePred.as')

        myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName)

        # Opening in 'w' mode creates/truncates the destination file; only its
        # name is handed to the conversion tool.
        with open(myBigBedFilePath, 'w') as bigBedFile:
            subtools.bedToBigBed(sorted_bigGenePred_file.name,
                                 self.chromSizesFile.name,
                                 bigBedFile.name,
                                 autoSql=auto_sql_option,
                                 typeOption='bed12+8',
                                 tab=True)

        # Create the Track Object
        self.createTrack(file_path=trackName,
                         track_name=trackName,
                         long_label=self.long_label, track_type='bigGenePred',
                         visibility='dense', priority=self.priority,
                         track_file=myBigBedFilePath,
                         track_color=self.track_color,
                         group_name=self.group_name)

        # TODO: Use Logging instead of print
        if modified_gtf.is_modified:
            print("- Warning: Gtf %s created with a modified version of your Gtf because of start/end coordinates issues."
                  % self.name_gtf)
            print("Here are the lines removed: " + modified_gtf.get_str_modified_lines())
        else:
            print("- Gtf %s created" % self.name_gtf)

    def _checkAndFixGtf(self):
        """Write a checked copy of the GTF and make it the new input path.

        Comment lines (starting with '#') and blank lines are copied as is.
        Any other line is kept only when its start (5th... see below) is
        strictly positive and its end does not exceed the size of its
        scaffold; offending lines are dropped and their 1-based line number
        is recorded. The fields used are, after whitespace splitting:
        fields[0] (seqname), fields[3] (start) and fields[4] (end).

        ``self.input_gtf_false_path`` is replaced by the path of the checked
        copy, which is created with delete=False and is not removed here.

        Returns:
            InfoModifiedGtf: whether lines were removed, and which ones.

        Raises:
            KeyError: a GTF line references a seqname absent from the
                chrom.sizes file.
            IndexError: a non-blank line has too few fields.
            ValueError: a coordinate or a scaffold size is not an integer.
        """
        # Pass an explicit new list so the report never shares its storage
        # with another report.
        modified_gtf = InfoModifiedGtf(False, [])

        # TODO: Get the user choice and use it
        # Get the chrom.sizes into a dictionary to have a faster access
        # TODO: Think about doing this in Datatype.py, so everywhere we have access to this read-only dictionary
        dict_chrom_sizes = {}
        with open(self.chromSizesFile.name, 'r') as chromSizes:
            for line in chromSizes:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing empty line)
                    continue
                # fields[0] should be the name of the scaffold
                # fields[1] should be the size of the scaffold
                # TODO: Ensure this is true for all lines
                # Sizes are stored as integers so that the comparison with
                # the coordinates below is numeric, not lexicographic.
                dict_chrom_sizes[fields[0]] = int(fields[1])

        # Parse the GTF and check each line using the chrom sizes dictionary.
        # The copy is written through the temporary file's own handle, which
        # the with-statement closes; delete=False keeps the file on disk.
        with tempfile.NamedTemporaryFile(mode='w', suffix=".gtf", delete=False) as tmp:
            with open(self.input_gtf_false_path, 'r') as gtf:
                for index, line in enumerate(gtf):
                    if line.startswith('#') or not line.strip():
                        # Comments and blank lines carry no coordinates
                        keep_line = True
                    else:
                        fields = line.split()
                        # fields[0] => Seqname (scaffold)
                        # fields[3] => Start of the feature
                        # fields[4] => End of the feature
                        scaffold_size = dict_chrom_sizes[fields[0]]
                        start_position = int(fields[3])
                        end_position = int(fields[4])
                        keep_line = (start_position > 0
                                     and end_position <= scaffold_size)

                    if keep_line:
                        # Lines read from the file already end with a newline
                        # (except possibly the last one): add one only if missing.
                        tmp.write(line if line.endswith('\n') else line + '\n')
                    else:
                        # TODO: Process the user choice
                        # By default, we are assuming the user choice is to
                        # remove the line: we don't copy it, and we save its
                        # number for the feedback to the user.
                        modified_gtf.array_modified_lines.append(index + 1)
                        modified_gtf.is_modified = True
            checked_gtf_path = tmp.name

        # Once the process is completed, we just replace the path of the gtf
        self.input_gtf_false_path = checked_gtf_path

        # TODO: Manage the issue with the fact the dataset is going to still exist on the disk because of delete=False

        return modified_gtf
