Mercurial > repos > yating-l > hubarchivecreator
changeset 3:44577d6784b7 draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit e2db8aa3672f1222b415716054bfb3c5ccd132a1-dirty
line wrap: on
line diff
#!/usr/bin/python
# BedSpliceJunctions.py

import os
import tempfile

from Datatype import Datatype
from Track import Track
from TrackDb import TrackDb
from util import subtools


class BedSpliceJunctions(Datatype):
    """Datatype that turns a splice-junction BED file into a bigBed track.

    The input is sorted, converted with bedToBigBed using the
    ``spliceJunctions.as`` autoSql definition (type ``bed12+1``), and then
    registered as a track through ``createTrack``.
    """

    def __init__(self, input_bed_splice_junctions_false_path, data_bed_splice_junctions):
        """Build the bigBed file and its track from a splice-junction BED.

        Args:
            input_bed_splice_junctions_false_path: Path of the input BED file
                handed to ``subtools.sort``.
            data_bed_splice_junctions: Mapping that must provide the keys
                ``"name"``, ``"order_index"``, ``"track_color"`` and
                ``"group_name"``.

        Raises:
            KeyError: If one of the required keys is missing from
                ``data_bed_splice_junctions``.
        """
        super(BedSpliceJunctions, self).__init__()

        self.input_bed_splice_junctions_false_path = input_bed_splice_junctions_false_path
        self.name_bed_splice_junctions = data_bed_splice_junctions["name"]
        self.priority = data_bed_splice_junctions["order_index"]
        self.track_color = data_bed_splice_junctions["track_color"]
        # TODO: Think about how to avoid repetition of the group_name everywhere
        self.group_name = data_bed_splice_junctions["group_name"]

        # TODO: Change the name of the bb, to tool + genome + .bb
        trackName = "".join((self.name_bed_splice_junctions, '.bb'))
        # NOTE(review): myTrackFolderPath, tool_directory and chromSizesFile are
        # read from self but not set in this file -- presumably provided by
        # Datatype; confirm against that class.
        myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName)
        auto_sql_option = os.path.join(self.tool_directory, 'spliceJunctions.as')

        # The sorted BED is only an intermediate: keep it inside a context
        # manager so the temporary file is closed (and removed) as soon as the
        # conversion is done, instead of relying on garbage collection.
        with tempfile.NamedTemporaryFile(suffix=".sortedBed") as sortedBedFile:
            # Sort processing
            subtools.sort(self.input_bed_splice_junctions_false_path, sortedBedFile.name)

            # bedToBigBed processing; the output file is opened (created or
            # truncated) first and only its name is handed to the tool.
            with open(myBigBedFilePath, 'w') as bigBedFile:
                subtools.bedToBigBed(sortedBedFile.name,
                                     self.chromSizesFile.name,
                                     bigBedFile.name,
                                     typeOption='bed12+1',
                                     autoSql=auto_sql_option)

        # Create the Track Object
        self.createTrack(file_path=trackName,
                         track_name=trackName,
                         long_label=self.name_bed_splice_junctions,
                         track_type='bigBed 12 +',
                         visibility='dense',
                         priority=self.priority,
                         track_file=myBigBedFilePath,
                         track_color=self.track_color,
                         group_name=self.group_name)

        print("- Bed splice junctions %s created" % self.name_bed_splice_junctions)
--- a/hubArchiveCreator.py Tue Nov 22 17:13:18 2016 -0500 +++ b/hubArchiveCreator.py Wed Dec 21 11:18:27 2016 -0500 @@ -18,6 +18,7 @@ # Internal dependencies from Bam import Bam from BedSimpleRepeats import BedSimpleRepeats +from BedSpliceJunctions import BedSpliceJunctions from Bed import Bed from BigWig import BigWig from util.Fasta import Fasta @@ -46,6 +47,9 @@ # Bed4+12 (TrfBig) parser.add_argument('--bedSimpleRepeats', action='append', help='Bed4+12 format, using simpleRepeats.as') + # Bed12+1 (regtools) + parser.add_argument('--bedSpliceJunctions', action='append', help='Bed12+1 format, using spliceJunctions.as') + # Generic Bed (Blastx transformed to bed) parser.add_argument('--bed', action='append', help='Bed generic format') @@ -113,6 +117,7 @@ array_inputs_bam = args.bam array_inputs_bed_generic = args.bed array_inputs_bed_simple_repeats = args.bedSimpleRepeats + array_inputs_bed_splice_junctions = args.bedSpliceJunctions array_inputs_bigwig = args.bigwig array_inputs_gff3 = args.gff3 array_inputs_gtf = args.gtf @@ -139,6 +144,7 @@ (array_inputs_bed_generic, Bed), (array_inputs_bigwig, BigWig), (array_inputs_bed_simple_repeats, BedSimpleRepeats), + (array_inputs_bed_splice_junctions, BedSpliceJunctions), (array_inputs_gff3, Gff3), (array_inputs_gtf, Gtf), (array_inputs_psl, Psl)]:
--- a/hubArchiveCreator.xml Tue Nov 22 17:13:18 2016 -0500 +++ b/hubArchiveCreator.xml Wed Dec 21 11:18:27 2016 -0500 @@ -111,6 +111,11 @@ #silent $prepare_json($f.formatChoice.bedChoice.BED_simple_repeats, $index_track_final, extra_data_dict) #end if + #if $f.formatChoice.bedChoice.bed_select == "bed_splice_junctions_option" + --bedSpliceJunctions $f.formatChoice.bedChoice.BED_splice_junctions + #silent $prepare_json($f.formatChoice.bedChoice.BED_splice_junctions, $index_track_final, + extra_data_dict) + #end if #end if #if $f.formatChoice.format_select == "psl" --psl $f.formatChoice.PSL @@ -206,6 +211,7 @@ <param name="bed_select" type="select" label="Bed Choice"> <option value="bed_generic" selected="true">BED Generic (bed3+)</option> <option value="bed_simple_repeats_option">BED Simple repeat (bed4+12 / simpleRepeat.as)</option> + <option value="bed_splice_junctions_option">BED Splice junctions (bed12+1 / spliceJunctions.as)</option> </param> <when value="bed_generic"> <param @@ -224,6 +230,14 @@ label="Bed Simple Repeats (Bed4+12) File" /> </when> + <when value="bed_splice_junctions_option"> + <param + format="bed" + name="BED_splice_junctions" + type="data" + label="Bed Splice Junctions (Bed12+1) File" + /> + </when> </conditional> <param name="track_color" type="color" label="Track color" value="#000000"> <sanitizer> @@ -442,6 +456,7 @@ /> </output> </test> + <!-- Test with Psl --> <test>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/spliceJunctions.as Wed Dec 21 11:18:27 2016 -0500 @@ -0,0 +1,17 @@ +table spliceJunctions +"Predicted splice junctions" + ( + string chrom; "Reference sequence chromosome or scaffold" + uint chromStart; "Start position in chromosome" + uint chromEnd; "End position in chromosome" + string name; "Name of item" + uint score; "Score from 0-1000" + char[1] strand; "+ or -" + uint thickStart; "Start of where display should be thick (start codon)" + uint thickEnd; "End of where display should be thick (stop codon)" + uint reserved; "Used as itemRgb as of 2004-11-22" + int blockCount; "Number of blocks" + int[blockCount] blockSizes; "Comma separated list of block sizes" + int[blockCount] chromStarts; "Start positions relative to chromStart" + uint junctionScore; "Number of reads supporting the splice junction" + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bed_splice_junctions/inputs/Splice_Junctions_by_regtools.bed Wed Dec 21 11:18:27 2016 -0500 @@ -0,0 +1,40 @@ +contig16 1808 11364 JUNC00000001 1000 - 1808 11364 255,0,0 2 89,80 0,9476 17 +contig16 11374 11605 JUNC00000003 1000 - 11374 11605 255,0,0 2 89,88 0,143 18 +contig16 11617 11783 JUNC00000004 1000 - 11617 11783 255,0,0 2 65,25 0,141 1 +contig16 11617 11844 JUNC00000005 1000 - 11617 11844 255,0,0 2 88,86 0,141 22 +contig16 11774 11977 JUNC00000006 1000 - 11774 11977 255,0,0 2 84,56 0,147 13 +contig16 11806 11956 JUNC00000007 1000 - 11806 11956 255,0,0 2 55,35 0,115 1 +contig16 11921 12673 JUNC00000008 1000 - 11921 12673 255,0,0 2 54,73 0,679 13 +contig16 11939 17275 JUNC00000009 1000 - 11939 17275 255,0,0 2 36,54 0,5282 1 +contig16 12723 17294 JUNC00000011 1000 - 12723 17294 255,0,0 2 88,73 0,4498 11 +contig16 12782 21003 JUNC00000012 1000 - 12782 21003 255,0,0 2 29,61 0,8160 1 +contig16 12796 21322 JUNC00000013 1000 - 12796 21322 255,0,0 2 15,75 0,8451 1 +contig16 17251 17490 JUNC00000014 1000 - 17251 17490 255,0,0 2 86,85 0,154 17 +contig16 17307 21307 JUNC00000015 1000 - 17307 21307 255,0,0 2 30,60 0,3940 1 +contig16 21007 21283 JUNC00000016 1000 - 21007 21283 255,0,0 2 54,36 0,240 1 +contig16 21250 22512 JUNC00000017 1000 - 21250 22512 255,0,0 2 86,58 0,1204 8 +contig16 21316 23083 JUNC00000018 1000 - 21316 23083 255,0,0 2 20,70 0,1697 1 +contig16 22206 25356 JUNC00000019 1000 + 22206 25356 255,0,0 2 39,46 0,3104 1 +contig16 27016 27178 JUNC00000021 1000 + 27016 27178 255,0,0 2 33,70 0,92 2 +contig16 27196 34369 JUNC00000022 1000 + 27196 34369 255,0,0 2 57,32 0,7141 1 +contig16 29103 29238 JUNC00000023 1000 + 29103 29238 255,0,0 2 61,18 0,117 1 +contig16 29358 34416 JUNC00000024 1000 + 29358 34416 255,0,0 2 11,79 0,4979 1 +contig16 34568 34768 JUNC00000025 1000 + 34568 34768 255,0,0 2 51,84 0,116 3 +contig16 34573 34763 JUNC00000026 1000 + 34573 34763 255,0,0 2 46,44 0,146 1 +contig16 34782 
34954 JUNC00000027 1000 + 34782 34954 255,0,0 2 53,66 0,106 4 +contig16 35110 42519 JUNC00000028 1000 + 35110 42519 255,0,0 2 64,64 0,7345 4 +contig16 42508 43097 JUNC00000030 1000 + 42508 43097 255,0,0 2 82,65 0,524 6 +contig16 43273 43505 JUNC00000031 1000 + 43273 43505 255,0,0 2 81,85 0,147 12 +contig17 1242 2504 JUNC00000032 1000 - 1242 2504 255,0,0 2 86,58 0,1204 8 +contig17 1308 3075 JUNC00000033 1000 - 1308 3075 255,0,0 2 20,70 0,1697 1 +contig17 2198 5348 JUNC00000034 1000 + 2198 5348 255,0,0 2 39,46 0,3104 1 +contig17 7008 7170 JUNC00000036 1000 + 7008 7170 255,0,0 2 33,70 0,92 2 +contig17 7188 14361 JUNC00000037 1000 + 7188 14361 255,0,0 2 57,32 0,7141 1 +contig17 9095 9230 JUNC00000038 1000 + 9095 9230 255,0,0 2 61,18 0,117 1 +contig17 9350 14408 JUNC00000039 1000 + 9350 14408 255,0,0 2 11,79 0,4979 1 +contig17 14560 14760 JUNC00000040 1000 + 14560 14760 255,0,0 2 51,84 0,116 3 +contig17 14565 14755 JUNC00000041 1000 + 14565 14755 255,0,0 2 46,44 0,146 1 +contig17 14774 14946 JUNC00000042 1000 + 14774 14946 255,0,0 2 53,66 0,106 4 +contig17 15102 22511 JUNC00000043 1000 + 15102 22511 255,0,0 2 64,64 0,7345 4 +contig17 22500 23089 JUNC00000045 1000 + 22500 23089 255,0,0 2 82,65 0,524 6 +contig17 23265 23497 JUNC00000046 1000 + 23265 23497 255,0,0 2 81,85 0,147 12
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bed_splice_junctions/myHub/__main__.log Wed Dec 21 11:18:27 2016 -0500 @@ -0,0 +1,6 @@ +DEBUG:root:#### Welcome in HubArchiveCreator Debug Mode #### + +DEBUG:root:#### Welcome in HubArchiveCreator Debug Mode #### + +DEBUG:root:#### Welcome in HubArchiveCreator Debug Mode #### +
--- a/todo.md Tue Nov 22 17:13:18 2016 -0500 +++ b/todo.md Wed Dec 21 11:18:27 2016 -0500 @@ -1,37 +1,3 @@ -### TEMP St Louis ### -- How to manage messages to user and debugging: - - User should receive INFO / WARN / ERROR / CRITICAL: - - User summary informations in stdout - - Full error in stderr - - Developer should receive all Logging stack: - - Not the user summary in stdout - - Full stack in stdout and stderr directly - - - HOWTO: - - Manage (at least) two type of Logging types: - - The user one => When Debug mode is not set or disabled - - The dev one => When Debug mode is enabled - - User: - - Two Handlers: stdout and stderr - - STDOUT: - - Filter stdout: - - NO ERROR and CRITICAL here - - (Warn) - - Formatter: - - Only show %(message) for clarity - - STDERR: - - Filter stderr => WARN / ERRROR / CRITICAL - - Formatter: - - Show message - - Show traceback - - Dev: - - One Handler: - - To both stdout and stderr - - Filter: - - Nope? - - Formatter: - - Show traceback in both - # HubArchiveCreator's TODO *TODO file inspired from: http://lifehacker.com/why-a-github-gist-is-my-favorite-to-do-list-1493063613* @@ -39,29 +5,30 @@ ### TO COMPLETE +- [ ] Parse the [JSON received from Galaxy inputs](hubArchiveCreator.xml#L47-L137) in a class instead of doing [this spaghetti code](hubArchiveCreator.py#L137-L149) - [ ] Don't let the Tool Classes manage the archive (add or remove files / folders) => Everything should pass through TrackHub or another class dedicated to that - [ ] Move the class and others program related files, into separated folders - [ ] Take into account the name of the reference genome / the change: - [ ] Somebody could want to launch two visualisations of two different genomes. Repeats of Genome with extensions associated -- [ ] Add TDD => First add the test. It should not pass. Implement. 
It should now pass :) -- [ ] Replace Gff3 by an abstract class GeneralFormat, with two sub-classes GFF3Format and GTFFormat -- [ ] TrackHub should check if the 2bit already exists instead of recreating it (which is the case atm) +- [ ] Replace Gff3/GTF by an abstract class GeneralFormat, with two sub-classes GFF3Format and GTFFormat - [ ] Manage the error when a user is selecting Generic Bed instead of Bed Simple Repeats. Two options: a. Output a better error message ("Check with the other Bed options") b. Identify internally this is not a regular BED but a specific one - [ ] Remove the non-explicit parameters for the communication between Galaxy Wrapper and the entry point -- [ ] Rename all occurences of `extension` which `datatype` +- [ ] Rename all occurrences of `extension` with `datatype` - [ ] Follow https://google.github.io/styleguide/pyguide.html -- [ ] Move to Python 3 -- [ ] Remove the repetition of the extension if it already exists +- [ ] Migrate to Python 3 +- [ ] Remove the repetition of the extension file if it already exists - [ ] Better thinking about the tool_directory management / Classes path refactoring -- [ ] Add a debug mode to have more outputs -- [ ] Improve the standard output of HAC -- [ ] Find why a $ (newline) is added when installing dependencies from tool_dependencies.xml -- [ ] Sort the order of the HTML directories/files ### DONE +- [x] Add TDD => First add the test. It should not pass. Implement.
It should now pass :) +- [x] TrackHub should check if the 2bit already exists instead of recreating it (which is the case atm) +- [x] Add a debug mode to have more outputs +- [x] Improve the standard output of HAC - [x] Each time a file is added => Print it in the output with the full path (or relative path to root) +- [x] Find why a $ (newline) is added when installing dependencies from tool_dependencies.xml +- [x] Sort the order of the HTML directories/files - [x] Add a script for Linux.x86_64 to download and chmod +x the dependencies for local testing => util/install_linux_binaries.py - [x] Add sorting BED if not sorted (Use the output of bedToBigBed) - [x] Add a script to install the huba datatype