Mercurial > repos > yating-l > hubarchivecreator
changeset 9:d5781fe7b782 draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit e42ecb807fb34a8e4ddcf96944dd74d24c695352-dirty
author | yating-l |
---|---|
date | Wed, 28 Dec 2016 17:33:08 -0500 |
parents | be8fd2f24c60 |
children | cf1abb96a43d |
files | BedSpliceJunctions.py bigPsl.py hubArchiveCreator.py hubArchiveCreator.xml spliceJunctions.as test-data/bed_splice_junctions/__main__.log test-data/bed_splice_junctions/inputs/Splice_Junctions_by_regtools.bed |
diffstat | 7 files changed, 246 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/python
# File: BedSpliceJunctions.py (added as a new file in this changeset)

import os
import tempfile

from Datatype import Datatype
from Track import Track
from TrackDb import TrackDb
from util import subtools


class BedSpliceJunctions( Datatype ):
    """Datatype handler for BED12+1 splice junction files.

    On construction the input BED file is sorted, converted to a bigBed
    file with the ``spliceJunctions.as`` autoSql description, and a track
    is registered through ``createTrack``.
    """

    def __init__(self, input_bed_splice_junctions_false_path, data_bed_splice_junctions):
        """Build the bigBed file and the track for one splice junctions input.

        :param input_bed_splice_junctions_false_path: path of the BED file
            to convert.
        :param data_bed_splice_junctions: mapping that must provide the
            keys ``name``, ``order_index``, ``track_color`` and
            ``group_name``.
        :raises KeyError: if one of those keys is missing from the mapping.
        """
        # NOTE(review): myTrackFolderPath, tool_directory, chromSizesFile and
        # createTrack are used below but not defined here; presumably they
        # are provided by Datatype -- confirm against Datatype.py.
        super(BedSpliceJunctions, self).__init__()

        self.input_bed_splice_junctions_false_path = input_bed_splice_junctions_false_path
        self.name_bed_splice_junctions = data_bed_splice_junctions["name"]
        self.priority = data_bed_splice_junctions["order_index"]
        self.track_color = data_bed_splice_junctions["track_color"]
        # TODO: Think about how to avoid repetition of the group_name everywhere
        self.group_name = data_bed_splice_junctions["group_name"]

        # TODO: Change the name of the bb, to tool + genome + .bb
        trackName = "".join( ( self.name_bed_splice_junctions, '.bb' ) )
        myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName)

        auto_sql_option = os.path.join(self.tool_directory, 'spliceJunctions.as')

        # The sorted BED is only an intermediate file: scope it with a
        # context manager so it is closed (and removed) as soon as the
        # conversion is done, instead of whenever the object is collected.
        with tempfile.NamedTemporaryFile(suffix=".sortedBed") as sortedBedFile:
            # Sort processing
            subtools.sort(self.input_bed_splice_junctions_false_path, sortedBedFile.name)

            # bedToBigBed processing
            with open(myBigBedFilePath, 'w') as bigBedFile:
                subtools.bedToBigBed(sortedBedFile.name,
                                     self.chromSizesFile.name,
                                     bigBedFile.name,
                                     typeOption='bed12+1',
                                     autoSql=auto_sql_option)

        # Create the Track Object
        self.createTrack(file_path=trackName,
                         track_name=trackName,
                         long_label=self.name_bed_splice_junctions,
                         track_type='bigBed 12 +',
                         visibility='dense',
                         priority=self.priority,
                         track_file=myBigBedFilePath,
                         track_color=self.track_color,
                         group_name=self.group_name)

        print("- Bed splice junctions %s created" % self.name_bed_splice_junctions)
#!/usr/bin/python
# File: bigPsl.py (added as a new file in this changeset)

import os
import tempfile

from Datatype import Datatype
from Track import Track
from TrackDb import TrackDb
from util import subtools


class bigPsl( Datatype ):
    """Datatype handler for bigPsl (BED12+12) input files.

    On construction the input file is converted to a bigBed file with the
    ``bigPsl.as`` autoSql description, and a track is registered through
    ``createTrack``.
    """

    def __init__(self, input_bigpsl_false_path, data_bigpsl):
        """Build the bigBed file and the track for one bigPsl input.

        :param input_bigpsl_false_path: path of the bigPsl text file to
            convert.
        :param data_bigpsl: mapping that must provide the keys ``name``,
            ``order_index``, ``track_color`` and ``group_name``.
        """
        super(bigPsl, self).__init__()

        self.input_bigpsl_false_path = input_bigpsl_false_path
        self.name_bigpsl = data_bigpsl["name"]
        self.priority = data_bigpsl["order_index"]
        self.track_color = data_bigpsl["track_color"]
        # TODO: Think about how to avoid repetition of the group_name everywhere
        self.group_name = data_bigpsl["group_name"]

        # TODO: Change the name of the bb, to tool + genome + .bb
        bb_file_name = self.name_bigpsl + '.bb'
        bb_file_path = os.path.join(self.myTrackFolderPath, bb_file_name)
        as_file_path = os.path.join(self.tool_directory, 'bigPsl.as')

        # The input is handed to bedToBigBed as-is: no sort step is run
        # on it before the conversion.
        with open(bb_file_path, 'w') as bb_handle:
            subtools.bedToBigBed(
                self.input_bigpsl_false_path,
                self.chromSizesFile.name,
                bb_handle.name,
                typeOption='bed12+12',
                tab='True',
                autoSql=as_file_path,
            )

        # Register the track for the freshly built file.
        # NOTE(review): UCSC documents a dedicated 'bigPsl' track type;
        # confirm whether 'bigBed 12 +' is intended here.
        self.createTrack(
            file_path=bb_file_name,
            track_name=bb_file_name,
            long_label=self.name_bigpsl,
            track_type='bigBed 12 +',
            visibility='dense',
            priority=self.priority,
            track_file=bb_file_path,
            track_color=self.track_color,
            group_name=self.group_name,
        )

        print("- bigPsl %s created" % self.name_bigpsl)
--- a/hubArchiveCreator.py Thu Dec 22 15:32:27 2016 -0500 +++ b/hubArchiveCreator.py Wed Dec 28 17:33:08 2016 -0500 @@ -18,6 +18,7 @@ # Internal dependencies from Bam import Bam from BedSimpleRepeats import BedSimpleRepeats +from BedSpliceJunctions import BedSpliceJunctions from Bed import Bed from BigWig import BigWig from util.Fasta import Fasta @@ -26,6 +27,7 @@ from Gtf import Gtf from Psl import Psl from TrackHub import TrackHub +from bigPsl import bigPsl # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort @@ -46,9 +48,15 @@ # Bed4+12 (TrfBig) parser.add_argument('--bedSimpleRepeats', action='append', help='Bed4+12 format, using simpleRepeats.as') + # Bed12+1 (regtools) + parser.add_argument('--bedSpliceJunctions', action='append', help='Bed12+1 format, using spliceJunctions.as') + # Generic Bed (Blastx transformed to bed) parser.add_argument('--bed', action='append', help='Bed generic format') + # Bed12+12 (tblastn) + parser.add_argument('--bigpsl', action='append', help='bigPsl format') + # BigWig Management parser.add_argument('--bigwig', action='append', help='BigWig format') @@ -113,10 +121,12 @@ array_inputs_bam = args.bam array_inputs_bed_generic = args.bed array_inputs_bed_simple_repeats = args.bedSimpleRepeats + array_inputs_bed_splice_junctions = args.bedSpliceJunctions array_inputs_bigwig = args.bigwig array_inputs_gff3 = args.gff3 array_inputs_gtf = args.gtf array_inputs_psl = args.psl + array_inputs_bigpsl = args.bigpsl outputFile = args.output @@ -139,9 +149,11 @@ (array_inputs_bed_generic, Bed), (array_inputs_bigwig, BigWig), (array_inputs_bed_simple_repeats, BedSimpleRepeats), + (array_inputs_bed_splice_junctions, BedSpliceJunctions), (array_inputs_gff3, Gff3), (array_inputs_gtf, Gtf), - (array_inputs_psl, Psl)]: + (array_inputs_psl, Psl), + (array_inputs_bigpsl, bigPsl)]: if inputs: all_datatype_dictionary.update(create_ordered_datatype_objects(datatype_class, 
inputs, inputs_data))
--- a/hubArchiveCreator.xml Thu Dec 22 15:32:27 2016 -0500 +++ b/hubArchiveCreator.xml Wed Dec 28 17:33:08 2016 -0500 @@ -111,6 +111,16 @@ #silent $prepare_json($f.formatChoice.bedChoice.BED_simple_repeats, $index_track_final, extra_data_dict) #end if + #if $f.formatChoice.bedChoice.bed_select == "bed_splice_junctions_option" + --bedSpliceJunctions $f.formatChoice.bedChoice.BED_splice_junctions + #silent $prepare_json($f.formatChoice.bedChoice.BED_splice_junctions, $index_track_final, + extra_data_dict) + #end if + #if $f.formatChoice.bedChoice.bed_select == "bigpsl_option" + --bigpsl $f.formatChoice.bedChoice.bigPsl + #silent $prepare_json($f.formatChoice.bedChoice.bigPsl, $index_track_final, + extra_data_dict) + #end if #end if #if $f.formatChoice.format_select == "psl" --psl $f.formatChoice.PSL @@ -206,6 +216,8 @@ <param name="bed_select" type="select" label="Bed Choice"> <option value="bed_generic" selected="true">BED Generic (bed3+)</option> <option value="bed_simple_repeats_option">BED Simple repeat (bed4+12 / simpleRepeat.as)</option> + <option value="bed_splice_junctions_option">BED Splice junctions (bed12+1 / spliceJunctions.as)</option> + <option value="bigpsl_option">bigPsl (bed12+12 / bigPsl.as)</option> </param> <when value="bed_generic"> <param @@ -224,6 +236,22 @@ label="Bed Simple Repeats (Bed4+12) File" /> </when> + <when value="bed_splice_junctions_option"> + <param + format="bed" + name="BED_splice_junctions" + type="data" + label="Bed Splice Junctions (Bed12+1) File" + /> + </when> + <when value="bigpsl_option"> + <param + format="bed" + name="bigPsl" + type="data" + label="bigPsl (Bed12+12) File" + /> + </when> </conditional> <param name="track_color" type="color" label="Track color" value="#000000"> <sanitizer> @@ -442,6 +470,7 @@ /> </output> </test> + <!-- Test with Psl --> <test>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/spliceJunctions.as Wed Dec 28 17:33:08 2016 -0500 @@ -0,0 +1,17 @@ +table spliceJunctions +"Predicted splice junctions" + ( + string chrom; "Reference sequence chromosome or scaffold" + uint chromStart; "Start position in chromosome" + uint chromEnd; "End position in chromosome" + string name; "Name of item" + uint score; "Score from 0-1000" + char[1] strand; "+ or -" + uint thickStart; "Start of where display should be thick (start codon)" + uint thickEnd; "End of where display should be thick (stop codon)" + uint reserved; "Used as itemRgb as of 2004-11-22" + int blockCount; "Number of blocks" + int[blockCount] blockSizes; "Comma separated list of block sizes" + int[blockCount] chromStarts; "Start positions relative to chromStart" + uint junctionScore; "Number of reads supporting the splice junction" + )
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bed_splice_junctions/__main__.log Wed Dec 28 17:33:08 2016 -0500 @@ -0,0 +1,6 @@ +DEBUG:root:#### Welcome in HubArchiveCreator Debug Mode #### + +DEBUG:root:#### Welcome in HubArchiveCreator Debug Mode #### + +DEBUG:root:#### Welcome in HubArchiveCreator Debug Mode #### +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bed_splice_junctions/inputs/Splice_Junctions_by_regtools.bed Wed Dec 28 17:33:08 2016 -0500 @@ -0,0 +1,40 @@ +contig16 1808 11364 JUNC00000001 1000 - 1808 11364 255,0,0 2 89,80 0,9476 17 +contig16 11374 11605 JUNC00000003 1000 - 11374 11605 255,0,0 2 89,88 0,143 18 +contig16 11617 11783 JUNC00000004 1000 - 11617 11783 255,0,0 2 65,25 0,141 1 +contig16 11617 11844 JUNC00000005 1000 - 11617 11844 255,0,0 2 88,86 0,141 22 +contig16 11774 11977 JUNC00000006 1000 - 11774 11977 255,0,0 2 84,56 0,147 13 +contig16 11806 11956 JUNC00000007 1000 - 11806 11956 255,0,0 2 55,35 0,115 1 +contig16 11921 12673 JUNC00000008 1000 - 11921 12673 255,0,0 2 54,73 0,679 13 +contig16 11939 17275 JUNC00000009 1000 - 11939 17275 255,0,0 2 36,54 0,5282 1 +contig16 12723 17294 JUNC00000011 1000 - 12723 17294 255,0,0 2 88,73 0,4498 11 +contig16 12782 21003 JUNC00000012 1000 - 12782 21003 255,0,0 2 29,61 0,8160 1 +contig16 12796 21322 JUNC00000013 1000 - 12796 21322 255,0,0 2 15,75 0,8451 1 +contig16 17251 17490 JUNC00000014 1000 - 17251 17490 255,0,0 2 86,85 0,154 17 +contig16 17307 21307 JUNC00000015 1000 - 17307 21307 255,0,0 2 30,60 0,3940 1 +contig16 21007 21283 JUNC00000016 1000 - 21007 21283 255,0,0 2 54,36 0,240 1 +contig16 21250 22512 JUNC00000017 1000 - 21250 22512 255,0,0 2 86,58 0,1204 8 +contig16 21316 23083 JUNC00000018 1000 - 21316 23083 255,0,0 2 20,70 0,1697 1 +contig16 22206 25356 JUNC00000019 1000 + 22206 25356 255,0,0 2 39,46 0,3104 1 +contig16 27016 27178 JUNC00000021 1000 + 27016 27178 255,0,0 2 33,70 0,92 2 +contig16 27196 34369 JUNC00000022 1000 + 27196 34369 255,0,0 2 57,32 0,7141 1 +contig16 29103 29238 JUNC00000023 1000 + 29103 29238 255,0,0 2 61,18 0,117 1 +contig16 29358 34416 JUNC00000024 1000 + 29358 34416 255,0,0 2 11,79 0,4979 1 +contig16 34568 34768 JUNC00000025 1000 + 34568 34768 255,0,0 2 51,84 0,116 3 +contig16 34573 34763 JUNC00000026 1000 + 34573 34763 255,0,0 2 46,44 0,146 1 +contig16 34782 
34954 JUNC00000027 1000 + 34782 34954 255,0,0 2 53,66 0,106 4 +contig16 35110 42519 JUNC00000028 1000 + 35110 42519 255,0,0 2 64,64 0,7345 4 +contig16 42508 43097 JUNC00000030 1000 + 42508 43097 255,0,0 2 82,65 0,524 6 +contig16 43273 43505 JUNC00000031 1000 + 43273 43505 255,0,0 2 81,85 0,147 12 +contig17 1242 2504 JUNC00000032 1000 - 1242 2504 255,0,0 2 86,58 0,1204 8 +contig17 1308 3075 JUNC00000033 1000 - 1308 3075 255,0,0 2 20,70 0,1697 1 +contig17 2198 5348 JUNC00000034 1000 + 2198 5348 255,0,0 2 39,46 0,3104 1 +contig17 7008 7170 JUNC00000036 1000 + 7008 7170 255,0,0 2 33,70 0,92 2 +contig17 7188 14361 JUNC00000037 1000 + 7188 14361 255,0,0 2 57,32 0,7141 1 +contig17 9095 9230 JUNC00000038 1000 + 9095 9230 255,0,0 2 61,18 0,117 1 +contig17 9350 14408 JUNC00000039 1000 + 9350 14408 255,0,0 2 11,79 0,4979 1 +contig17 14560 14760 JUNC00000040 1000 + 14560 14760 255,0,0 2 51,84 0,116 3 +contig17 14565 14755 JUNC00000041 1000 + 14565 14755 255,0,0 2 46,44 0,146 1 +contig17 14774 14946 JUNC00000042 1000 + 14774 14946 255,0,0 2 53,66 0,106 4 +contig17 15102 22511 JUNC00000043 1000 + 15102 22511 255,0,0 2 64,64 0,7345 4 +contig17 22500 23089 JUNC00000045 1000 + 22500 23089 255,0,0 2 82,65 0,524 6 +contig17 23265 23497 JUNC00000046 1000 + 23265 23497 255,0,0 2 81,85 0,147 12