changeset 9:d5781fe7b782 draft

planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit e42ecb807fb34a8e4ddcf96944dd74d24c695352-dirty
author yating-l
date Wed, 28 Dec 2016 17:33:08 -0500
parents be8fd2f24c60
children cf1abb96a43d
files BedSpliceJunctions.py bigPsl.py hubArchiveCreator.py hubArchiveCreator.xml spliceJunctions.as test-data/bed_splice_junctions/__main__.log test-data/bed_splice_junctions/inputs/Splice_Junctions_by_regtools.bed
diffstat 7 files changed, 246 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/BedSpliceJunctions.py	Wed Dec 28 17:33:08 2016 -0500
@@ -0,0 +1,70 @@
+#!/usr/bin/python
+
+import os
+import tempfile
+
+from Datatype import Datatype
+from Track import Track
+from TrackDb import TrackDb
+from util import subtools
+
+
+class BedSpliceJunctions( Datatype ):
+    def __init__(self, input_bed_splice_junctions_false_path, data_bed_splice_junctions):
+
+        super(BedSpliceJunctions, self).__init__()
+
+        self.input_bed_splice_junctions_false_path = input_bed_splice_junctions_false_path
+        self.name_bed_splice_junctions = data_bed_splice_junctions["name"]
+        self.priority = data_bed_splice_junctions["order_index"]
+        self.track_color = data_bed_splice_junctions["track_color"]
+        # TODO: Think about how to avoid repetition of the group_name everywhere
+        self.group_name = data_bed_splice_junctions["group_name"]
+
+        sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
+
+        # Sort processing
+        subtools.sort(self.input_bed_splice_junctions_false_path, sortedBedFile.name)
+
+        # bedToBigBed processing
+        # TODO: Rename the .bb output file to tool + genome + '.bb'
+        trackName = "".join( ( self.name_bed_splice_junctions, '.bb' ) )
+        myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName)
+
+        auto_sql_option = os.path.join(self.tool_directory, 'spliceJunctions.as')
+
+        with open(myBigBedFilePath, 'w') as bigBedFile:
+            subtools.bedToBigBed(sortedBedFile.name,
+                                 self.chromSizesFile.name,
+                                 bigBedFile.name,
+                                 typeOption='bed12+1',
+                                 autoSql=auto_sql_option)
+
+        # Create the Track Object
+        self.createTrack(file_path=trackName,
+                         track_name=trackName,
+                         long_label=self.name_bed_splice_junctions, track_type='bigBed 12 +', visibility='dense',
+                         priority=self.priority,
+                         track_file=myBigBedFilePath,
+                         track_color=self.track_color,
+                         group_name=self.group_name)
+
+        # dataURL = "tracks/%s" % trackName
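+        # NOTE(review): disabled block below still uses the simple-repeats names (name_bed_simple_repeats, 'bigBed 4 +'); it appears superseded by the createTrack() call above.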
+        # trackDb = TrackDb(
+        #     trackName=trackName,
+        #     longLabel=self.name_bed_simple_repeats,
+        #     shortLabel=self.getShortName( self.name_bed_simple_repeats ),
+        #     trackDataURL=dataURL,
+        #     trackType='bigBed 4 +',
+        #     visibility='dense',
+        #     priority=self.priority,
+        # )
+        #
+        # self.track = Track(
+        #     trackFile=myBigBedFilePath,
+        #     trackDb=trackDb,
+        # )
+
+        print("- Bed splice junctions %s created" % self.name_bed_splice_junctions)
+        #print("- %s created in %s" % (trackName, myBigBedFilePath))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bigPsl.py	Wed Dec 28 17:33:08 2016 -0500
@@ -0,0 +1,71 @@
+#!/usr/bin/python
+
+import os
+import tempfile
+
+from Datatype import Datatype
+from Track import Track
+from TrackDb import TrackDb
+from util import subtools
+
+
+class bigPsl( Datatype ):
+    def __init__(self, input_bigpsl_false_path, data_bigpsl):
+
+        super(bigPsl, self).__init__()
+
+        self.input_bigpsl_false_path = input_bigpsl_false_path
+        self.name_bigpsl = data_bigpsl["name"]
+        self.priority = data_bigpsl["order_index"]
+        self.track_color = data_bigpsl["track_color"]
+        # TODO: Think about how to avoid repetition of the group_name everywhere
+        self.group_name = data_bigpsl["group_name"]
+
+        #sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed")
+
+        # Sort processing (disabled: the input file is handed to bedToBigBed as-is below)
+        #subtools.sort(self.input_bigpsl_false_path, sortedBedFile.name)
+
+        # bedToBigBed processing
+        # TODO: Rename the .bb output file to tool + genome + '.bb'
+        trackName = "".join( ( self.name_bigpsl, '.bb' ) )
+        myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName)
+
+        auto_sql_option = os.path.join(self.tool_directory, 'bigPsl.as')
+
+        with open(myBigBedFilePath, 'w') as bigBedFile:
+            subtools.bedToBigBed(self.input_bigpsl_false_path,
+                                 self.chromSizesFile.name,
+                                 bigBedFile.name,
+                                 typeOption='bed12+12',
+                                 tab='True',
+                                 autoSql=auto_sql_option)
+
+        # Create the Track Object
+        self.createTrack(file_path=trackName,
+                         track_name=trackName,
+                         long_label=self.name_bigpsl, track_type='bigBed 12 +', visibility='dense',
+                         priority=self.priority,
+                         track_file=myBigBedFilePath,
+                         track_color=self.track_color,
+                         group_name=self.group_name)
+
+        # dataURL = "tracks/%s" % trackName
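+        # NOTE(review): disabled block below still uses the simple-repeats names (name_bed_simple_repeats, 'bigBed 4 +'); it appears superseded by the createTrack() call above.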
+        # trackDb = TrackDb(
+        #     trackName=trackName,
+        #     longLabel=self.name_bed_simple_repeats,
+        #     shortLabel=self.getShortName( self.name_bed_simple_repeats ),
+        #     trackDataURL=dataURL,
+        #     trackType='bigBed 4 +',
+        #     visibility='dense',
+        #     priority=self.priority,
+        # )
+        #
+        # self.track = Track(
+        #     trackFile=myBigBedFilePath,
+        #     trackDb=trackDb,
+        # )
+
+        print("- bigPsl %s created" % self.name_bigpsl)
+        #print("- %s created in %s" % (trackName, myBigBedFilePath))
--- a/hubArchiveCreator.py	Thu Dec 22 15:32:27 2016 -0500
+++ b/hubArchiveCreator.py	Wed Dec 28 17:33:08 2016 -0500
@@ -18,6 +18,7 @@
 # Internal dependencies
 from Bam import Bam
 from BedSimpleRepeats import BedSimpleRepeats
+from BedSpliceJunctions import BedSpliceJunctions
 from Bed import Bed
 from BigWig import BigWig
 from util.Fasta import Fasta
@@ -26,6 +27,7 @@
 from Gtf import Gtf
 from Psl import Psl
 from TrackHub import TrackHub
+from bigPsl import bigPsl
 
 # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort
 
@@ -46,9 +48,15 @@
     # Bed4+12 (TrfBig)
     parser.add_argument('--bedSimpleRepeats', action='append', help='Bed4+12 format, using simpleRepeats.as')
 
+    # Bed12+1 (regtools)
+    parser.add_argument('--bedSpliceJunctions', action='append', help='Bed12+1 format, using spliceJunctions.as')
+
     # Generic Bed (Blastx transformed to bed)
     parser.add_argument('--bed', action='append', help='Bed generic format')
 
+    # Bed12+12 (tblastn)
+    parser.add_argument('--bigpsl', action='append', help='bigPsl format')
+
     # BigWig Management
     parser.add_argument('--bigwig', action='append', help='BigWig format')
 
@@ -113,10 +121,12 @@
     array_inputs_bam = args.bam
     array_inputs_bed_generic = args.bed
     array_inputs_bed_simple_repeats = args.bedSimpleRepeats
+    array_inputs_bed_splice_junctions = args.bedSpliceJunctions
     array_inputs_bigwig = args.bigwig
     array_inputs_gff3 = args.gff3
     array_inputs_gtf = args.gtf
     array_inputs_psl = args.psl
+    array_inputs_bigpsl = args.bigpsl
 
     outputFile = args.output
 
@@ -139,9 +149,11 @@
                         (array_inputs_bed_generic, Bed),
                         (array_inputs_bigwig, BigWig),
                         (array_inputs_bed_simple_repeats, BedSimpleRepeats),
+                        (array_inputs_bed_splice_junctions, BedSpliceJunctions),
                         (array_inputs_gff3, Gff3),
                         (array_inputs_gtf, Gtf),
-                        (array_inputs_psl, Psl)]:
+                        (array_inputs_psl, Psl),
+                        (array_inputs_bigpsl, bigPsl)]:
         if inputs:
             all_datatype_dictionary.update(create_ordered_datatype_objects(datatype_class, inputs, inputs_data))
 
--- a/hubArchiveCreator.xml	Thu Dec 22 15:32:27 2016 -0500
+++ b/hubArchiveCreator.xml	Wed Dec 28 17:33:08 2016 -0500
@@ -111,6 +111,16 @@
                         #silent $prepare_json($f.formatChoice.bedChoice.BED_simple_repeats, $index_track_final,
                                                 extra_data_dict)
                     #end if
+                    #if $f.formatChoice.bedChoice.bed_select == "bed_splice_junctions_option"
+                        --bedSpliceJunctions $f.formatChoice.bedChoice.BED_splice_junctions
+                        #silent $prepare_json($f.formatChoice.bedChoice.BED_splice_junctions, $index_track_final,
+                                                extra_data_dict)
+                    #end if
+                    #if $f.formatChoice.bedChoice.bed_select == "bigpsl_option"
+                        --bigpsl $f.formatChoice.bedChoice.bigPsl
+                        #silent $prepare_json($f.formatChoice.bedChoice.bigPsl, $index_track_final,
+                                                extra_data_dict)
+                    #end if
                 #end if
                 #if $f.formatChoice.format_select == "psl"
                     --psl $f.formatChoice.PSL
@@ -206,6 +216,8 @@
                             <param name="bed_select" type="select" label="Bed Choice">
                                 <option value="bed_generic" selected="true">BED Generic (bed3+)</option>
                                 <option value="bed_simple_repeats_option">BED Simple repeat (bed4+12 / simpleRepeat.as)</option>
+                                <option value="bed_splice_junctions_option">BED Splice junctions (bed12+1 / spliceJunctions.as)</option>
+                                <option value="bigpsl_option">bigPsl (bed12+12 / bigPsl.as)</option>
                             </param>
                             <when value="bed_generic">
                                 <param
@@ -224,6 +236,22 @@
                                         label="Bed Simple Repeats (Bed4+12) File"
                                 />
                             </when>
+                            <when value="bed_splice_junctions_option">
+                                <param
+                                        format="bed"
+                                        name="BED_splice_junctions"
+                                        type="data"
+                                        label="Bed Splice Junctions (Bed12+1) File"
+                                />
+                            </when>
+                            <when value="bigpsl_option">
+                                <param
+                                        format="bed"
+                                        name="bigPsl"
+                                        type="data"
+                                        label="bigPsl (Bed12+12) File"
+                                />
+                            </when>
                         </conditional>
                         <param name="track_color" type="color" label="Track color" value="#000000">
                             <sanitizer>
@@ -442,6 +470,7 @@
                 />
             </output>
         </test>
+        
 
         <!-- Test with Psl -->
         <test>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spliceJunctions.as	Wed Dec 28 17:33:08 2016 -0500
@@ -0,0 +1,17 @@
+table spliceJunctions
+"Predicted splice junctions"
+    (
+    string chrom;      "Reference sequence chromosome or scaffold"
+    uint   chromStart; "Start position in chromosome"
+    uint   chromEnd;   "End position in chromosome"
+    string name;       "Name of item"
+    uint   score;      "Score from 0-1000"
+    char[1] strand;    "+ or -"
+    uint thickStart;   "Start of where display should be thick (start codon)"
+    uint thickEnd;     "End of where display should be thick (stop codon)"
+    uint reserved;     "Used as itemRgb as of 2004-11-22"
+    int blockCount;    "Number of blocks"
+    int[blockCount] blockSizes; "Comma separated list of block sizes"
+    int[blockCount] chromStarts; "Start positions relative to chromStart"
+    uint junctionScore;   "Number of reads supporting the splice junction"
+    )
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bed_splice_junctions/__main__.log	Wed Dec 28 17:33:08 2016 -0500
@@ -0,0 +1,6 @@
+DEBUG:root:#### Welcome in HubArchiveCreator Debug Mode ####
+
+DEBUG:root:#### Welcome in HubArchiveCreator Debug Mode ####
+
+DEBUG:root:#### Welcome in HubArchiveCreator Debug Mode ####
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bed_splice_junctions/inputs/Splice_Junctions_by_regtools.bed	Wed Dec 28 17:33:08 2016 -0500
@@ -0,0 +1,40 @@
+contig16	1808	11364	JUNC00000001	1000	-	1808	11364	255,0,0	2	89,80	0,9476	17
+contig16	11374	11605	JUNC00000003	1000	-	11374	11605	255,0,0	2	89,88	0,143	18
+contig16	11617	11783	JUNC00000004	1000	-	11617	11783	255,0,0	2	65,25	0,141	1
+contig16	11617	11844	JUNC00000005	1000	-	11617	11844	255,0,0	2	88,86	0,141	22
+contig16	11774	11977	JUNC00000006	1000	-	11774	11977	255,0,0	2	84,56	0,147	13
+contig16	11806	11956	JUNC00000007	1000	-	11806	11956	255,0,0	2	55,35	0,115	1
+contig16	11921	12673	JUNC00000008	1000	-	11921	12673	255,0,0	2	54,73	0,679	13
+contig16	11939	17275	JUNC00000009	1000	-	11939	17275	255,0,0	2	36,54	0,5282	1
+contig16	12723	17294	JUNC00000011	1000	-	12723	17294	255,0,0	2	88,73	0,4498	11
+contig16	12782	21003	JUNC00000012	1000	-	12782	21003	255,0,0	2	29,61	0,8160	1
+contig16	12796	21322	JUNC00000013	1000	-	12796	21322	255,0,0	2	15,75	0,8451	1
+contig16	17251	17490	JUNC00000014	1000	-	17251	17490	255,0,0	2	86,85	0,154	17
+contig16	17307	21307	JUNC00000015	1000	-	17307	21307	255,0,0	2	30,60	0,3940	1
+contig16	21007	21283	JUNC00000016	1000	-	21007	21283	255,0,0	2	54,36	0,240	1
+contig16	21250	22512	JUNC00000017	1000	-	21250	22512	255,0,0	2	86,58	0,1204	8
+contig16	21316	23083	JUNC00000018	1000	-	21316	23083	255,0,0	2	20,70	0,1697	1
+contig16	22206	25356	JUNC00000019	1000	+	22206	25356	255,0,0	2	39,46	0,3104	1
+contig16	27016	27178	JUNC00000021	1000	+	27016	27178	255,0,0	2	33,70	0,92	2
+contig16	27196	34369	JUNC00000022	1000	+	27196	34369	255,0,0	2	57,32	0,7141	1
+contig16	29103	29238	JUNC00000023	1000	+	29103	29238	255,0,0	2	61,18	0,117	1
+contig16	29358	34416	JUNC00000024	1000	+	29358	34416	255,0,0	2	11,79	0,4979	1
+contig16	34568	34768	JUNC00000025	1000	+	34568	34768	255,0,0	2	51,84	0,116	3
+contig16	34573	34763	JUNC00000026	1000	+	34573	34763	255,0,0	2	46,44	0,146	1
+contig16	34782	34954	JUNC00000027	1000	+	34782	34954	255,0,0	2	53,66	0,106	4
+contig16	35110	42519	JUNC00000028	1000	+	35110	42519	255,0,0	2	64,64	0,7345	4
+contig16	42508	43097	JUNC00000030	1000	+	42508	43097	255,0,0	2	82,65	0,524	6
+contig16	43273	43505	JUNC00000031	1000	+	43273	43505	255,0,0	2	81,85	0,147	12
+contig17	1242	2504	JUNC00000032	1000	-	1242	2504	255,0,0	2	86,58	0,1204	8
+contig17	1308	3075	JUNC00000033	1000	-	1308	3075	255,0,0	2	20,70	0,1697	1
+contig17	2198	5348	JUNC00000034	1000	+	2198	5348	255,0,0	2	39,46	0,3104	1
+contig17	7008	7170	JUNC00000036	1000	+	7008	7170	255,0,0	2	33,70	0,92	2
+contig17	7188	14361	JUNC00000037	1000	+	7188	14361	255,0,0	2	57,32	0,7141	1
+contig17	9095	9230	JUNC00000038	1000	+	9095	9230	255,0,0	2	61,18	0,117	1
+contig17	9350	14408	JUNC00000039	1000	+	9350	14408	255,0,0	2	11,79	0,4979	1
+contig17	14560	14760	JUNC00000040	1000	+	14560	14760	255,0,0	2	51,84	0,116	3
+contig17	14565	14755	JUNC00000041	1000	+	14565	14755	255,0,0	2	46,44	0,146	1
+contig17	14774	14946	JUNC00000042	1000	+	14774	14946	255,0,0	2	53,66	0,106	4
+contig17	15102	22511	JUNC00000043	1000	+	15102	22511	255,0,0	2	64,64	0,7345	4
+contig17	22500	23089	JUNC00000045	1000	+	22500	23089	255,0,0	2	82,65	0,524	6
+contig17	23265	23497	JUNC00000046	1000	+	23265	23497	255,0,0	2	81,85	0,147	12