diff jbrowse_hub.py @ 31:d8049deb0c97 draft

planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit faeedda55e23f1197bc454d3db2d52af29d786e8-dirty
author yating-l
date Fri, 17 Mar 2017 12:28:32 -0400
parents 5580dbf2a31c
children 39a214ac35a7
line wrap: on
line diff
--- a/jbrowse_hub.py	Wed Mar 15 11:46:38 2017 -0400
+++ b/jbrowse_hub.py	Fri Mar 17 12:28:32 2017 -0400
@@ -1,22 +1,19 @@
 #!/usr/bin/env python
 
-import os
 import sys
 import argparse
-import subprocess
-from bedToGff3 import bedToGff3
-import blastxmlToGff3
+import json
 import utils
-import tempfile
 import trackObject
 import TrackHub
-import shutil
+
+
 
 def main(argv):
     parser = argparse.ArgumentParser(description='Create a hub to display in jbrowse.')
 
     # Reference genome mandatory
-    parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome')
+    parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome (Required)')
 
     # Genome name
     parser.add_argument('-g', '--genome_name', help='Name of reference genome')
@@ -25,7 +22,10 @@
     parser.add_argument('-o', '--out', help='output html')
 
     # Output folder
-    parser.add_argument('-e', '--extra_files_path', help="Directory of JBrowse Hub folder")
+    parser.add_argument('-e', '--extra_files_path', help='Directory of JBrowse Hub folder')
+
+    #Tool Directory
+    parser.add_argument('-d', '--tool_directory', help='The directory of JBrowse file convertion scripts and UCSC tools')
 
     # GFF3 structure: gene->transcription->CDS
     parser.add_argument('--gff3_transcript', action='append', help='GFF3 format, structure: gene->transcription->CDS')
@@ -51,25 +51,45 @@
     # GTF format
     parser.add_argument('--gtf', action='append', help='GTF format from StringTie')
 
+    # Metadata json format
+    parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs')
+
     args = parser.parse_args()
     all_datatype_dictionary = dict()
     
 
+    if not args.fasta:
+        parser.print_help()
+        raise RuntimeError("No reference genome\n")
     reference = args.fasta
     genome = 'unknown'
-    out_path = '.'
+    out_path = 'unknown.html'
     extra_files_path = '.'
+    tool_directory = '.'
     if args.genome_name:
-        genome = utils.sanitize_name_path(args.genome_name)
+        genome = utils.sanitize_name(args.genome_name)
     if args.out:
         out_path = args.out
     if args.extra_files_path:
-        extra_files_path = utils.sanitize_name_path(args.extra_files_path)
-    cwd = os.getcwd()
+        extra_files_path = utils.sanitize_name(args.extra_files_path)
+
     #tool_directory not work for Galaxy tool, all tools need to exist in the current PATH, deal with it with tool dependencies
-    tool_directory = os.path.join(cwd, 'JBrowse-1.12.1/bin')
+    if args.tool_directory:
+        tool_directory = args.tool_directory
+
+    #Calculate chromosome sizes using genome reference and UCSC tools
     chrom_size = utils.getChromSizes(reference, tool_directory)
-    all_tracks = trackObject.trackObject(chrom_size.name, genome, extra_files_path) #store converted files in the array: all_tracks.tracks
+
+    #get metadata from json file
+    json_inputs_data = args.data_json
+    if json_inputs_data:
+        inputs_data = json.loads(json_inputs_data)
+    else:
+        inputs_data = {}
+
+    #Initiate trackObject
+    all_tracks = trackObject.trackObject(chrom_size.name, genome, extra_files_path) 
+    
     array_inputs_bam = args.bam
     array_inputs_bed_simple_repeats = args.bedSimpleRepeats
     array_inputs_bed_splice_junctions = args.bedSpliceJunctions
@@ -78,6 +98,7 @@
     array_inputs_gff3_mrna = args.gff3_mrna
     array_inputs_gtf = args.gtf
     array_inputs_blastxml = args.blastxml
+
     if array_inputs_bam:
         all_datatype_dictionary['bam'] = array_inputs_bam
     if array_inputs_bed_simple_repeats:
@@ -95,7 +116,7 @@
     if array_inputs_blastxml:
         all_datatype_dictionary['blastxml'] = array_inputs_blastxml
     
-    print all_datatype_dictionary
+    print "input tracks: \n", all_datatype_dictionary
 
     for datatype, inputfiles in all_datatype_dictionary.items():
         try:
@@ -105,11 +126,23 @@
             print 'Cannot open', datatype
         else:
             for f in inputfiles:
-                all_tracks.addToRaw(f, datatype)
+                metadata = {}
+                if f in inputs_data.keys():
+                    metadata = inputs_data[f]
+                #Convert tracks into gff3 format
+                all_tracks.addToRaw(f, datatype, metadata)
 
     jbrowseHub = TrackHub.TrackHub(all_tracks, reference, out_path, tool_directory, genome, extra_files_path)
     jbrowseHub.createHub()
-         
+
+"""        
+def extractMetadata(array_inputs, inputs_data):
+    metadata_dict = {}
+    for input_false_path in array_inputs:
+        for key, data_value in inputs_data.items():
+            if key == input_false_path:
+                metadata_dict[input_false_path]
+"""
 
 if __name__ == "__main__":
     main(sys.argv)