comparison jbrowse_hub.py @ 31:d8049deb0c97 draft

planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit faeedda55e23f1197bc454d3db2d52af29d786e8-dirty
author yating-l
date Fri, 17 Mar 2017 12:28:32 -0400
parents 5580dbf2a31c
children 39a214ac35a7
comparison
equal deleted inserted replaced
30:daf6a1122200 31:d8049deb0c97
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 import os
4 import sys 3 import sys
5 import argparse 4 import argparse
6 import subprocess 5 import json
7 from bedToGff3 import bedToGff3
8 import blastxmlToGff3
9 import utils 6 import utils
10 import tempfile
11 import trackObject 7 import trackObject
12 import TrackHub 8 import TrackHub
13 import shutil 9
10
14 11
15 def main(argv): 12 def main(argv):
16 parser = argparse.ArgumentParser(description='Create a hub to display in jbrowse.') 13 parser = argparse.ArgumentParser(description='Create a hub to display in jbrowse.')
17 14
18 # Reference genome mandatory 15 # Reference genome mandatory
19 parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome') 16 parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome (Required)')
20 17
21 # Genome name 18 # Genome name
22 parser.add_argument('-g', '--genome_name', help='Name of reference genome') 19 parser.add_argument('-g', '--genome_name', help='Name of reference genome')
23 20
24 # Output folder 21 # Output folder
25 parser.add_argument('-o', '--out', help='output html') 22 parser.add_argument('-o', '--out', help='output html')
26 23
27 # Output folder 24 # Output folder
28 parser.add_argument('-e', '--extra_files_path', help="Directory of JBrowse Hub folder") 25 parser.add_argument('-e', '--extra_files_path', help='Directory of JBrowse Hub folder')
26
27 #Tool Directory
28 parser.add_argument('-d', '--tool_directory', help='The directory of JBrowse file convertion scripts and UCSC tools')
29 29
30 # GFF3 structure: gene->transcription->CDS 30 # GFF3 structure: gene->transcription->CDS
31 parser.add_argument('--gff3_transcript', action='append', help='GFF3 format, structure: gene->transcription->CDS') 31 parser.add_argument('--gff3_transcript', action='append', help='GFF3 format, structure: gene->transcription->CDS')
32 32
33 # GFF3 structure: gene->mRNA->CDS 33 # GFF3 structure: gene->mRNA->CDS
49 parser.add_argument('--bigwig', action='append', help='BIGWIG format to show rnaseq coverage') 49 parser.add_argument('--bigwig', action='append', help='BIGWIG format to show rnaseq coverage')
50 50
51 # GTF format 51 # GTF format
52 parser.add_argument('--gtf', action='append', help='GTF format from StringTie') 52 parser.add_argument('--gtf', action='append', help='GTF format from StringTie')
53 53
54 # Metadata json format
55 parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs')
56
54 args = parser.parse_args() 57 args = parser.parse_args()
55 all_datatype_dictionary = dict() 58 all_datatype_dictionary = dict()
56 59
57 60
61 if not args.fasta:
62 parser.print_help()
63 raise RuntimeError("No reference genome\n")
58 reference = args.fasta 64 reference = args.fasta
59 genome = 'unknown' 65 genome = 'unknown'
60 out_path = '.' 66 out_path = 'unknown.html'
61 extra_files_path = '.' 67 extra_files_path = '.'
68 tool_directory = '.'
62 if args.genome_name: 69 if args.genome_name:
63 genome = utils.sanitize_name_path(args.genome_name) 70 genome = utils.sanitize_name(args.genome_name)
64 if args.out: 71 if args.out:
65 out_path = args.out 72 out_path = args.out
66 if args.extra_files_path: 73 if args.extra_files_path:
67 extra_files_path = utils.sanitize_name_path(args.extra_files_path) 74 extra_files_path = utils.sanitize_name(args.extra_files_path)
68 cwd = os.getcwd() 75
69 #tool_directory does not work for Galaxy tools; all tools need to exist in the current PATH, which is handled through tool dependencies 76 #tool_directory does not work for Galaxy tools; all tools need to exist in the current PATH, which is handled through tool dependencies
70 tool_directory = os.path.join(cwd, 'JBrowse-1.12.1/bin') 77 if args.tool_directory:
78 tool_directory = args.tool_directory
79
80 #Calculate chromosome sizes using genome reference and UCSC tools
71 chrom_size = utils.getChromSizes(reference, tool_directory) 81 chrom_size = utils.getChromSizes(reference, tool_directory)
72 all_tracks = trackObject.trackObject(chrom_size.name, genome, extra_files_path) #store converted files in the array: all_tracks.tracks 82
83 #get metadata from json file
84 json_inputs_data = args.data_json
85 if json_inputs_data:
86 inputs_data = json.loads(json_inputs_data)
87 else:
88 inputs_data = {}
89
90 #Initiate trackObject
91 all_tracks = trackObject.trackObject(chrom_size.name, genome, extra_files_path)
92
73 array_inputs_bam = args.bam 93 array_inputs_bam = args.bam
74 array_inputs_bed_simple_repeats = args.bedSimpleRepeats 94 array_inputs_bed_simple_repeats = args.bedSimpleRepeats
75 array_inputs_bed_splice_junctions = args.bedSpliceJunctions 95 array_inputs_bed_splice_junctions = args.bedSpliceJunctions
76 array_inputs_bigwig = args.bigwig 96 array_inputs_bigwig = args.bigwig
77 array_inputs_gff3_transcript = args.gff3_transcript 97 array_inputs_gff3_transcript = args.gff3_transcript
78 array_inputs_gff3_mrna = args.gff3_mrna 98 array_inputs_gff3_mrna = args.gff3_mrna
79 array_inputs_gtf = args.gtf 99 array_inputs_gtf = args.gtf
80 array_inputs_blastxml = args.blastxml 100 array_inputs_blastxml = args.blastxml
101
81 if array_inputs_bam: 102 if array_inputs_bam:
82 all_datatype_dictionary['bam'] = array_inputs_bam 103 all_datatype_dictionary['bam'] = array_inputs_bam
83 if array_inputs_bed_simple_repeats: 104 if array_inputs_bed_simple_repeats:
84 all_datatype_dictionary['bedSimpleRepeats'] = array_inputs_bed_simple_repeats 105 all_datatype_dictionary['bedSimpleRepeats'] = array_inputs_bed_simple_repeats
85 if array_inputs_bed_splice_junctions: 106 if array_inputs_bed_splice_junctions:
93 if array_inputs_gtf: 114 if array_inputs_gtf:
94 all_datatype_dictionary['gtf'] = array_inputs_gtf 115 all_datatype_dictionary['gtf'] = array_inputs_gtf
95 if array_inputs_blastxml: 116 if array_inputs_blastxml:
96 all_datatype_dictionary['blastxml'] = array_inputs_blastxml 117 all_datatype_dictionary['blastxml'] = array_inputs_blastxml
97 118
98 print all_datatype_dictionary 119 print "input tracks: \n", all_datatype_dictionary
99 120
100 for datatype, inputfiles in all_datatype_dictionary.items(): 121 for datatype, inputfiles in all_datatype_dictionary.items():
101 try: 122 try:
102 if not inputfiles: 123 if not inputfiles:
103 raise ValueError('empty input, must provide track files!\n') 124 raise ValueError('empty input, must provide track files!\n')
104 except IOError: 125 except IOError:
105 print 'Cannot open', datatype 126 print 'Cannot open', datatype
106 else: 127 else:
107 for f in inputfiles: 128 for f in inputfiles:
108 all_tracks.addToRaw(f, datatype) 129 metadata = {}
130 if f in inputs_data.keys():
131 metadata = inputs_data[f]
132 #Convert tracks into gff3 format
133 all_tracks.addToRaw(f, datatype, metadata)
109 134
110 jbrowseHub = TrackHub.TrackHub(all_tracks, reference, out_path, tool_directory, genome, extra_files_path) 135 jbrowseHub = TrackHub.TrackHub(all_tracks, reference, out_path, tool_directory, genome, extra_files_path)
111 jbrowseHub.createHub() 136 jbrowseHub.createHub()
112 137
138 """
139 def extractMetadata(array_inputs, inputs_data):
140 metadata_dict = {}
141 for input_false_path in array_inputs:
142 for key, data_value in inputs_data.items():
143 if key == input_false_path:
144 metadata_dict[input_false_path]
145 """
113 146
114 if __name__ == "__main__": 147 if __name__ == "__main__":
115 main(sys.argv) 148 main(sys.argv)
116 149