Mercurial > repos > yating-l > jbrowse_hub
comparison jbrowse_hub.py @ 31:d8049deb0c97 draft
planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit faeedda55e23f1197bc454d3db2d52af29d786e8-dirty
author | yating-l |
---|---|
date | Fri, 17 Mar 2017 12:28:32 -0400 |
parents | 5580dbf2a31c |
children | 39a214ac35a7 |
comparison
equal
deleted
inserted
replaced
30:daf6a1122200 | 31:d8049deb0c97 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 | 2 |
3 import os | |
4 import sys | 3 import sys |
5 import argparse | 4 import argparse |
6 import subprocess | 5 import json |
7 from bedToGff3 import bedToGff3 | |
8 import blastxmlToGff3 | |
9 import utils | 6 import utils |
10 import tempfile | |
11 import trackObject | 7 import trackObject |
12 import TrackHub | 8 import TrackHub |
13 import shutil | 9 |
10 | |
14 | 11 |
15 def main(argv): | 12 def main(argv): |
16 parser = argparse.ArgumentParser(description='Create a hub to display in jbrowse.') | 13 parser = argparse.ArgumentParser(description='Create a hub to display in jbrowse.') |
17 | 14 |
18 # Reference genome mandatory | 15 # Reference genome mandatory |
19 parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome') | 16 parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome (Required)') |
20 | 17 |
21 # Genome name | 18 # Genome name |
22 parser.add_argument('-g', '--genome_name', help='Name of reference genome') | 19 parser.add_argument('-g', '--genome_name', help='Name of reference genome') |
23 | 20 |
24 # Output folder | 21 # Output folder |
25 parser.add_argument('-o', '--out', help='output html') | 22 parser.add_argument('-o', '--out', help='output html') |
26 | 23 |
27 # Output folder | 24 # Output folder |
28 parser.add_argument('-e', '--extra_files_path', help="Directory of JBrowse Hub folder") | 25 parser.add_argument('-e', '--extra_files_path', help='Directory of JBrowse Hub folder') |
26 | |
27 #Tool Directory | |
28 parser.add_argument('-d', '--tool_directory', help='The directory of JBrowse file convertion scripts and UCSC tools') | |
29 | 29 |
30 # GFF3 structure: gene->transcription->CDS | 30 # GFF3 structure: gene->transcription->CDS |
31 parser.add_argument('--gff3_transcript', action='append', help='GFF3 format, structure: gene->transcription->CDS') | 31 parser.add_argument('--gff3_transcript', action='append', help='GFF3 format, structure: gene->transcription->CDS') |
32 | 32 |
33 # GFF3 structure: gene->mRNA->CDS | 33 # GFF3 structure: gene->mRNA->CDS |
49 parser.add_argument('--bigwig', action='append', help='BIGWIG format to show rnaseq coverage') | 49 parser.add_argument('--bigwig', action='append', help='BIGWIG format to show rnaseq coverage') |
50 | 50 |
51 # GTF format | 51 # GTF format |
52 parser.add_argument('--gtf', action='append', help='GTF format from StringTie') | 52 parser.add_argument('--gtf', action='append', help='GTF format from StringTie') |
53 | 53 |
54 # Metadata json format | |
55 parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs') | |
56 | |
54 args = parser.parse_args() | 57 args = parser.parse_args() |
55 all_datatype_dictionary = dict() | 58 all_datatype_dictionary = dict() |
56 | 59 |
57 | 60 |
61 if not args.fasta: | |
62 parser.print_help() | |
63 raise RuntimeError("No reference genome\n") | |
58 reference = args.fasta | 64 reference = args.fasta |
59 genome = 'unknown' | 65 genome = 'unknown' |
60 out_path = '.' | 66 out_path = 'unknown.html' |
61 extra_files_path = '.' | 67 extra_files_path = '.' |
68 tool_directory = '.' | |
62 if args.genome_name: | 69 if args.genome_name: |
63 genome = utils.sanitize_name_path(args.genome_name) | 70 genome = utils.sanitize_name(args.genome_name) |
64 if args.out: | 71 if args.out: |
65 out_path = args.out | 72 out_path = args.out |
66 if args.extra_files_path: | 73 if args.extra_files_path: |
67 extra_files_path = utils.sanitize_name_path(args.extra_files_path) | 74 extra_files_path = utils.sanitize_name(args.extra_files_path) |
68 cwd = os.getcwd() | 75 |
69 #tool_directory does not work for Galaxy tool, all tools need to exist in the current PATH, deal with it with tool dependencies | 76 #tool_directory does not work for Galaxy tool, all tools need to exist in the current PATH, deal with it with tool dependencies |
70 tool_directory = os.path.join(cwd, 'JBrowse-1.12.1/bin') | 77 if args.tool_directory: |
78 tool_directory = args.tool_directory | |
79 | |
80 #Calculate chromosome sizes using genome reference and UCSC tools | |
71 chrom_size = utils.getChromSizes(reference, tool_directory) | 81 chrom_size = utils.getChromSizes(reference, tool_directory) |
72 all_tracks = trackObject.trackObject(chrom_size.name, genome, extra_files_path) #store converted files in the array: all_tracks.tracks | 82 |
83 #get metadata from json file | |
84 json_inputs_data = args.data_json | |
85 if json_inputs_data: | |
86 inputs_data = json.loads(json_inputs_data) | |
87 else: | |
88 inputs_data = {} | |
89 | |
90 #Initiate trackObject | |
91 all_tracks = trackObject.trackObject(chrom_size.name, genome, extra_files_path) | |
92 | |
73 array_inputs_bam = args.bam | 93 array_inputs_bam = args.bam |
74 array_inputs_bed_simple_repeats = args.bedSimpleRepeats | 94 array_inputs_bed_simple_repeats = args.bedSimpleRepeats |
75 array_inputs_bed_splice_junctions = args.bedSpliceJunctions | 95 array_inputs_bed_splice_junctions = args.bedSpliceJunctions |
76 array_inputs_bigwig = args.bigwig | 96 array_inputs_bigwig = args.bigwig |
77 array_inputs_gff3_transcript = args.gff3_transcript | 97 array_inputs_gff3_transcript = args.gff3_transcript |
78 array_inputs_gff3_mrna = args.gff3_mrna | 98 array_inputs_gff3_mrna = args.gff3_mrna |
79 array_inputs_gtf = args.gtf | 99 array_inputs_gtf = args.gtf |
80 array_inputs_blastxml = args.blastxml | 100 array_inputs_blastxml = args.blastxml |
101 | |
81 if array_inputs_bam: | 102 if array_inputs_bam: |
82 all_datatype_dictionary['bam'] = array_inputs_bam | 103 all_datatype_dictionary['bam'] = array_inputs_bam |
83 if array_inputs_bed_simple_repeats: | 104 if array_inputs_bed_simple_repeats: |
84 all_datatype_dictionary['bedSimpleRepeats'] = array_inputs_bed_simple_repeats | 105 all_datatype_dictionary['bedSimpleRepeats'] = array_inputs_bed_simple_repeats |
85 if array_inputs_bed_splice_junctions: | 106 if array_inputs_bed_splice_junctions: |
93 if array_inputs_gtf: | 114 if array_inputs_gtf: |
94 all_datatype_dictionary['gtf'] = array_inputs_gtf | 115 all_datatype_dictionary['gtf'] = array_inputs_gtf |
95 if array_inputs_blastxml: | 116 if array_inputs_blastxml: |
96 all_datatype_dictionary['blastxml'] = array_inputs_blastxml | 117 all_datatype_dictionary['blastxml'] = array_inputs_blastxml |
97 | 118 |
98 print all_datatype_dictionary | 119 print "input tracks: \n", all_datatype_dictionary |
99 | 120 |
100 for datatype, inputfiles in all_datatype_dictionary.items(): | 121 for datatype, inputfiles in all_datatype_dictionary.items(): |
101 try: | 122 try: |
102 if not inputfiles: | 123 if not inputfiles: |
103 raise ValueError('empty input, must provide track files!\n') | 124 raise ValueError('empty input, must provide track files!\n') |
104 except IOError: | 125 except IOError: |
105 print 'Cannot open', datatype | 126 print 'Cannot open', datatype |
106 else: | 127 else: |
107 for f in inputfiles: | 128 for f in inputfiles: |
108 all_tracks.addToRaw(f, datatype) | 129 metadata = {} |
130 if f in inputs_data.keys(): | |
131 metadata = inputs_data[f] | |
132 #Convert tracks into gff3 format | |
133 all_tracks.addToRaw(f, datatype, metadata) | |
109 | 134 |
110 jbrowseHub = TrackHub.TrackHub(all_tracks, reference, out_path, tool_directory, genome, extra_files_path) | 135 jbrowseHub = TrackHub.TrackHub(all_tracks, reference, out_path, tool_directory, genome, extra_files_path) |
111 jbrowseHub.createHub() | 136 jbrowseHub.createHub() |
112 | 137 |
138 """ | |
139 def extractMetadata(array_inputs, inputs_data): | |
140 metadata_dict = {} | |
141 for input_false_path in array_inputs: | |
142 for key, data_value in inputs_data.items(): | |
143 if key == input_false_path: | |
144 metadata_dict[input_false_path] | |
145 """ | |
113 | 146 |
114 if __name__ == "__main__": | 147 if __name__ == "__main__": |
115 main(sys.argv) | 148 main(sys.argv) |
116 | 149 |