Mercurial > repos > yating-l > jbrowsearchivecreator
comparison jbrowse_hub.py @ 0:8d1cf7ce65cd draft
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit d583ac16a6c6942730ea536eb59cc37941816030-dirty
| author | yating-l |
|---|---|
| date | Thu, 18 May 2017 17:25:33 -0400 |
| parents | |
| children | 671231da45f9 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:8d1cf7ce65cd |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import sys | |
| 4 import argparse | |
| 5 import json | |
| 6 import utils | |
| 7 import trackObject | |
| 8 import TrackHub | |
| 9 | |
| 10 | |
| 11 | |
# Datatype names accepted as repeatable track options. Each name is both the
# argparse attribute (derived from the ``--<name>`` option string) and the
# datatype label handed to ``trackObject.addToRaw``. The order is the order in
# which supplied datatypes are collected.
_TRACK_DATATYPES = (
    'bam',
    'bed',
    'bedSimpleRepeats',
    'bedSpliceJunctions',
    'bigwig',
    'gff3',
    'gff3_transcript',
    'gff3_mrna',
    'gtf',
    'blastxml',
)


def _build_parser():
    """Return the argument parser describing the JBrowse hub command line."""
    parser = argparse.ArgumentParser(description='Create a hub to display in jbrowse.')

    # Reference genome (checked by main(); not enforced by argparse)
    parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome (Required)')

    # Genome name
    parser.add_argument('-g', '--genome_name', help='Name of reference genome')

    # Output HTML file
    parser.add_argument('-o', '--out', help='output html')

    # Output folder
    parser.add_argument('-e', '--extra_files_path', help='Directory of JBrowse Hub folder')

    # Tool directory
    parser.add_argument('-d', '--tool_directory', help='The directory of JBrowse file convertion scripts and UCSC tools')

    # GFF3
    parser.add_argument('--gff3', action='append', help='GFF3 format')

    # GFF3 structure: gene->transcription->CDS
    parser.add_argument('--gff3_transcript', action='append', help='GFF3 format for gene prediction, structure: gene->transcription->CDS')

    # GFF3 structure: gene->mRNA->CDS
    parser.add_argument('--gff3_mrna', action='append', help='GFF3 format for gene prediction, structure: gene->mRNA->CDS')

    # Generic BED
    parser.add_argument('--bed', action='append', help='BED format')

    # trfBig simple repeats (BED 4+12)
    parser.add_argument('--bedSimpleRepeats', action='append', help='BED 4+12 format, using simpleRepeats.as')

    # regtools (BED 12+1)
    parser.add_argument('--bedSpliceJunctions', action='append', help='BED 12+1 format, using spliceJunctions.as')

    # tblastn alignment (blastxml)
    parser.add_argument('--blastxml', action='append', help='blastxml format from tblastn')

    # BAM format
    parser.add_argument('--bam', action='append', help='BAM format from HISAT')

    # BIGWIG format
    parser.add_argument('--bigwig', action='append', help='BIGWIG format to show rnaseq coverage')

    # GTF format
    parser.add_argument('--gtf', action='append', help='GTF format from StringTie')

    # Metadata, passed as a JSON string
    parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs')

    # JBrowse host
    parser.add_argument('--jbrowse_host', help="JBrowse Host")

    return parser


def _collect_tracks(args):
    """Return a dict mapping each supplied datatype to its list of input files.

    Datatypes that were not given on the command line (attribute is ``None``)
    are left out, so every value in the result is a non-empty list.
    """
    tracks = dict()
    for datatype in _TRACK_DATATYPES:
        input_files = getattr(args, datatype)
        if input_files:
            tracks[datatype] = input_files
    return tracks


def main(argv):
    """Parse the command line and create a JBrowse hub from the given tracks.

    :param argv: full argument vector in ``sys.argv`` form; element 0 (the
        program name) is skipped. When ``argv`` is empty or ``None`` argparse
        falls back to reading ``sys.argv`` itself.
    :raises RuntimeError: if no reference genome (``-f/--fasta``) is given.
    """
    parser = _build_parser()
    # Honour the caller-supplied argv instead of silently re-reading sys.argv.
    args = parser.parse_args(argv[1:] if argv else None)

    if not args.fasta:
        parser.print_help()
        raise RuntimeError("No reference genome\n")
    reference = args.fasta

    # Fall back to placeholder values for any option that is missing or empty.
    genome = args.genome_name or 'unknown'
    out_path = args.out or 'unknown.html'
    extra_files_path = args.extra_files_path or '.'
    jbrowse_host = args.jbrowse_host or ''
    # NOTE: the tool directory does not work when run as a Galaxy tool; there
    # all tools must be on the current PATH (handled via tool dependencies).
    tool_directory = args.tool_directory or '.'

    # Calculate chromosome sizes from the reference genome
    chrom_size = utils.getChromSizes(reference, tool_directory)

    # Metadata of the inputs arrives as a JSON string on the command line
    inputs_data = json.loads(args.data_json) if args.data_json else {}

    # Initiate the track container; only the ``name`` attribute of the
    # chromosome-sizes result is passed on.
    all_tracks = trackObject.trackObject(chrom_size.name, genome, extra_files_path)

    all_datatype_dictionary = _collect_tracks(args)

    # Single-argument form prints the same text under Python 2 and Python 3.
    print("input tracks: \n%s" % (all_datatype_dictionary,))

    # Register every input file under its datatype
    for datatype, inputfiles in all_datatype_dictionary.items():
        for f in inputfiles:
            all_tracks.addToRaw(f, datatype)

    jbrowseHub = TrackHub.TrackHub(all_tracks, reference, out_path, tool_directory, genome, extra_files_path, inputs_data, jbrowse_host)
    jbrowseHub.createHub()
| 159 | |
| 160 """ | |
| 161 def extractMetadata(array_inputs, inputs_data): | |
| 162 metadata_dict = {} | |
| 163 for input_false_path in array_inputs: | |
| 164 for key, data_value in inputs_data.items(): | |
| 165 if key == input_false_path: | |
| 166 metadata_dict[input_false_path] | |
| 167 """ | |
| 168 | |
# Script entry point: hand the process arguments to main() only when this
# file is executed directly rather than imported.
if __name__ == "__main__":
    main(sys.argv)
| 171 |
