# HG changeset patch # User yating-l # Date 1490198852 14400 # Node ID 15b4a74722d1b356e3a2573203ea4a0da177a614 # Parent 37422f705e9b2a3237403d95e6e7c514bf19f10e planemo upload for repository https://github.com/Yating-L/jbrowse_hub commit 73ede51a133dc8914812195a1b4cd512bfb8f23d diff -r 37422f705e9b -r 15b4a74722d1 TrackHub.py --- a/TrackHub.py Mon Mar 20 13:15:34 2017 -0400 +++ b/TrackHub.py Wed Mar 22 12:07:32 2017 -0400 @@ -45,7 +45,7 @@ print "Cannot prepare reference error({0}): {1}".format(e.errno, e.strerror) #TODO: hard coded the bam and bigwig tracks. Need to allow users to customize the settings def addTrack(self, track): - print "false_path" , track['false_path'] + #print "false_path" , track['false_path'] if track['false_path'] in self.metaData.keys(): metadata = self.metaData[track['false_path']] else: @@ -58,7 +58,7 @@ self.BigWig(track, metadata) else: gff3_file = os.path.join(self.raw, track['fileName']) - if track['dataType'] == 'bedSpliceJunctions' or track['dataType'] == 'gtf': + if track['dataType'] == 'bedSpliceJunctions' or track['dataType'] == 'gtf' or track['dataType'] == 'blastxml': p = subprocess.Popen(['flatfile-to-json.pl', '--gff', gff3_file, '--trackType', metadata['type'], '--trackLabel', metadata['label'], '--Config', '{"glyph": "JBrowse/View/FeatureGlyph/Segments", "category" : "%s"}' % metadata['category'], '--clientConfig', '{"color" : "%s"}' % metadata['color'], '--out', self.json]) elif track['dataType'] == 'gff3_transcript': p = subprocess.Popen(['flatfile-to-json.pl', '--gff', gff3_file, '--trackType', metadata['type'], '--trackLabel', metadata['label'], '--Config', '{"transcriptType": "transcript", "category" : "%s"}' % metadata['category'], '--clientConfig', '{"color" : "%s"}' % metadata['color'], '--out', self.json]) @@ -128,7 +128,7 @@ #If the metadata is not set, use the default value def SetMetadata(self, track, metadata): - print metadata + #print metadata #track.update(metadata) if 'label' not in metadata.keys() or metadata['label'] == '': metadata['label'] = track['fileName'] diff -r 37422f705e9b -r 15b4a74722d1 bedToGff3.pyc Binary file bedToGff3.pyc has changed diff -r 37422f705e9b -r 15b4a74722d1 blastxmlToGff3.py --- a/blastxmlToGff3.py Mon Mar 20 13:15:34 2017 -0400 +++ b/blastxmlToGff3.py Wed Mar 22 12:07:32 2017 -0400 @@ -45,35 +45,54 @@ return ' '.join(cigar) def gff3_writer(blast_records, gff3_file): - gff3 = open(gff3_file, 'w') + gff3 = open(gff3_file, 'a') gff3.write("##gff-version 3\n") seq_regions = dict() for blast_record in blast_records: query_name = blast_record.query.split(" ")[0] source = blast_record.application + method = blast_record.matrix for alignment in blast_record.alignments: + group = { + "parent_field" : OrderedDict(), + "parent_attribute" : OrderedDict(), + "alignments" : [] + } title = alignment.title.split(" ") - seqid = title[len(title) - 1] + contig_name = title[len(title) - 1] length = alignment.length - feature_type = 'match' + group['parent_field']['seqid'] = contig_name + group['parent_field']['source'] = source + group['parent_field']['type'] = 'match' + group['parent_attribute']['ID'] = contig_name + '_' + query_name + group['parent_attribute']['method'] = method + group['parent_attribute']['length'] = length + if contig_name not in seq_regions: + gff3.write("##sequence-region " + contig_name + ' 1 ' + str(length) + '\n') + seq_regions[contig_name] = length + match_num = 0 + coords = [length, 0] for hsp in alignment.hsps: + hsp_align = {} field = OrderedDict() attribute = OrderedDict() ref = hsp.sbjct query = hsp.query - field['seqid'] = seqid + field['seqid'] = contig_name field['source'] = source - field['type'] = feature_type - if seqid not in seq_regions: - gff3.write("##sequence-region " + field['seqid'] + ' 1 ' + str(length) + '\n') - seq_regions[seqid] = length + field['type'] = 'match_part' + field['start'] = hsp.sbjct_start + if field['start'] < coords[0]: + coords[0] = field['start'] ref_length = len(ref.replace('-', '')) # if run tblastn, the actual length of reference should be multiplied by 3 if source.lower() == "tblastn": ref_length *= 3 field['end'] = field['start'] + ref_length - 1 - field['score'] = hsp.expect + if field['end'] > coords[1]: + coords[1] = field['end'] + field['score'] = hsp.score #decide if the alignment in the same strand or reverse strand #reading frame # (+, +), (0, 0), (-, -) => + @@ -95,21 +114,40 @@ if source.lower() == "blastx": target_len *= 3 target_end = target_start + target_len -1 - attribute['ID'] = field['seqid'] + '_' + str(field['start']) + '_' + str(field['end']) + '_' + query_name + '_' + str(target_start) + '_' + str(target_end) + attribute['ID'] = group['parent_attribute']['ID'] + '_match_' + str(match_num) + attribute['Parent'] = group['parent_attribute']['ID'] attribute['Target'] = query_name + " " + str(target_start) + " " + str(target_end) attribute['Gap'] = align2cigar(query, ref) - #store the query sequence in the file in order to display alignment with BlastAlignment plugin + #store the query sequence and match string in the file in order to display alignment with BlastAlignment plugin + attribute['subject'] = hsp.sbjct attribute['query'] = hsp.query + attribute['match'] = hsp.match + attribute['gaps'] = attribute['match'].count(' ') + similar = attribute['match'].count('+') + attribute['identities'] = len(attribute['match']) - similar - attribute['gaps'] + attribute['positives'] = attribute['identities'] + similar + attribute['expect'] = hsp.expect # show reading frame attribute only if the frame is not (0, 0) - if hsp.frame[0] != 0 or hsp.frame[1] != 0: - attribute['reading_frame'] = str(hsp.frame[0]) + ", " + str(hsp.frame[1]) - utils.write_features(field, attribute, gff3) + attribute['frame'] = hsp.frame[1] + match_num += 1 + hsp_align['field'] = field + hsp_align['attribute'] = attribute + group['alignments'].append(hsp_align) + group['parent_field']['start'] = coords[0] + group['parent_field']['end'] = coords[1] + group['parent_field']['score'] = group['parent_field']['strand'] = group['parent_field']['phase'] = '.' + group['parent_attribute']['match_num'] = match_num + utils.write_features(group['parent_field'], group['parent_attribute'], gff3) + for align in group['alignments']: + utils.write_features(align['field'], align['attribute'], gff3) + gff3.close() + def blastxml2gff3(xml_file, gff3_file): result_handle = open(xml_file) blast_records = NCBIXML.parse(result_handle) gff3_writer(blast_records, gff3_file) - + if __name__ == "__main__": - blastxml2gff3("../tblastn_dmel.blastxml", "gff3.txt") + blastxml2gff3("../dbia3/raw/tblastn_dmel-hits-translation-r6.11.fa_vs_nucleotide_BLAST_database_from_data_3.blastxml", "gff3.txt") diff -r 37422f705e9b -r 15b4a74722d1 blastxmlToGff3.pyc Binary file blastxmlToGff3.pyc has changed diff -r 37422f705e9b -r 15b4a74722d1 utils.pyc Binary file utils.pyc has changed