Mercurial > repos > eduardo > annotateviz
view annotateviz.py @ 6:9a09f7072ed5 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/annotateviz commit 92d0101d8677f60a314611d325ca51a809a545d7-dirty
author | eduardo |
---|---|
date | Tue, 27 Jun 2017 08:12:24 -0400 |
parents | 92921dfea0b5 |
children |
line wrap: on
line source
"""Export features from a gffutils sqlite database as two JSON files.

One file receives a flat list of match records built from the
``protein_match``, ``signalpep``, ``trans_helix`` and ``PFAM`` features;
the other receives every ``gene`` feature, each followed by its child
features.
"""
import argparse
import json
import os
import sys

import gffutils

# Feature types whose entries are exported as match records, in output order.
MATCH_FEATURE_TYPES = ("protein_match", "signalpep", "trans_helix", "PFAM")


def add_matches(matches, array):
    """Append one summary dict per match feature to ``array``.

    Args:
        matches: iterable of feature objects exposing ``id``, ``seqid``,
            ``source``, ``score`` and item access for ``"Name"``.
        array: list that is extended in place with dicts holding the keys
            ``mRNA``, ``Contig``, ``Source``, ``Score`` and ``Prediction``.
    """
    for m in matches:
        array.append({
            # Hack to recover the mRNA from PFAM gff, which doesn't include a
            # query field: keep the first two dot-separated parts of the id.
            'mRNA': '.'.join(m.id.split('.', 2)[:2]),
            'Contig': m.seqid,
            'Source': m.source,
            'Score': m.score,
            'Prediction': m["Name"],
        })


def _feature_record(feature):
    """Return the JSON-ready dict describing a single feature."""
    return {
        'seqid': feature.seqid,
        'source': feature.source,
        'featuretype': feature.featuretype,
        'start': feature.start,
        'end': feature.end,
        'score': feature.score,
        'strand': feature.strand,
        'frame': feature.frame,
        # Reads the attributes object's private ``_d`` storage directly,
        # presumably to obtain a plain mapping that ``json`` can serialise.
        # NOTE(review): this is tied to gffutils internals - confirm it still
        # holds for the gffutils version this tool is pinned to.
        'attributes': feature.attributes.__dict__["_d"],
    }


def main(argv, wayout):
    """Parse the command line and write both JSON output files.

    Args:
        argv: command-line arguments without the program name. When empty,
            the help text is shown instead. The list is not modified.
        wayout: unused; kept so existing callers that pass an output stream
            keep working.

    Raises:
        SystemExit: raised by argparse after printing help or on a missing
            or invalid argument.
    """
    # Work on a copy so the caller's list is never mutated.
    argv = list(argv) if argv else ["-h"]

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)
    # All three options are needed for a run to complete, so let argparse
    # report a missing one instead of failing later with a traceback.
    # NOTE(review): the match records are written to --json and the
    # gene/child feature records to --pred, which reads as the reverse of the
    # help strings below - confirm against the tool wrapper before changing.
    parser.add_argument('-d', '--database', required=True,
                        help="gffutils sqlite database")
    parser.add_argument('-j', '--json', required=True,
                        help="annotations in json format")
    parser.add_argument('-p', '--pred', required=True,
                        help="predictions in json")
    args = parser.parse_args(argv)

    # Global gffutils switch, set before any feature is read; presumably it
    # makes single-valued attributes come back as scalars rather than
    # one-element lists - confirm in the gffutils documentation.
    gffutils.constants.always_return_list = False
    db = gffutils.interface.FeatureDB(args.database)

    # Flat list of match records, written to the --json path.
    prediction_list = []
    for feature_type in MATCH_FEATURE_TYPES:
        add_matches(db.features_of_type(feature_type), prediction_list)
    with open(args.json, 'w') as fout:
        json.dump(prediction_list, fout)

    # Each gene followed by its children, written to the --pred path.
    records = []
    for gene in db.features_of_type("gene"):
        records.append(_feature_record(gene))
        for child in db.children(gene):
            records.append(_feature_record(child))
    with open(args.pred, 'w') as fout:
        json.dump(records, fout)


if __name__ == "__main__":
    main(sys.argv[1:], sys.stdout)