Mercurial > repos > eric-rasche > apollo
view gff3_cleaner.py @ 1:f7d57e56f322 draft
planemo upload for repository https://github.com/TAMU-CPT/galaxy-webapollo commit 0f3c6e906c4d32843f451990b55bfa2954235a42
author | eric-rasche |
---|---|
date | Tue, 03 May 2016 13:46:14 -0400 |
parents | 6002cc0df04e |
children |
line wrap: on
line source
#!/usr/bin/env python
"""Clean up GFF3 annotations before further processing.

Reads a GFF3 file, retypes very short exons under gene/mRNA features as
``Shine_Dalgarno_sequence``, removes non-canonical splice-site
sub-features from each mRNA, and writes the resulting records to stdout.
"""
import sys
import argparse
from gff3 import feature_lambda, feature_test_type
from BCBio import GFF
import logging
logging.basicConfig(level=logging.WARN)
log = logging.getLogger(name='pav')

# Sub-feature types stripped from every mRNA by fix_apollo_issues().
_SPLICE_SITE_TYPES = (
    'non_canonical_five_prime_splice_site',
    'non_canonical_three_prime_splice_site',
)

# Exons whose length is at most this value are retyped by
# fix_apollo_issues(); longer exons are left untouched.
_MAX_RETYPED_EXON_LENGTH = 10


def coding_genes(feature_list):
    """Yield each gene feature that has at least one CDS sub-feature.

    Genes are selected with ``feature_lambda`` (``subfeatures=True``); for
    each gene, its ``sub_features`` are searched for features of type
    ``CDS``.
    """
    for gene in feature_lambda(feature_list, feature_test_type, {'type': 'gene'}, subfeatures=True):
        cds_hits = feature_lambda(gene.sub_features, feature_test_type, {'type': 'CDS'}, subfeatures=False)
        # any() stops at the first hit instead of building the full list
        # just to check that it is non-empty.
        if any(True for _ in cds_hits):
            yield gene


def genes(feature_list, feature_type='gene'):
    """Yield every feature of ``feature_type`` found via ``feature_lambda``.

    ``feature_type`` defaults to ``'gene'``.
    """
    for feature in feature_lambda(feature_list, feature_test_type, {'type': feature_type}, subfeatures=True):
        yield feature


def fix_apollo_issues(annotations, user_email):
    """Parse GFF3 ``annotations`` and yield each record after clean-up.

    For every top-level ``gene`` feature and each of its ``mRNA``
    sub-features:

    * ``exon`` children with ``len(exon) <= 10`` are retyped in place to
      ``Shine_Dalgarno_sequence``;
    * children typed ``non_canonical_five_prime_splice_site`` or
      ``non_canonical_three_prime_splice_site`` are dropped.

    ``annotations`` is passed straight to ``GFF.parse``. ``user_email`` is
    accepted so the function can be called with ``**vars(args)`` but is
    not used here.
    """
    for rec in GFF.parse(annotations):
        for gene in rec.features:
            if gene.type != 'gene':
                continue

            for mrna in gene.sub_features:
                if mrna.type != 'mRNA':
                    continue

                for child in mrna.sub_features:
                    if child.type != 'exon':
                        continue
                    if len(child) > _MAX_RETYPED_EXON_LENGTH:
                        continue
                    child.type = 'Shine_Dalgarno_sequence'

                # Rebind (rather than mutate) the list so the loop above
                # never iterates a list that is being modified.
                mrna.sub_features = [
                    child for child in mrna.sub_features
                    if child.type not in _SPLICE_SITE_TYPES
                ]
        yield rec


if __name__ == '__main__':
    # NOTE(review): the description text looks copied from another tool and
    # does not describe this script; left unchanged pending confirmation.
    parser = argparse.ArgumentParser(description='rebase gff3 features against parent locations', epilog="")
    # argparse.FileType replaces the Python 2-only ``file`` builtin, which
    # raises NameError on Python 3.
    parser.add_argument('annotations', type=argparse.FileType('r'), help='Parent GFF3 annotations')
    parser.add_argument('--user_email')
    args = parser.parse_args()

    try:
        for rec in fix_apollo_issues(**vars(args)):
            rec.annotations = {}
            GFF.write([rec], sys.stdout)
    finally:
        # Release the handle argparse opened for us.
        args.annotations.close()