Mercurial > repos > eric-rasche > apollo
annotate gff3_cleaner.py @ 2:c8e16c8eff98 draft
planemo upload for repository https://github.com/TAMU-CPT/galaxy-webapollo commit 6944305517dcccad3e6b37f8aba0b43ccb004540
author | eric-rasche |
---|---|
date | Tue, 03 May 2016 13:48:11 -0400 |
parents | 6002cc0df04e |
children |
rev | line source |
---|---|
0
6002cc0df04e
planemo upload for repository https://github.com/TAMU-CPT/galaxy-webapollo commit 4e5a5af7689f1713c34a6ad9a9594c205e762fdd
eric-rasche
parents:
diff
changeset
|
1 #!/usr/bin/env python |
6002cc0df04e
planemo upload for repository https://github.com/TAMU-CPT/galaxy-webapollo commit 4e5a5af7689f1713c34a6ad9a9594c205e762fdd
eric-rasche
parents:
diff
changeset
|
2 import sys |
6002cc0df04e
planemo upload for repository https://github.com/TAMU-CPT/galaxy-webapollo commit 4e5a5af7689f1713c34a6ad9a9594c205e762fdd
eric-rasche
parents:
diff
changeset
|
3 import argparse |
6002cc0df04e
planemo upload for repository https://github.com/TAMU-CPT/galaxy-webapollo commit 4e5a5af7689f1713c34a6ad9a9594c205e762fdd
eric-rasche
parents:
diff
changeset
|
4 from gff3 import feature_lambda, feature_test_type |
6002cc0df04e
planemo upload for repository https://github.com/TAMU-CPT/galaxy-webapollo commit 4e5a5af7689f1713c34a6ad9a9594c205e762fdd
eric-rasche
parents:
diff
changeset
|
5 from BCBio import GFF |
6002cc0df04e
planemo upload for repository https://github.com/TAMU-CPT/galaxy-webapollo commit 4e5a5af7689f1713c34a6ad9a9594c205e762fdd
eric-rasche
parents:
diff
changeset
|
6 import logging |
6002cc0df04e
planemo upload for repository https://github.com/TAMU-CPT/galaxy-webapollo commit 4e5a5af7689f1713c34a6ad9a9594c205e762fdd
eric-rasche
parents:
diff
changeset
|
# Configure the root logger so that only WARNING and above are emitted.
logging.basicConfig(level=logging.WARNING)
# Module-level logger used by this script (registered under the name 'pav').
log = logging.getLogger('pav')
6002cc0df04e
planemo upload for repository https://github.com/TAMU-CPT/galaxy-webapollo commit 4e5a5af7689f1713c34a6ad9a9594c205e762fdd
eric-rasche
parents:
diff
changeset
|
9 |
6002cc0df04e
planemo upload for repository https://github.com/TAMU-CPT/galaxy-webapollo commit 4e5a5af7689f1713c34a6ad9a9594c205e762fdd
eric-rasche
parents:
diff
changeset
|
def coding_genes(feature_list):
    """Yield each gene feature that has at least one CDS beneath it.

    Gene features are selected from ``feature_list`` through
    ``feature_lambda`` with ``feature_test_type`` (type ``'gene'``,
    ``subfeatures=True``). For every gene, a second ``feature_lambda``
    query for type ``'CDS'`` is run over ``gene.sub_features``; the gene is
    yielded only when that query produces at least one result.
    """
    for gene in feature_lambda(feature_list, feature_test_type, {'type': 'gene'}, subfeatures=True):
        # Materialise the CDS query so that emptiness can be checked directly.
        cds_hits = list(
            feature_lambda(gene.sub_features, feature_test_type, {'type': 'CDS'}, subfeatures=False)
        )
        if cds_hits:
            yield gene
6002cc0df04e
planemo upload for repository https://github.com/TAMU-CPT/galaxy-webapollo commit 4e5a5af7689f1713c34a6ad9a9594c205e762fdd
eric-rasche
parents:
diff
changeset
|
14 |
6002cc0df04e
planemo upload for repository https://github.com/TAMU-CPT/galaxy-webapollo commit 4e5a5af7689f1713c34a6ad9a9594c205e762fdd
eric-rasche
parents:
diff
changeset
|
15 |
6002cc0df04e
planemo upload for repository https://github.com/TAMU-CPT/galaxy-webapollo commit 4e5a5af7689f1713c34a6ad9a9594c205e762fdd
eric-rasche
parents:
diff
changeset
|
def genes(feature_list, feature_type='gene'):
    """Yield the features in ``feature_list`` whose type is ``feature_type``.

    Selection is delegated to ``feature_lambda`` with ``feature_test_type``
    and ``subfeatures=True``. ``feature_type`` defaults to ``'gene'``.
    """
    type_query = {'type': feature_type}
    matches = feature_lambda(feature_list, feature_test_type, type_query, subfeatures=True)
    for match in matches:
        yield match
6002cc0df04e
planemo upload for repository https://github.com/TAMU-CPT/galaxy-webapollo commit 4e5a5af7689f1713c34a6ad9a9594c205e762fdd
eric-rasche
parents:
diff
changeset
|
21 |
6002cc0df04e
planemo upload for repository https://github.com/TAMU-CPT/galaxy-webapollo commit 4e5a5af7689f1713c34a6ad9a9594c205e762fdd
eric-rasche
parents:
diff
changeset
|
22 |
6002cc0df04e
planemo upload for repository https://github.com/TAMU-CPT/galaxy-webapollo commit 4e5a5af7689f1713c34a6ad9a9594c205e762fdd
eric-rasche
parents:
diff
changeset
|
def fix_apollo_issues(annotations, user_email):
    """Parse GFF3 records, retype short exons and drop splice-site children.

    For every record produced by ``GFF.parse(annotations)``, each top-level
    ``gene`` feature is inspected. Within each of its ``mRNA`` sub-features:

    * every ``exon`` child whose ``len()`` is 10 or less is retyped to
      ``'Shine_Dalgarno_sequence'``;
    * children typed ``non_canonical_five_prime_splice_site`` or
      ``non_canonical_three_prime_splice_site`` are removed.

    Records are modified in place and yielded one at a time.

    :param annotations: input passed straight to ``GFF.parse``.
    :param user_email: accepted but not used anywhere in this function.
    """
    dropped_types = (
        'non_canonical_five_prime_splice_site',
        'non_canonical_three_prime_splice_site',
    )

    for record in GFF.parse(annotations):
        for gene in record.features:
            if gene.type != 'gene':
                continue

            for transcript in gene.sub_features:
                if transcript.type != 'mRNA':
                    continue

                for child in transcript.sub_features:
                    # len(child) is presumably the feature's span in bases -
                    # TODO confirm against the SeqFeature implementation.
                    if child.type == 'exon' and len(child) <= 10:
                        child.type = 'Shine_Dalgarno_sequence'

                # Rebuild the child list without the splice-site markers.
                transcript.sub_features = [
                    kept for kept in transcript.sub_features
                    if kept.type not in dropped_types
                ]
        yield record
6002cc0df04e
planemo upload for repository https://github.com/TAMU-CPT/galaxy-webapollo commit 4e5a5af7689f1713c34a6ad9a9594c205e762fdd
eric-rasche
parents:
diff
changeset
|
46 |
6002cc0df04e
planemo upload for repository https://github.com/TAMU-CPT/galaxy-webapollo commit 4e5a5af7689f1713c34a6ad9a9594c205e762fdd
eric-rasche
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command-line entry point: read a GFF3 file, run fix_apollo_issues over
    # it and write each resulting record to stdout.
    parser = argparse.ArgumentParser(description='rebase gff3 features against parent locations', epilog="")
    # The path is taken as a plain string and opened below. The previous
    # ``type=file`` relied on the ``file`` builtin, which does not exist on
    # Python 3, and never closed the handle it opened.
    parser.add_argument('annotations', help='Parent GFF3 annotations')
    # parser.add_argument('genome', help='Genome Sequence')
    parser.add_argument('--user_email')

    args = parser.parse_args()
    with open(args.annotations) as annotations_handle:
        for rec in fix_apollo_issues(annotations_handle, args.user_email):
            # Clear record-level annotations before writing; presumably so
            # they are not emitted in the output - TODO confirm.
            rec.annotations = {}
            GFF.write([rec], sys.stdout)