annotate fml_gff_groomer/scripts/gff_available_limits.py @ 0:a35d6c641115 default tip

Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author vipints
date Tue, 07 Jun 2011 16:47:44 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
1 #!/usr/bin/env python
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
2 #
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
3 # This program is free software; you can redistribute it and/or modify
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
4 # it under the terms of the GNU General Public License as published by
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
5 # the Free Software Foundation; either version 3 of the License, or
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
6 # (at your option) any later version.
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
7 #
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
8 # Written (W) 2010 Vipin T Sreedharan, Friedrich Miescher Laboratory of the Max Planck Society
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
9 # Copyright (C) 2010 Max Planck Society
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
10 #
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
11 # Description : Provide available source, feature types from a GFF file
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
12
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
13 import re, sys
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
14 import time
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
15 import collections
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
16
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
def available_limits(gff_handle):
    """Count the sequence ids, sources and feature types found in a GFF file.

    Arguments:
        gff_handle: an iterable yielding the lines of a GFF file (for
            example an open file object).

    Returns:
        A plain dict with the keys 'gff_id', 'gff_source_type',
        'gff_source' and 'gff_type'. Each value is a dict mapping a tuple
        of column values (column 0, columns 1 and 2, column 1 and column 2
        of the tab-separated record, respectively) to the number of
        records carrying that combination.

    Raises:
        AssertionError: if a record line does not have exactly nine
            tab-separated columns; the offending line is the message.
    """
    # 0-based column indexes of a GFF record that make up each summary key.
    filter_info = dict(gff_id=[0], gff_source_type=[1, 2],
                       gff_source=[1], gff_type=[2])
    cur_limits = dict((filter_key, collections.defaultdict(int))
                      for filter_key in filter_info)
    for line in gff_handle:
        record = line.strip('\n\r')
        # Blank and whitespace-only lines carry no record; indexing into
        # them would raise IndexError, so skip them together with the
        # comment/directive lines that start with '#'.
        if not record.strip() or record[0] == "#":
            continue
        parts = [p.strip() for p in line.split('\t')]
        # GFF files may embed FASTA sequence: such lines have no tabs.
        if len(parts) == 1 and re.search(r'\w+', parts[0]):
            continue
        # Raised explicitly (not via ``assert``) so the check is still
        # performed when Python runs with -O.
        if len(parts) != 9:
            raise AssertionError(line)
        for filter_key, cur_indexes in filter_info.items():
            cur_id = tuple(parts[i] for i in cur_indexes)
            cur_limits[filter_key][cur_id] += 1
    # Hand back ordinary dicts so that looking up a missing key later does
    # not silently insert a zero count.
    return dict((filter_key, dict(counts))
                for filter_key, counts in cur_limits.items())
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
41
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: summarise the GFF file named by the first
    # argument and print the counts to standard output.
    # Every print call takes exactly one string so that the output is the
    # same whether ``print`` is the Python 2 statement or the Python 3
    # function.
    stime = time.asctime(time.localtime(time.time()))
    print('-------------------------------------------------------')
    print('FeatureScan started on ' + stime)
    print('-------------------------------------------------------')

    # 'U' (universal newlines) is only meaningful on Python 2; Python 3
    # text mode already translates newlines and rejects 'U' from 3.11 on.
    open_mode = 'rU' if sys.version_info[0] < 3 else 'r'
    try:
        gff_handle = open(sys.argv[1], open_mode)
    except (IndexError, IOError):
        # IndexError: no file argument given; IOError: file not readable.
        sys.stderr.write("Can't open the GFF3 file, terminating...\n")
        sys.stderr.write("USAGE: gff_available_limits.py <gff file>\n")
        sys.exit(-1)
    try:
        final_dict = available_limits(gff_handle)
    finally:
        # Release the file even when parsing raises.
        gff_handle.close()
    print('')
    print("==Overview of available source(s) and feature type(s) from GFF file==")
    print('')
    print("Chromosome identifier(s) and corresponding count:")
    for contig, cnt in sorted(final_dict['gff_id'].items()):
        print('\t' + str(contig[0]) + '\t' + str(cnt))
    print('')
    print("Source(s) of feature and corresponding count:")
    for source, cnt in sorted(final_dict['gff_source'].items()):
        print('\t' + str(source[0]) + '\t' + str(cnt))
    print('')
    print("Feature type(s) and corresponding count:")
    # Unlike the two listings above, this one prints the count first.
    for ftype, cnt in sorted(final_dict['gff_type'].items()):
        print('\t' + str(cnt) + '\t' + str(ftype[0]))
    print('')
    print("Unique combination of Feature type(s), Source(s) and corresponding count:")
    for sftype, cnt in sorted(final_dict['gff_source_type'].items()):
        print('\t' + str(cnt) + '\t' + str(sftype[0]) + ', '+ str(sftype[1]))
    print('')
    stime = time.asctime(time.localtime(time.time()))
    print('-------------------------------------------------------')
    print('FeatureScan finished at ' + stime)
    print('-------------------------------------------------------')