annotate fml_gff_groomer/scripts/gff_loci_merge.py @ 0:a35d6c641115 default tip

Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author vipints
date Tue, 07 Jun 2011 16:47:44 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
1 #!/usr/bin/env python
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
2 #
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
3 # This program is free software; you can redistribute it and/or modify
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
4 # it under the terms of the GNU General Public License as published by
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
5 # the Free Software Foundation; either version 3 of the License, or
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
6 # (at your option) any later version.
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
7 #
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
8 # Written (W) 2010 Vipin T Sreedharan, Friedrich Miescher Laboratory of the Max Planck Society
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
9 # Copyright (C) 2010 Friedrich Miescher Laboratory of the Max Planck Society
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
10 #
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
11 # Description : merge transcripts annotated at the same locus into a single gene and define them as alternatively spliced forms of that gene.
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
12
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
def display_content(final_dict):
    """Print how many 'gene' features each source contributes.

    final_dict -- summary mapping (e.g. as built by available_limits()); only
                  its 'gff_source_type' entry is read, which maps
                  (source, feature type) tuples to occurrence counts.

    Returns None; the report is written to standard output, one line per
    (source, 'gene') combination in sorted order.
    """

    # Single-argument print(...) behaves identically as a Python 2 print
    # statement and as the Python 3 print function.
    print("\tUnique combination of Source(s), Feature type(s) and corresponding count:")
    for sftype, cnt in sorted(final_dict['gff_source_type'].items()):
        if sftype[1] == 'gene':
            print('\t' + str(cnt) + '\t' + str(sftype[0]) + ', ' + str(sftype[1]))
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
19
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
def available_limits(gff_file):
    """Count the sequence ids, sources and feature types used in a GFF file.

    gff_file -- path of a tab-delimited, nine-column GFF file.

    Returns a dict with the keys 'gff_id', 'gff_source_type', 'gff_source'
    and 'gff_type'. Each value is a plain dict mapping a tuple of column
    values (column 1; columns 2 and 3; column 2; column 3 respectively) to
    the number of feature lines carrying that combination.

    Lines starting with '#', blank lines and single-column lines that contain
    word characters (FASTA sequence stored in the same file) are skipped.
    Any other line without exactly nine columns raises AssertionError with
    the offending line as its message.
    """

    # Column indexes (0-based) that make up each summary key.
    filter_info = dict(gff_id=[0], gff_source_type=[1, 2],
                       gff_source=[1], gff_type=[2])
    cur_limits = dict()
    for filter_key in filter_info:
        cur_limits[filter_key] = collections.defaultdict(int)

    # 'rU' asks Python 2 for universal newlines; Python 3.11+ rejects the
    # flag with ValueError, and its text mode already translates newlines.
    try:
        gff_handle = open(gff_file, 'rU')
    except ValueError:
        gff_handle = open(gff_file, 'r')

    # The context manager closes the handle even when a malformed line
    # aborts the scan.
    with gff_handle:
        for line in gff_handle:
            record = line.strip('\n\r')
            if not record.strip() or record[0] == '#':
                continue  # blank line or comment/directive
            parts = [p.strip() for p in record.split('\t')]
            if len(parts) == 1 and re.search(r'\w+', parts[0]):
                continue  # GFF files with FASTA sequence together
            if len(parts) != 9:
                # Raised explicitly (rather than via ``assert``) so the check
                # survives ``python -O`` while callers still see the same
                # exception type.
                raise AssertionError(line)
            for filter_key, cur_indexes in filter_info.items():
                cur_id = tuple([parts[i] for i in cur_indexes])
                cur_limits[filter_key][cur_id] += 1

    # Hand back plain dicts so lookups of unseen keys do not create entries.
    final_dict = dict()
    for key, value_dict in cur_limits.items():
        final_dict[key] = dict(value_dict)
    return final_dict
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
44
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
def _gff_line(ginfo, feature_type, start, stop, attributes):
    """Format one GFF3 line for a feature on the (chr, source, strand) in ginfo."""
    return (ginfo[0] + '\t' + ginfo[1] + '\t' + feature_type + '\t'
            + str(start) + '\t' + str(stop) + '\t.\t' + ginfo[2] + '\t.\t'
            + attributes + '\n')


def GFFWriter(merged_info, genes, transcripts, exons, utr5, cds, utr3, out_file):
    """Write the merged loci and their sub-features to a GFF3 file.

    merged_info -- dict mapping a (chr, source, strand) tuple to a dict of
                   (start, stop) interval -> list of original gene ids
                   (the structure returned by UniqLoci).
    genes       -- not read by this function; kept so existing callers keep
                   working.
    transcripts -- dict mapping an original gene id to a list of transcript
                   dicts; their 'type', 'start', 'stop' and 'ID' entries are
                   written out.
    exons, utr5, cds, utr3 -- dicts mapping a transcript ID to a list of
                   dicts with 'start' and 'stop' entries.
    out_file    -- path of the file to create (an existing file is replaced).

    For every (chr, source, strand) the intervals are visited in sorted order
    and numbered from 1; each becomes a 'gene' line with the identifier
    Gene_<chr>_<5-digit counter>. Below it come the transcripts of every
    clustered gene id, each followed by its five_prime_UTR, CDS,
    three_prime_UTR and exon lines (in that order). Gene ids without an
    entry in *transcripts* contribute no lines.
    """

    # Child feature tables, in the order their lines follow a transcript.
    child_tables = (
        ('five_prime_UTR', utr5),
        ('CDS', cds),
        ('three_prime_UTR', utr3),
        ('exon', exons),
    )

    # The context manager closes the file even if a record is malformed
    # (e.g. a transcript without an 'ID') and a lookup raises mid-way.
    with open(out_file, 'w') as out_fh:
        for ginfo, regions in merged_info.items():
            for gene_cnt, (interval, features) in enumerate(sorted(regions.items()), 1):
                # master gene feature
                gene_id = 'Gene_' + ginfo[0] + '_' + str(gene_cnt).zfill(5)
                out_fh.write(_gff_line(ginfo, 'gene', interval[0], interval[1],
                                       'ID=' + gene_id + ';Name=' + gene_id))
                for geneid in features:
                    if geneid not in transcripts:
                        continue
                    for tinfo in transcripts[geneid]:
                        # transcript feature line, re-parented to the merged gene
                        out_fh.write(_gff_line(ginfo, tinfo['type'],
                                               tinfo['start'], tinfo['stop'],
                                               'ID=' + tinfo['ID'] + ';Parent=' + gene_id))
                        for child_type, table in child_tables:
                            if tinfo['ID'] not in table:
                                continue
                            for child in table[tinfo['ID']]:
                                out_fh.write(_gff_line(ginfo, child_type,
                                                       child['start'], child['stop'],
                                                       'Parent=' + tinfo['ID']))
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
67
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
def _is_single_exon_gene(gene_id, transcripts, exons):
    """Return True when gene_id has exactly one transcript with exactly one exon.

    Raises KeyError when the gene has no transcript entry, or when its only
    transcript lacks an 'ID' or has no exon entry.
    """
    models = transcripts[gene_id]
    if len(models) != 1:
        return False
    return len(exons[models[0]['ID']]) == 1


def _relocate_locus(loci, old_span, new_span, gene_id):
    """Move the cluster stored under old_span to new_span and add gene_id to it.

    If new_span already holds a cluster the two are merged, so no previously
    clustered gene id is dropped.
    """
    members = loci.pop(old_span)
    members.append(gene_id)
    if new_span in loci:
        loci[new_span].extend(members)
    else:
        loci[new_span] = members


def UniqLoci(genes, transcripts, exons):
    """Cluster gene records that describe the same locus.

    genes       -- dict mapping a gene id to a dict with 'chr', 'source',
                   'strand', 'start' and 'stop' entries ('start'/'stop' may
                   be ints or numeric strings).
    transcripts -- dict mapping a gene id to a list of transcript dicts; only
                   the list length and the 'ID' of the first entry are read.
    exons       -- dict mapping a transcript ID to a list of exon records;
                   only the list length is read.

    Returns a dict mapping (chr, source, strand) to a dict of
    (start, stop) interval -> list of gene ids clustered on that interval.

    Genes are compared only with intervals on the same (chr, source, strand):
      * an identical (start, stop) joins the existing cluster;
      * otherwise the first interval sharing at least 150 positions with the
        gene decides: the gene joins that cluster, widening the interval to
        cover the gene where it sticks out. A gene with a single one-exon
        transcript is instead kept as an interval of its own, and a gene that
        completely spans a cluster containing such a single-exon member is
        kept separate as well;
      * when nothing matches, or the transcript/exon records needed for the
        decision are missing, the gene starts a new interval.
    """

    uniq_loci = dict()
    for gid, parts in genes.items():
        gene_info = (parts['chr'], parts['source'], parts['strand'])
        # Coerce once so every interval key is an (int, int) tuple.
        start, stop = int(parts['start']), int(parts['stop'])

        if gene_info not in uniq_loci:
            uniq_loci[gene_info] = {(start, stop): [gid]}
            continue

        # same contig, orientation, source: look for a cluster to merge into
        loci = uniq_loci[gene_info]
        if (start, stop) in loci:
            # identical boundaries; exon, CDS or intron content may still vary
            loci[(start, stop)].append(gid)
            continue

        placed = False
        # Iterate over a snapshot: the dict is modified inside the loop.
        for floc in list(loci):
            # TODO boundary spanning length in same orientation for genes of
            # each species will be great.
            if (floc[1] - start) < 150 or (stop - floc[0]) < 150:
                continue
            try:
                if floc[0] <= start < floc[1]:
                    # candidate starts inside the existing interval
                    if _is_single_exon_gene(gid, transcripts, exons):
                        loci[(start, stop)] = [gid]
                    elif stop > floc[1]:
                        # candidate reaches further downstream: widen
                        _relocate_locus(loci, floc, (floc[0], stop), gid)
                    else:
                        loci[floc].append(gid)
                elif floc[0] < stop <= floc[1]:
                    # candidate ends inside the interval but starts upstream
                    if _is_single_exon_gene(gid, transcripts, exons):
                        loci[(start, stop)] = [gid]
                    else:
                        _relocate_locus(loci, floc, (start, floc[1]), gid)
                elif floc[0] > start and floc[1] < stop:
                    # existing interval lies completely inside the candidate;
                    # a list (not a generator) so every member is checked and
                    # a missing record is always noticed
                    if any([_is_single_exon_gene(member, transcripts, exons)
                            for member in loci[floc]]):
                        loci[(start, stop)] = [gid]
                    else:
                        _relocate_locus(loci, floc, (start, stop), gid)
                else:
                    continue
            except (KeyError, IndexError):
                # no transcripts or exons defined: fall through to a new interval
                break
            placed = True
            break

        if not placed:
            loci[(start, stop)] = [gid]

    return uniq_loci
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
139
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
140 def ParseGFF(gff_file):
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
141 """feature extraction from provided GFF file"""
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
142
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
143 gff_handle = open(gff_file, 'rU')
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
144 genes, transcripts, exons, utr5, cds, utr3 = dict(), dict(), dict(), dict(), dict(), dict()
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
145 for gff_line in gff_handle:
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
146 parts = gff_line.strip('\n\r').split('\t')
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
147 if gff_line[0] == '#' or gff_line[0] == '>':continue
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
148 if len(parts) == 1:continue ## Some centers in the world create GFF files with FASTA sequence together
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
149 if len(parts) != 9:sys.stdout.write('Warning: Found invalid GFF line\n' + gff_line.strip('\n\r') + '\n');continue
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
150 if parts[3] == '' or parts[4] == '':sys.stdout.write('Warning: Found missing coordinate in GFF line\n' + gff_line.strip('\n\r') + '\n');continue
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
151 if parts[2] == 'gene':
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
152 gene_info = dict()
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
153 gene_info['start'] = int(parts[3])
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
154 gene_info['stop'] = int(parts[4])
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
155 gene_info['chr'] = parts[0]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
156 gene_info['source'] = parts[1]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
157 gene_info['strand'] = parts[6]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
158 gid = ''
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
159 for attr in parts[-1].split(';'):
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
160 if attr == '':continue ## GFF line may end with a ';' symbol
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
161 attr = attr.split('=')
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
162 if attr[0] == 'ID':gid=attr[1];continue
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
163 gene_info[attr[0]] = attr[1]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
164 if gid != '': genes[gid] = gene_info
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
165 if parts[2] == 'mRNA' or parts[2] == 'transcript' or parts[2] == 'ncRNA' or parts[2] == 'tRNA' or parts[2] == 'snRNA' or parts[2] == 'scRNA' or parts[2] == 'snoRNA' or parts[2] == 'snlRNA' or parts[2] == 'rRNA' or parts[2] == 'miRNA':
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
166 mrna_info = dict()
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
167 mrna_info['start'] = int(parts[3])
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
168 mrna_info['stop'] = int(parts[4])
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
169 mrna_info['chr'] = parts[0]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
170 mrna_info['strand'] = parts[6]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
171 mrna_info['type'] = parts[2]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
172 gid = ''
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
173 for attr in parts[-1].split(';'):
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
174 if attr == '':continue ## GFF line may end with a ';' symbol
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
175 attr = attr.split('=')
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
176 if attr[0] == 'Parent':gid=attr[1];continue
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
177 mrna_info[attr[0]] = attr[1]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
178 if gid in transcripts:
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
179 transcripts[gid].append(mrna_info)
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
180 else:
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
181 transcripts[gid] = [mrna_info]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
182 if parts[2] == 'exon':
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
183 exon_info = dict()
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
184 exon_info['start'] = int(parts[3])
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
185 exon_info['stop'] = int(parts[4])
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
186 exon_info['chr'] = parts[0]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
187 exon_info['strand'] = parts[6]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
188 tid = ''
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
189 for attr in parts[-1].split(';'):
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
190 if attr == '':continue ## GFF line may end with a ';' symbol
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
191 attr = attr.split('=')
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
192 if attr[0] == 'Parent':tid=attr[1];continue
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
193 exon_info[attr[0]] = attr[1]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
194 if tid in exons:
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
195 exons[tid].append(exon_info)
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
196 else:
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
197 exons[tid] = [exon_info]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
198 if parts[2] == 'five_prime_UTR':
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
199 utr5_info = dict()
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
200 utr5_info['start'] = int(parts[3])
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
201 utr5_info['stop'] = int(parts[4])
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
202 utr5_info['chr'] = parts[0]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
203 utr5_info['strand'] = parts[6]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
204 tid = ''
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
205 for attr in parts[-1].split(';'):
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
206 if attr == '':continue ## GFF line may end with a ';' symbol
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
207 attr = attr.split('=')
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
208 if attr[0] == 'Parent':tid=attr[1];continue
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
209 utr5_info[attr[0]] = attr[1]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
210 if tid in utr5:
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
211 utr5[tid].append(utr5_info)
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
212 else:
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
213 utr5[tid] = [utr5_info]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
214 if parts[2] == 'CDS':
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
215 cds_info = dict()
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
216 cds_info['start'] = int(parts[3])
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
217 cds_info['stop'] = int(parts[4])
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
218 cds_info['chr'] = parts[0]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
219 cds_info['strand'] = parts[6]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
220 tid = ''
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
221 for attr in parts[-1].split(';'):
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
222 if attr == '':continue
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
223 attr = attr.split('=')
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
224 if attr[0] == 'Parent':tid=attr[1];continue
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
225 cds_info[attr[0]] = attr[1]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
226 if tid in cds:
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
227 cds[tid].append(cds_info)
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
228 else:
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
229 cds[tid] = [cds_info]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
230 if parts[2] == 'three_prime_UTR':
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
231 utr3_info = dict()
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
232 utr3_info['start'] = int(parts[3])
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
233 utr3_info['stop'] = int(parts[4])
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
234 utr3_info['chr'] = parts[0]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
235 utr3_info['strand'] = parts[6]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
236 tid = ''
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
237 for attr in parts[-1].split(';'):
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
238 if attr == '':continue
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
239 attr = attr.split('=')
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
240 if attr[0] == 'Parent':tid=attr[1];continue
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
241 utr3_info[attr[0]] = attr[1]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
242 if tid in utr3:
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
243 utr3[tid].append(utr3_info)
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
244 else:
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
245 utr3[tid] = [utr3_info]
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
246 gff_handle.close()
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
247 return genes, transcripts, exons, utr5, cds, utr3
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
248
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
249 import re, sys
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
250 import time
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
251 import collections
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
252
a35d6c641115 Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff changeset
if __name__=='__main__':
    # Command-line driver: reads a GFF file path and a result file path from
    # sys.argv, prints a progress report to stdout, and hands the parsed
    # content to UniqLoci and GFFWriter.
    #
    # USAGE: gff_loci_merge.py <gff file> <result file>
    #
    # The print calls use the single-argument parenthesised form so the
    # emitted text is the same as with the original print statements.

    stime = time.asctime( time.localtime(time.time()) )
    print('-------------------------------------------------------')
    print('MergeLoci started on ' + stime)
    print('-------------------------------------------------------')
    try:
        gff_file = sys.argv[1]
        out_file = sys.argv[2]
    except IndexError:
        # Only a missing positional argument can fail here; a bare except
        # would also swallow SystemExit and KeyboardInterrupt.
        sys.stderr.write("Missing GFF3 file, result file. terminating...\n")
        sys.stderr.write("USAGE: gff_loci_merge.py <gff file> <result file>\n")
        sys.exit(-1)
    print('--------')
    # The server installation prefix is replaced by 'GALAXYDIR' in the
    # displayed path only; gff_file itself is left unchanged.
    print('Level: 1- ' + 'Reading GFF file: ' + re.sub(r'/home/galaxy/galaxy-2.1.2009', r'GALAXYDIR', gff_file))
    print('--------')
    print('--------')
    print('Level: 2- ' + 'BEFORE processing, Merging feature distribution in GFF file')
    print('--------')
    # initial feature distribution in file
    final_dict = available_limits(gff_file)
    display_content(final_dict)
    # determine the whole content from GFF file
    genes, transcripts, exons, utr5, cds, utr3 = ParseGFF(gff_file)
    print('--------')
    print('Level: 3- ' + 'Start merging feature(s) from similar locations...')
    print('--------')
    # determine the same gene loci on specific chromosome based on the same source
    merged_regions = UniqLoci(genes, transcripts, exons)
    print('\tDone.')
    print('--------')
    print('Level: 4- ' + 'Writing merged feature annotation to GFF format...')
    print('--------')
    # write new GFF file with merged loci information for gene feature
    GFFWriter(merged_regions, genes, transcripts, exons, utr5, cds, utr3, out_file)
    print('\tDone.')
    # after processing display the feature distribution in the result file
    print('--------')
    print('Level: 5- ' + 'Merged feature(s) summary from GFF file')
    print('--------')
    final_dict = available_limits(out_file)
    display_content(final_dict)
    # blank separator line before the result-file path
    print('')
    print('\tMerged result file: ' + re.sub(r'/home/galaxy/galaxy-2.1.2009', r'GALAXYDIR', out_file))
    stime = time.asctime( time.localtime(time.time()) )
    print('-------------------------------------------------------')
    print('MergeLoci finished at ' + stime)
    print('-------------------------------------------------------')