annotate trams.py @ 3:1f00946b18c2 draft default tip

Uploaded
author rijst
date Wed, 12 Dec 2012 09:09:45 -0500
parents cc961e057668
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
cc961e057668 Uploaded
rijst
parents:
diff changeset
1 #################
cc961e057668 Uploaded
rijst
parents:
diff changeset
2 transl_table = 11
cc961e057668 Uploaded
rijst
parents:
diff changeset
3 intro_message = ''' +------------------------------------------------------------------+
cc961e057668 Uploaded
rijst
parents:
diff changeset
4 | Tool for Rapid Annotation of Microbial SNPs (TRAMS): a simple |
cc961e057668 Uploaded
rijst
parents:
diff changeset
5 | program for rapid annotation of genomic variation in prokaryotes |
cc961e057668 Uploaded
rijst
parents:
diff changeset
6 | |
cc961e057668 Uploaded
rijst
parents:
diff changeset
7 | Developed by: Richard A. Reumerman, Paul R. Herron, |
cc961e057668 Uploaded
rijst
parents:
diff changeset
8 | Paul A. Hoskisson and Vartul Sangal |
cc961e057668 Uploaded
rijst
parents:
diff changeset
9 +------------------------------------------------------------------+\n'''
cc961e057668 Uploaded
rijst
parents:
diff changeset
10 #################
cc961e057668 Uploaded
rijst
parents:
diff changeset
11
cc961e057668 Uploaded
rijst
parents:
diff changeset
12 import sys
cc961e057668 Uploaded
rijst
parents:
diff changeset
13 import time
cc961e057668 Uploaded
rijst
parents:
diff changeset
14 start = time.clock()
cc961e057668 Uploaded
rijst
parents:
diff changeset
15
cc961e057668 Uploaded
rijst
parents:
diff changeset
# Command line arguments: SNP REF REF-TYPE ANNOT OVERL SUM;
# Six arguments are required, so the usage text has to name all six;
if len(sys.argv) < 7:
    sys.exit("\nNot enough arguments given.\nUsage: TRAMS_Galaxy.py [SNP.] [REF.] [REF-TYPE] [ANNOT.] [OVERL.] [SUM.]")


def _open_or_exit(path, mode):
    '''Open 'path' with 'mode' and return the file object.
    On IOError the script is terminated with a message naming the file
    and the error text reported by the operating system.'''
    try:
        return open(path, mode)
    except IOError as e:
        sys.exit("Error trying to open '{0}': {1}".format(path, e.strerror))


# Input files; "rU" keeps universal newline handling on Python 2;
file_snps = _open_or_exit(sys.argv[1], "rU")
file_ref = _open_or_exit(sys.argv[2], "rU")

filetype_reference = sys.argv[3]

# Output files: annotation table, overlap report and summary;
file_out = _open_or_exit(sys.argv[4], "w")
file_overlap = _open_or_exit(sys.argv[5], "w")
file_summary = _open_or_exit(sys.argv[6], "w")
cc961e057668 Uploaded
rijst
parents:
diff changeset
42
cc961e057668 Uploaded
rijst
parents:
diff changeset
43 import Bio
cc961e057668 Uploaded
rijst
parents:
diff changeset
44 from Bio import SeqIO, SeqFeature
cc961e057668 Uploaded
rijst
parents:
diff changeset
45 from Bio.SeqRecord import SeqRecord
cc961e057668 Uploaded
rijst
parents:
diff changeset
46 from Bio.Seq import Seq
cc961e057668 Uploaded
rijst
parents:
diff changeset
47 from Bio.Alphabet import generic_dna, IUPAC
cc961e057668 Uploaded
rijst
parents:
diff changeset
48 from Bio.Data import CodonTable
cc961e057668 Uploaded
rijst
parents:
diff changeset
49
cc961e057668 Uploaded
rijst
parents:
diff changeset
50 modules_loaded = time.clock()
cc961e057668 Uploaded
rijst
parents:
diff changeset
51
cc961e057668 Uploaded
rijst
parents:
diff changeset
def non_coding_calc(gene, pos=0):
    '''Count the bases lying in the gaps between the sub-features of a
    pseudogene. Returns 0 when gene['pseudo'] is false.
    On the -1 strand every sub-feature starting before 'pos' is skipped and
    only the gaps after it are counted; otherwise gaps are counted from
    gene['start'] until a sub-feature ends at or past 'pos' (pos = 0 counts
    all gaps).
    Input: {start, subfeats, pseudo, strand}, pos (default = 0)'''
    if not gene['pseudo']:
        return 0

    gap_total = 0
    last_end = gene['start']
    on_reverse = gene['strand'] == -1
    for part in gene['subfeats']:
        part_start = part.location._start.position
        part_end = part.location._end.position
        if on_reverse and part_start < pos:
            # Sub-feature lies before 'pos': only remember where it ended;
            last_end = part_end
            continue
        gap_total += (part_start - last_end)
        last_end = part_end
        if not on_reverse and pos != 0 and last_end >= pos:
            break

    return gap_total
cc961e057668 Uploaded
rijst
parents:
diff changeset
74
cc961e057668 Uploaded
rijst
parents:
diff changeset
75
cc961e057668 Uploaded
rijst
parents:
diff changeset
def region_calc(bounds,length):
    '''Split a sequence of 'length' bases into consecutive regions.
    Every feature yields a region [start, end, index] where index is its
    position in 'bounds'; every stretch not covered by any feature seen so
    far yields an intergenic region [start, end, -1].
    Input: [{start,end}] ordered by start, length
    Returns: [[start, end, feature index or -1]]'''
    regions = []
    lastend = 0# Highest feature end seen so far;
    for index, feat in enumerate(bounds):
        if feat['start'] > lastend:# Intergenic region present;
            regions.append([lastend, feat['start'], -1])
            lastend = feat['start']
        regions.append([feat['start'], feat['end'], index])
        if feat['end'] > lastend:
            lastend = feat['end']

    # Compare against lastend, not the end of the last region: the last
    # region may be nested inside an earlier, longer feature. This also
    # covers an empty 'bounds', where the whole sequence is intergenic;
    if lastend < length:# Final tail of genome;
        regions.append([lastend, length, -1])

    return regions
cc961e057668 Uploaded
rijst
parents:
diff changeset
93
cc961e057668 Uploaded
rijst
parents:
diff changeset
94
cc961e057668 Uploaded
rijst
parents:
diff changeset
def overlap_calc(bounds):
    '''Find the stretches shared by pairs of regions.
    Each region is compared with the regions listed after it, stopping at
    the first one that starts at or beyond its end.
    Input: [[start, end, index]] ordered by start
    Returns: [[overlap start, overlap end, index of the earlier region,
    index of the later region, [0,0]]]'''
    overlaps = []
    for idx, upstr in enumerate(bounds[:-1]):
        upstr_end = upstr[1]
        for downstr in bounds[idx+1:]:
            if not downstr[0] < upstr_end:# No use looking further;
                break
            # Complete overlap ends with the later region, partial overlap
            # ends with the earlier one;
            shared_end = downstr[1] if downstr[1] < upstr_end else upstr_end
            overlaps.append([downstr[0], shared_end, upstr[2], downstr[2], [0,0]])

    return overlaps
cc961e057668 Uploaded
rijst
parents:
diff changeset
114
cc961e057668 Uploaded
rijst
parents:
diff changeset
115
cc961e057668 Uploaded
rijst
parents:
diff changeset
def match_feature(bounds,pos,prev=0):
    '''Look up the feature that contains a position.
    The search starts at index 'prev' and gives up as soon as a feature
    starts beyond 'pos'. A feature contains 'pos' when start <= pos < end.
    Input: [{start,end}],pos,prev (default = 0)
    Returns: index of the first matching feature, or -1 if none matches.'''
    for idx in range(prev, len(bounds)):
        candidate = bounds[idx]
        if pos < candidate['start']:# No use looking further
            return -1
        if pos < candidate['end']:
            return idx

    return -1
cc961e057668 Uploaded
rijst
parents:
diff changeset
127
cc961e057668 Uploaded
rijst
parents:
diff changeset
128
cc961e057668 Uploaded
rijst
parents:
diff changeset
def write_output(line,target=file_out):
    '''Write one SNP row to 'target' and flush it.
    The row starts with a newline and the position, followed by the
    tab-separated reference cells; the cells of every strain follow, each
    strain preceded by one extra (empty) tab column.
    Input: [[pos],[ref cells],[strain cells],[strain cells],etc], target
    (default = the annotation output file)'''
    pieces = ['\n'+str(line[0][0])]
    pieces.extend('\t'+str(cell) for cell in line[1])
    for strain in line[2:]:
        pieces.append('\t')
        pieces.extend('\t'+str(cell) for cell in strain)

    target.write(''.join(pieces))
    target.flush()
cc961e057668 Uploaded
rijst
parents:
diff changeset
143
cc961e057668 Uploaded
rijst
parents:
diff changeset
144
cc961e057668 Uploaded
rijst
parents:
diff changeset
def new_codon_calc(ref_codon, new_base, pos_in_cod):
    '''Return the codon as a string with the base at position 'pos_in_cod'
    (counted from 1) replaced by 'new_base'.'''
    before = ref_codon[0:pos_in_cod-1]
    after = ref_codon[pos_in_cod:len(ref_codon)]
    return str(before + new_base + after)
cc961e057668 Uploaded
rijst
parents:
diff changeset
147
cc961e057668 Uploaded
rijst
parents:
diff changeset
148
cc961e057668 Uploaded
rijst
parents:
diff changeset
def mut_type_check(ref_res, ref_codon, pos_in_gene, new_base, new_codon):
    '''Classify the effect of replacing 'ref_codon' by 'new_codon'.
    The new codon is translated with the module-level transl_table.
    Returns: ['','','',''] when both codons are equal (case-insensitive),
    otherwise [type, new_base, new_codon, new residue] where type is
    'synonymous', 'nonstart', 'nonsense', 'nonstop' or 'nonsynonymous'.'''
    if str(ref_codon).lower() == str(new_codon).lower():
        return ['']*4

    new_residue = Seq(new_codon).translate(table=transl_table)
    details = [new_base, new_codon, new_residue]

    if str(new_residue) == str(ref_res):
        return ['synonymous'] + details

    # position 0,1 or 2 and SNP is in start codon;
    if (pos_in_gene / 3) < 1 and str(ref_codon).upper() in CodonTable.unambiguous_dna_by_id[transl_table].start_codons:
        start_codons = CodonTable.unambiguous_dna_by_id[transl_table].start_codons
        if str(new_codon).upper() in start_codons:
            return ['nonsynonymous'] + details
        return ['nonstart'] + details

    if str(new_residue) == '*':
        return ['nonsense'] + details
    if str(ref_res) == '*':
        return ['nonstop'] + details
    return ['nonsynonymous'] + details
cc961e057668 Uploaded
rijst
parents:
diff changeset
163
cc961e057668 Uploaded
rijst
parents:
diff changeset
164
cc961e057668 Uploaded
rijst
parents:
diff changeset
def codon_process(codon):
    '''Process the SNP lines collected for one codon and write them out.
    The lines are walked three times: once to find, per strain, the last
    line carrying a mutated base, once to fill in the per-strain cells
    (mutation type on that last line, 'MNP' on earlier mutated lines) and
    once to write every non-empty line with write_output.
    Input: [value passed to mut_type_check as pos_in_gene, line, line, line,
    strand]; a line is '' or [[pos],[ref cells],[strain cells],etc]. The
    lines are reversed in place when strand is -1.
    It also uses global variables strain_nr and straininfo'''
    bases = ['a','g','c','t']
    on_reverse = codon[-1] == -1
    lastposition = [-1]*(strain_nr-1)
    new_codons = ['']*(strain_nr-1)
    if on_reverse:# Change codon position order for -1 features;
        codon[1:-1] = codon[1:-1][::-1]

    # Pass 1: last mutated line per strain, new codon starts as ref. codon;
    for i, line in enumerate(codon[1:-1], 1):
        if line == '':
            continue
        for j, strain in enumerate(line[2:]):
            if strain[1] in bases:
                lastposition[j] = i
                new_codons[j] = line[1][8]

    # Pass 2: fill in the output cells of every strain;
    for i, line in enumerate(codon[1:-1], 1):
        if line == '':
            continue
        pos_in_cod = 4-i if on_reverse else i
        for j, strain in enumerate(line[2:]):
            if i == lastposition[j]:# Check for synonymous etc.;
                new_codons[j] = new_codon_calc(new_codons[j], strain[1], pos_in_cod)
                outcome = mut_type_check(line[1][9], line[1][8], codon[0], strain[1], new_codons[j])
                line[j+2] = outcome
                straininfo[j][outcome[0]] += 1# Counting;
            elif strain[1] in bases:
                line[j+2] = ['MNP', strain[1], '', '']
                straininfo[j]['mnps'] += 1
                new_codons[j] = new_codon_calc(new_codons[j], strain[1], pos_in_cod)
            elif strain[0] == 'Allele missing':
                line[j+2] = strain
            else:
                line[j+2] = ['']*4

    # Pass 3: output;
    for line in codon[1:-1]:
        if line != '':
            write_output(line)
cc961e057668 Uploaded
rijst
parents:
diff changeset
203
cc961e057668 Uploaded
rijst
parents:
diff changeset
def feature_props(feature):
    '''Collect the properties of a sequence feature in a dictionary.
    The sequence is taken from the module-level seq_record.
    Input: feature (type, location, qualifiers, sub_features, extract())
    Returns: {type, strand, sequence, pseudo, locus_tag, gene_name, product,
    start, end}, plus 'pure_seq' for features with a 'pseudo' qualifier and
    'subfeats' for features that have sub-features. Qualifiers that are
    missing or empty are reported as ''.'''
    def first_value(key):
        # Qualifier values are lists; fall back to '' instead of raising
        # when a feature lacks the qualifier (e.g. a CDS without product);
        values = feature.qualifiers.get(key)
        return values[0] if values else ''

    location = feature.location
    properties = {'type':feature.type,'strand':location._strand,
                  'sequence':feature.extract(seq_record.seq),'pseudo': False,
                  'locus_tag':'','gene_name':'','product':'',
                  'start':int(location._start.position),
                  'end':int(location._end.position)}
    if 'pseudo' in feature.qualifiers:
        properties['pseudo'] = True
        properties['type'] = 'pseudogene'
        properties['pure_seq'] = properties['sequence']
        # For pseudo features 'sequence' is the whole start-end span,
        # 'pure_seq' keeps what extract() returned;
        span = seq_record.seq[location._start.position:location._end.position]
        if properties['strand'] == -1:
            span = span.reverse_complement()
        properties['sequence'] = span
    if feature.sub_features: properties['subfeats'] = feature.sub_features
    properties['locus_tag'] = first_value('locus_tag')
    properties['gene_name'] = first_value('gene')
    if feature.type in ['tRNA','rRNA','CDS']:
        properties['product'] = first_value('product')

    return properties
cc961e057668 Uploaded
rijst
parents:
diff changeset
224
cc961e057668 Uploaded
rijst
parents:
diff changeset
# Read embl/genbank file for information on sequence features;
# Only the first record of the file is used. Exception (not a bare except)
# is caught so Ctrl-C and SystemExit are not reported as a broken file;
try:
    seq_record = next(SeqIO.parse(file_ref, filetype_reference))
except Exception:
    sys.exit("Error reading "+sys.argv[2]+", please check file for errors.")
finally:
    file_ref.close()
cc961e057668 Uploaded
rijst
parents:
diff changeset
232
cc961e057668 Uploaded
rijst
parents:
diff changeset
# Loop through genome features and save relevant properties;
feats = []# List of property dictionaries, one per saved feature;

feature_types = {'intergenic':0,'gene':0,'pseudogene':0}
feat_temp_store = ''# 'gene' feature waiting for a following feature with the same span;
for feature in seq_record.features:
    # Check if gene is defined as other feature (e.g. CDS). Else, save info from 'gene';
    if feat_temp_store != '':
        same_span = (feature.location._start.position == feat_temp_store.location._start.position and
                     feature.location._end.position == feat_temp_store.location._end.position)
        if not same_span:# Gene not also defined as other feature;
            feats.append(feature_props(feat_temp_store))
        feat_temp_store = ''
    # Always remember the current gene, also when the previous feature was
    # a gene that had to be saved, otherwise that second gene is lost;
    if feature.type == 'gene':
        feat_temp_store = feature

    if feature.type not in ['source','gene','misc_feature']:
        if feature.type not in feature_types and feature.type != 'CDS': feature_types[feature.type] = 0
        feats.append(feature_props(feature))

# A gene at the very end of the feature list has no following feature to
# compare with, save it here;
if feat_temp_store != '':
    feats.append(feature_props(feat_temp_store))
    feat_temp_store = ''

feat_props = sorted(feats, key=lambda cells:int(cells['start']))
feat_boundaries = [{'start':item['start'],'end':item['end']} for item in feat_props]
regions = region_calc(feat_boundaries,len(seq_record.seq))
feat_overlap = overlap_calc(regions)
cc961e057668 Uploaded
rijst
parents:
diff changeset
259
cc961e057668 Uploaded
rijst
parents:
diff changeset
260 reference_loaded = time.clock()
cc961e057668 Uploaded
rijst
parents:
diff changeset
261
cc961e057668 Uploaded
rijst
parents:
diff changeset
# Create array of SNPs from input file for processing;
lines = [line.split('\t') for line in file_snps if line.strip()]
file_snps.close()
if not lines:# Nothing to annotate, not even a header line;
    sys.exit("Error reading "+sys.argv[1]+": file contains no data.")


def _snp_sort_key(cells):
    '''Return the position in the first cell as a number. Rows without a
    numeric position sort first (-1) instead of aborting the sort, so the
    SNP loop can skip them with its own validity check.'''
    try:
        return int(cells[0])
    except ValueError:
        return -1


# First line contains headers, extract number of strains etc;
headers = lines[0]
snp_table = sorted(lines[1:], key=_snp_sort_key)
cc961e057668 Uploaded
rijst
parents:
diff changeset
268
cc961e057668 Uploaded
rijst
parents:
diff changeset
269 snps_loaded = time.clock()
cc961e057668 Uploaded
rijst
parents:
diff changeset
270
cc961e057668 Uploaded
rijst
parents:
diff changeset
# Print output file headers;
# headers[0] is the position column, headers[1] the reference strain and
# every later column one strain to compare against it;
headers[-1] = headers[-1].rstrip()# Remove newline character;
strain_nr = len(headers)-1
straininfo = []# One dictionary of counters per non-reference strain;
found_names = [headers[1]+' (reference)']
first_cells = ['\t'+headers[1]+'\t'*9]
second_cells = ['Position\tFeature\tLocus tag\tGene\tProduct\tStart\tEnd\tStrand\tRef. base\tRef. codon\tRef. res.']
for i,strain in enumerate(headers[2:]):
    counters = {'snps':0,'mnps':0,'synonymous':0,'nonsynonymous':0,'nonstart':0,'nonstop':0,'nonsense':0}
    counters.update(feature_types)
    straininfo.append(counters)
    found_names.append(strain)
    first_cells.append('\t\t'+strain+'\t'*3)
    second_cells.append('\t\tSNP type\tNew base\tNew codon\tNew res.')

strains_found = 'Found '+str(strain_nr)+' strains: '+', '.join(found_names)
first_line = ''.join(first_cells)
second_line = ''.join(second_cells)

file_out.write(first_line+'\n'+second_line)
file_out.flush()
cc961e057668 Uploaded
rijst
parents:
diff changeset
287
cc961e057668 Uploaded
rijst
parents:
diff changeset
# Loop through SNPs from array and process them;
# State shared by the iterations of the SNP loop:
props = dict()# Properties of a feature;
prev_snp = ''# Position of previous SNP;
to_write = list()# Information of current SNP;
compl_bases = {'a':'t','t':'a','g':'c','c':'g'}# Complement of each base;
firstsnp = True# First snp of region, or of codon in cases of 3 positions in codon mutated;
prev_start = 0# Index in the SNP table to start the next region from;
j = 0
k = 0
overlap_snps = list()
codon = ['','','','','']# Array of codon positions. First item is position of first base of codon in the gene;
cc961e057668 Uploaded
rijst
parents:
diff changeset
297
cc961e057668 Uploaded
rijst
parents:
diff changeset
298 for region in regions:
cc961e057668 Uploaded
rijst
parents:
diff changeset
299 firstsnp = True
cc961e057668 Uploaded
rijst
parents:
diff changeset
300 i = prev_start
cc961e057668 Uploaded
rijst
parents:
diff changeset
301 while i < len(snp_table):# Loop through SNPs
cc961e057668 Uploaded
rijst
parents:
diff changeset
302 snp_entry = snp_table[i]
cc961e057668 Uploaded
rijst
parents:
diff changeset
303 if not str(snp_entry[0]).isdigit():# Not a valid line, skip;
cc961e057668 Uploaded
rijst
parents:
diff changeset
304 i += 1
cc961e057668 Uploaded
rijst
parents:
diff changeset
305 continue
cc961e057668 Uploaded
rijst
parents:
diff changeset
306
cc961e057668 Uploaded
rijst
parents:
diff changeset
307 pos = int(snp_entry[0])-1
cc961e057668 Uploaded
rijst
parents:
diff changeset
308 if pos < region[0]:# Not inside region yet;
cc961e057668 Uploaded
rijst
parents:
diff changeset
309 i += 1
cc961e057668 Uploaded
rijst
parents:
diff changeset
310 continue
cc961e057668 Uploaded
rijst
parents:
diff changeset
311 elif firstsnp and pos < region[1]:
cc961e057668 Uploaded
rijst
parents:
diff changeset
312 prev_start = i
cc961e057668 Uploaded
rijst
parents:
diff changeset
313 elif pos >= region[1]:# End of region, process and next;
cc961e057668 Uploaded
rijst
parents:
diff changeset
314 if not firstsnp and codon != ['','','','','']:
cc961e057668 Uploaded
rijst
parents:
diff changeset
315 codon_process(codon)
cc961e057668 Uploaded
rijst
parents:
diff changeset
316 break
cc961e057668 Uploaded
rijst
parents:
diff changeset
317
cc961e057668 Uploaded
rijst
parents:
diff changeset
318 # Documentation of SNPs in feature overlaps;
cc961e057668 Uploaded
rijst
parents:
diff changeset
319 while j < len(feat_overlap)-1 and pos > feat_overlap[j][1]: j += 1
cc961e057668 Uploaded
rijst
parents:
diff changeset
320 k = j
cc961e057668 Uploaded
rijst
parents:
diff changeset
321 while k < len(feat_overlap) and pos >= feat_overlap[k][0]:
cc961e057668 Uploaded
rijst
parents:
diff changeset
322 if pos < feat_overlap[k][1]:
cc961e057668 Uploaded
rijst
parents:
diff changeset
323 if feat_overlap[k][4][0] == 0:
cc961e057668 Uploaded
rijst
parents:
diff changeset
324 feat_overlap[k][4][0] = pos
cc961e057668 Uploaded
rijst
parents:
diff changeset
325 feat_overlap[k][4][1] = pos
cc961e057668 Uploaded
rijst
parents:
diff changeset
326 k += 1
cc961e057668 Uploaded
rijst
parents:
diff changeset
327
cc961e057668 Uploaded
rijst
parents:
diff changeset
328
cc961e057668 Uploaded
rijst
parents:
diff changeset
329 snp_entry[-1] = snp_entry[-1].rstrip()# Remove newline character at end of line;
cc961e057668 Uploaded
rijst
parents:
diff changeset
330 mnp=in_feat=False
cc961e057668 Uploaded
rijst
parents:
diff changeset
331 snp_feat = region[2]
cc961e057668 Uploaded
rijst
parents:
diff changeset
332 ref_base = snp_entry[1]
cc961e057668 Uploaded
rijst
parents:
diff changeset
333
cc961e057668 Uploaded
rijst
parents:
diff changeset
334 to_write = [[pos+1]]
cc961e057668 Uploaded
rijst
parents:
diff changeset
335
cc961e057668 Uploaded
rijst
parents:
diff changeset
336 # Output feature properties and reference situation;
cc961e057668 Uploaded
rijst
parents:
diff changeset
337 if snp_feat == -1:
cc961e057668 Uploaded
rijst
parents:
diff changeset
338 codon = ['']*5
cc961e057668 Uploaded
rijst
parents:
diff changeset
339 to_write.append(['intergenic','','','','','','',ref_base.upper(),'',''])
cc961e057668 Uploaded
rijst
parents:
diff changeset
340 elif feat_props[snp_feat]['type'] not in ['CDS','gene','pseudogene']:# In feature, but non-coding;
cc961e057668 Uploaded
rijst
parents:
diff changeset
341 codon = ['']*5
cc961e057668 Uploaded
rijst
parents:
diff changeset
342 props = feat_props[snp_feat]
cc961e057668 Uploaded
rijst
parents:
diff changeset
343 if props['strand'] == -1: ref_base = (compl_bases[snp_entry[1].lower()])
cc961e057668 Uploaded
rijst
parents:
diff changeset
344 else: ref_base = snp_entry[1]
cc961e057668 Uploaded
rijst
parents:
diff changeset
345 to_write.append([props['type'],props['locus_tag'],props['gene_name'],
cc961e057668 Uploaded
rijst
parents:
diff changeset
346 props['product'],props['start']+1,props['end'],
cc961e057668 Uploaded
rijst
parents:
diff changeset
347 '',ref_base.upper(),'',''])
cc961e057668 Uploaded
rijst
parents:
diff changeset
348 else:# in CDS/gene feature, check codon etc;
cc961e057668 Uploaded
rijst
parents:
diff changeset
349 props = feat_props[snp_feat]
cc961e057668 Uploaded
rijst
parents:
diff changeset
350 sequence = props['sequence']
cc961e057668 Uploaded
rijst
parents:
diff changeset
351 if props['strand'] == -1:
cc961e057668 Uploaded
rijst
parents:
diff changeset
352 pos_in_gene = props['end'] - pos - 1# Python counting
cc961e057668 Uploaded
rijst
parents:
diff changeset
353 ref_base = (compl_bases[snp_entry[1].lower()])
cc961e057668 Uploaded
rijst
parents:
diff changeset
354 else:
cc961e057668 Uploaded
rijst
parents:
diff changeset
355 pos_in_gene = pos - props['start']# Python counting
cc961e057668 Uploaded
rijst
parents:
diff changeset
356 ref_base = snp_entry[1]
cc961e057668 Uploaded
rijst
parents:
diff changeset
357
cc961e057668 Uploaded
rijst
parents:
diff changeset
358 in_feat = True
cc961e057668 Uploaded
rijst
parents:
diff changeset
359 if props['pseudo'] and 'subfeats' in props:# Pseudogene that needs special attention;
cc961e057668 Uploaded
rijst
parents:
diff changeset
360 in_feat = False
cc961e057668 Uploaded
rijst
parents:
diff changeset
361 subfeat_boundaries = [{'start':item.location._start.position,'end':item.location._end.position}
cc961e057668 Uploaded
rijst
parents:
diff changeset
362 for item in props['subfeats']]
cc961e057668 Uploaded
rijst
parents:
diff changeset
363 snp_subfeat = match_feature(subfeat_boundaries,pos)
cc961e057668 Uploaded
rijst
parents:
diff changeset
364 if snp_subfeat != -1:
cc961e057668 Uploaded
rijst
parents:
diff changeset
365 in_feat = True
cc961e057668 Uploaded
rijst
parents:
diff changeset
366 pos_in_gene -= non_coding_calc({'start':props['start'],'subfeats':props['subfeats'],
cc961e057668 Uploaded
rijst
parents:
diff changeset
367 'pseudo':True,'strand':props['strand']},pos)
cc961e057668 Uploaded
rijst
parents:
diff changeset
368 sequence = props['pure_seq']
cc961e057668 Uploaded
rijst
parents:
diff changeset
369
cc961e057668 Uploaded
rijst
parents:
diff changeset
370 if not in_feat:# In pseudogene non-coding region;
cc961e057668 Uploaded
rijst
parents:
diff changeset
371 codon = ['']*5
cc961e057668 Uploaded
rijst
parents:
diff changeset
372 to_write.append(['non coding',props['locus_tag'],props['gene_name'],props['product'],
cc961e057668 Uploaded
rijst
parents:
diff changeset
373 props['start']+1,props['end'],props['strand'],ref_base.upper(),
cc961e057668 Uploaded
rijst
parents:
diff changeset
374 '',''])
cc961e057668 Uploaded
rijst
parents:
diff changeset
375 else:# In coding region;
cc961e057668 Uploaded
rijst
parents:
diff changeset
376 pos_in_cod = (pos_in_gene+1)%3
cc961e057668 Uploaded
rijst
parents:
diff changeset
377 if pos_in_cod == 0: pos_in_cod = 3# Remainder of division 0 means 3rd place in codon;
cc961e057668 Uploaded
rijst
parents:
diff changeset
378
cc961e057668 Uploaded
rijst
parents:
diff changeset
379 old_codon = sequence[pos_in_gene-pos_in_cod+1:pos_in_gene-pos_in_cod+4].upper()
cc961e057668 Uploaded
rijst
parents:
diff changeset
380 old_residue = old_codon.translate(table=transl_table)
cc961e057668 Uploaded
rijst
parents:
diff changeset
381 to_write.append([props['type'],props['locus_tag'],props['gene_name'],props['product'],
cc961e057668 Uploaded
rijst
parents:
diff changeset
382 props['start']+1,props['end'],props['strand'],ref_base.upper(),
cc961e057668 Uploaded
rijst
parents:
diff changeset
383 old_codon,old_residue])
cc961e057668 Uploaded
rijst
parents:
diff changeset
384
cc961e057668 Uploaded
rijst
parents:
diff changeset
385 if in_feat and not firstsnp and (pos >= prev_snp):# Check if snp is in same codon as previous snp. Position check for overlapping features;
cc961e057668 Uploaded
rijst
parents:
diff changeset
386 if props['strand'] == 1 and (pos - prev_snp + 1) < pos_in_cod:# Same codon (Positive strand);
cc961e057668 Uploaded
rijst
parents:
diff changeset
387 mnp = True
cc961e057668 Uploaded
rijst
parents:
diff changeset
388 elif props['strand'] == -1 and (pos - prev_snp + 1) <= (3 - pos_in_cod):# Same codon (negative strand);
cc961e057668 Uploaded
rijst
parents:
diff changeset
389 mnp = True
cc961e057668 Uploaded
rijst
parents:
diff changeset
390
cc961e057668 Uploaded
rijst
parents:
diff changeset
391 # Process previous codon if not MNP;
cc961e057668 Uploaded
rijst
parents:
diff changeset
392 if in_feat and not mnp:
cc961e057668 Uploaded
rijst
parents:
diff changeset
393 if not firstsnp:
cc961e057668 Uploaded
rijst
parents:
diff changeset
394 codon_process(codon)
cc961e057668 Uploaded
rijst
parents:
diff changeset
395 codon = [pos_in_gene-pos_in_cod+1,'','','',props['strand']]
cc961e057668 Uploaded
rijst
parents:
diff changeset
396
cc961e057668 Uploaded
rijst
parents:
diff changeset
397
cc961e057668 Uploaded
rijst
parents:
diff changeset
398 for l, snp in enumerate(snp_entry[2:]):# Loop through SNPs/strains;
cc961e057668 Uploaded
rijst
parents:
diff changeset
399
cc961e057668 Uploaded
rijst
parents:
diff changeset
400 snp = snp.lower()
cc961e057668 Uploaded
rijst
parents:
diff changeset
401 if snp == '':# Empty cell;
cc961e057668 Uploaded
rijst
parents:
diff changeset
402 to_write.append(['','','',''])
cc961e057668 Uploaded
rijst
parents:
diff changeset
403 continue
cc961e057668 Uploaded
rijst
parents:
diff changeset
404
cc961e057668 Uploaded
rijst
parents:
diff changeset
405 if snp == '-': # Feature not present in this strain;
cc961e057668 Uploaded
rijst
parents:
diff changeset
406 to_write.append(['Allele missing','','',''])
cc961e057668 Uploaded
rijst
parents:
diff changeset
407 continue
cc961e057668 Uploaded
rijst
parents:
diff changeset
408
cc961e057668 Uploaded
rijst
parents:
diff changeset
409 if snp_feat == -1:# Intergenic;
cc961e057668 Uploaded
rijst
parents:
diff changeset
410 if snp == ref_base.lower():
cc961e057668 Uploaded
rijst
parents:
diff changeset
411 to_write.append(['']*4)
cc961e057668 Uploaded
rijst
parents:
diff changeset
412 else:
cc961e057668 Uploaded
rijst
parents:
diff changeset
413 to_write.append(['',snp,'',''])
cc961e057668 Uploaded
rijst
parents:
diff changeset
414 straininfo[l]['intergenic'] += 1
cc961e057668 Uploaded
rijst
parents:
diff changeset
415 straininfo[l]['snps'] += 1
cc961e057668 Uploaded
rijst
parents:
diff changeset
416 continue
cc961e057668 Uploaded
rijst
parents:
diff changeset
417
cc961e057668 Uploaded
rijst
parents:
diff changeset
418 if props['strand'] == -1:
cc961e057668 Uploaded
rijst
parents:
diff changeset
419 snp = compl_bases[snp]
cc961e057668 Uploaded
rijst
parents:
diff changeset
420
cc961e057668 Uploaded
rijst
parents:
diff changeset
421 if snp == ref_base.lower():
cc961e057668 Uploaded
rijst
parents:
diff changeset
422 to_write.append(['']*4)
cc961e057668 Uploaded
rijst
parents:
diff changeset
423 else:
cc961e057668 Uploaded
rijst
parents:
diff changeset
424 to_write.append(['',snp,'',''])
cc961e057668 Uploaded
rijst
parents:
diff changeset
425 straininfo[l]['snps'] += 1
cc961e057668 Uploaded
rijst
parents:
diff changeset
426 if props['type'] != 'CDS':
cc961e057668 Uploaded
rijst
parents:
diff changeset
427 straininfo[l][props['type']] += 1
cc961e057668 Uploaded
rijst
parents:
diff changeset
428
cc961e057668 Uploaded
rijst
parents:
diff changeset
429
cc961e057668 Uploaded
rijst
parents:
diff changeset
430
cc961e057668 Uploaded
rijst
parents:
diff changeset
431 if props['type'] in ['CDS','gene','pseudogene'] and in_feat:
cc961e057668 Uploaded
rijst
parents:
diff changeset
432 codon[pos_in_cod] = to_write
cc961e057668 Uploaded
rijst
parents:
diff changeset
433 else:
cc961e057668 Uploaded
rijst
parents:
diff changeset
434 write_output(to_write)
cc961e057668 Uploaded
rijst
parents:
diff changeset
435
cc961e057668 Uploaded
rijst
parents:
diff changeset
436 if firstsnp: firstsnp = False
cc961e057668 Uploaded
rijst
parents:
diff changeset
437 prev_snp = pos+1
cc961e057668 Uploaded
rijst
parents:
diff changeset
438 i += 1
cc961e057668 Uploaded
rijst
parents:
diff changeset
439
cc961e057668 Uploaded
rijst
parents:
diff changeset
440
cc961e057668 Uploaded
rijst
parents:
diff changeset
441 if codon != ['','','','','']: codon_process(codon) # Hand the last pending codon to codon_process, unless it is the all-empty placeholder set for non-coding positions;
cc961e057668 Uploaded
rijst
parents:
diff changeset
442
cc961e057668 Uploaded
rijst
parents:
diff changeset
443 file_out.close() # Nothing more is written to file_out after this point;
cc961e057668 Uploaded
rijst
parents:
diff changeset
444
cc961e057668 Uploaded
rijst
parents:
diff changeset
445 end = time.clock() # End timestamp for the run-time line below. NOTE(review): time.clock() was removed in Python 3.8; time.perf_counter() is the documented replacement if this script is ported (start must change with it);
cc961e057668 Uploaded
rijst
parents:
diff changeset
446
cc961e057668 Uploaded
rijst
parents:
diff changeset
447 file_summary.write("\n") # Summary file opens with a blank line, then the banner, then the strains_found message;
cc961e057668 Uploaded
rijst
parents:
diff changeset
448 file_summary.write(intro_message)
cc961e057668 Uploaded
rijst
parents:
diff changeset
449 file_summary.write('\n'+strains_found+'.\n') # strains_found is concatenated directly, so it must be a string here;
cc961e057668 Uploaded
rijst
parents:
diff changeset
450
cc961e057668 Uploaded
rijst
parents:
diff changeset
451 file_summary.write("\nFinished annotation. Total time: %s s\n\n" % round(end-start,1)) # Elapsed time since start (taken at the top of the script), rounded to one decimal;
cc961e057668 Uploaded
rijst
parents:
diff changeset
452
cc961e057668 Uploaded
rijst
parents:
diff changeset
453
cc961e057668 Uploaded
rijst
parents:
diff changeset
454 file_overlap.write('SNP start\tSNP end\tFeature 1\tLocus tag\tProduct\t\tFeature 2\tLocus tag\tProduct') # Tab-separated column header; no newline is written here - presumably write_output begins each row on a new line, TODO confirm;
cc961e057668 Uploaded
rijst
parents:
diff changeset
455 for overlap in feat_overlap: # NOTE(review): entry layout inferred from the uses below - [2] and [3] index feat_props, [4] is a two-item [start,end] list;
cc961e057668 Uploaded
rijst
parents:
diff changeset
456 if overlap[4] != [0,0]: # [0,0] looks like the "no SNP range recorded" marker - verify where feat_overlap is filled;
cc961e057668 Uploaded
rijst
parents:
diff changeset
457 overlap[4][0]+=1 # Both ends are incremented in place before printing - presumably 0-based to 1-based, like pos+1 in the annotation rows;
cc961e057668 Uploaded
rijst
parents:
diff changeset
458 overlap[4][1]+=1
cc961e057668 Uploaded
rijst
parents:
diff changeset
459 if overlap[4][0] == overlap[4][1]: overlap[4][1] = '' # Range covers a single position: leave the 'SNP end' column empty;
cc961e057668 Uploaded
rijst
parents:
diff changeset
460 write_output([[str(overlap[4][0])],[str(overlap[4][1]),feat_props[overlap[2]]['type'],feat_props[overlap[2]]['locus_tag'],feat_props[overlap[2]]['product']], # SNP start | SNP end + type/locus tag/product of the first feature;
cc961e057668 Uploaded
rijst
parents:
diff changeset
461 [feat_props[overlap[3]]['type'],feat_props[overlap[3]]['locus_tag'],feat_props[overlap[3]]['product']]], # type/locus tag/product of the second feature;
cc961e057668 Uploaded
rijst
parents:
diff changeset
462 file_overlap) # Explicit target file; the annotation rows earlier call write_output with a single argument;
cc961e057668 Uploaded
rijst
parents:
diff changeset
463
cc961e057668 Uploaded
rijst
parents:
diff changeset
464
cc961e057668 Uploaded
rijst
parents:
diff changeset
465 for i,strain in enumerate(headers[2:]): # One summary section per strain column; same [2:] offset as the snp_entry[2:] loop that fills straininfo;
cc961e057668 Uploaded
rijst
parents:
diff changeset
466 file_summary.write("\n")
cc961e057668 Uploaded
rijst
parents:
diff changeset
467 info = straininfo[i] # Counter dict for this strain, keyed by category / feature type;
cc961e057668 Uploaded
rijst
parents:
diff changeset
468 file_summary.write("+ Strain %s:\n" % strain)
cc961e057668 Uploaded
rijst
parents:
diff changeset
469 file_summary.write(" %s SNPs found\n" % info['snps'])
cc961e057668 Uploaded
rijst
parents:
diff changeset
470 file_summary.write(" Number of SNPs found CDS features: %s\n" % (info['mnps']+info['nonstart']+info['nonstop']+info['nonsense']+ # NOTE(review): message probably meant "found in CDS features"; left unchanged because it is runtime output;
cc961e057668 Uploaded
rijst
parents:
diff changeset
471 info['synonymous']+info['nonsynonymous'])) # CDS total is the sum of the six per-category counters printed individually below;
cc961e057668 Uploaded
rijst
parents:
diff changeset
472 file_summary.write(" (of which in pseudogenes: %s)\n" % info['pseudogene'])
cc961e057668 Uploaded
rijst
parents:
diff changeset
473 file_summary.write(" - MNPs: %s\n" % info['mnps'])
cc961e057668 Uploaded
rijst
parents:
diff changeset
474 file_summary.write(" - Synonymous: %s\n" % info['synonymous'])
cc961e057668 Uploaded
rijst
parents:
diff changeset
475 file_summary.write(" - Nonsynonymous: %s\n" % info['nonsynonymous'])
cc961e057668 Uploaded
rijst
parents:
diff changeset
476 file_summary.write(" - Nonsense: %s\n" % info['nonsense'])
cc961e057668 Uploaded
rijst
parents:
diff changeset
477 file_summary.write(" - Nonstart: %s\n" % info['nonstart'])
cc961e057668 Uploaded
rijst
parents:
diff changeset
478 file_summary.write(" - Nonstop: %s\n" % info['nonstop'])
cc961e057668 Uploaded
rijst
parents:
diff changeset
479 file_summary.write(" Intergenic: %s\n" % info['intergenic'])
cc961e057668 Uploaded
rijst
parents:
diff changeset
480
cc961e057668 Uploaded
rijst
parents:
diff changeset
481 for typ in feature_types: # Remaining feature types, one line each;
cc961e057668 Uploaded
rijst
parents:
diff changeset
482 if typ not in ['intergenic','pseudogene'] and info[typ] != 0: # Those two were already printed above; zero counts are omitted;
cc961e057668 Uploaded
rijst
parents:
diff changeset
483 file_summary.write(" %s: %s\n" % (typ,info[typ]))
cc961e057668 Uploaded
rijst
parents:
diff changeset
484 file_summary.flush() # Flush buffered summary text;
cc961e057668 Uploaded
rijst
parents:
diff changeset
485
cc961e057668 Uploaded
rijst
parents:
diff changeset
486 file_overlap.close() # Both report files are closed explicitly at the end of the script;
cc961e057668 Uploaded
rijst
parents:
diff changeset
487 file_summary.close()