Mercurial > repos > public-health-bioinformatics > linelisting
comparison linelisting.py @ 2:141cbefca027 draft default tip
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 6f09c69c51ec3d6bd7487f55384b97155355c456
author | public-health-bioinformatics |
---|---|
date | Mon, 04 Feb 2019 18:35:20 -0500 |
parents | bda72dec1f55 |
children |
comparison
equal
deleted
inserted
replaced
1:8a4ee4be0b7e | 2:141cbefca027 |
---|---|
170 numPos = len(positions) | 170 numPos = len(positions) |
171 empty_indicesLine = ',' * numPos | 171 empty_indicesLine = ',' * numPos |
172 #print column headers for sample sequences | 172 #print column headers for sample sequences |
173 row4 = "Sequence Name,N,Clade,Extra Substitutions," + empty_indicesLine + "Number of Amino Acid Substitutions in Antigenic Sites,% Identity of Antigenic Site Residues\n" | 173 row4 = "Sequence Name,N,Clade,Extra Substitutions," + empty_indicesLine + "Number of Amino Acid Substitutions in Antigenic Sites,% Identity of Antigenic Site Residues\n" |
174 lineListFile.write(row4) | 174 lineListFile.write(row4) |
175 print ("\nREFERENCE ANTIGENIC MAP: '%s' (%i amino acids)" % (record.id, len(record))) | 175 print("\nREFERENCE ANTIGENIC MAP: '%s' (%i amino acids)" % (record.id, len(record))) |
176 | 176 |
177 with open(cladeDefinitionFile,'r') as cladeFile: | 177 with open(cladeDefinitionFile,'r') as cladeFile: |
178 """Read clade definition file and store clade names in a list.""" | 178 """Read clade definition file and store clade names in a list.""" |
179 #remove whitespace from the end of each line and split elements at commas | 179 #remove whitespace from the end of each line and split elements at commas |
180 for line in cladeFile: | 180 for line in cladeFile: |
188 for record in SeqIO.parse(extrAntigMapFile, "fasta", alphabet=IUPAC.protein): | 188 for record in SeqIO.parse(extrAntigMapFile, "fasta", alphabet=IUPAC.protein): |
189 record = record.upper() | 189 record = record.upper() |
190 seqList.append(record) #add Seq to list of Sequences | 190 seqList.append(record) #add Seq to list of Sequences |
191 | 191 |
192 #print number of sequences to be process as user check | 192 #print number of sequences to be process as user check |
193 print "\nCOMPARING %i flu antigenic map sequences to the reference..." % len(seqList) | 193 print("\nCOMPARING %i flu antigenic map sequences to the reference..." % len(seqList)) |
194 #parse each antigenic map sequence object | 194 #parse each antigenic map sequence object |
195 for record in seqList: | 195 for record in seqList: |
196 #assign Sequence to dictionaries according to location in name | 196 #assign Sequence to dictionaries according to location in name |
197 sort_by_location(record) | 197 sort_by_location(record) |
198 #sort dictionary keys that access province-segregated lists | 198 #sort dictionary keys that access province-segregated lists |
199 sorted_segregated_list_keys = sorted(segregated_lists.keys()) | 199 sorted_segregated_list_keys = sorted(segregated_lists.keys()) |
200 print "\nSequence Lists Sorted by Province: " | 200 print("\nSequence Lists Sorted by Province: ") |
201 #process each province-segregated SeqRecord list | 201 #process each province-segregated SeqRecord list |
202 for listname in sorted_segregated_list_keys: | 202 for listname in sorted_segregated_list_keys: |
203 #acesss list of sequences by the listname key | 203 #acesss list of sequences by the listname key |
204 a_list = segregated_lists[listname] | 204 a_list = segregated_lists[listname] |
205 # sort original SeqRecords by record id (i.e. name) | 205 # sort original SeqRecords by record id (i.e. name) |
208 for record in a_list: | 208 for record in a_list: |
209 #replace matching amino acid symbols with dots | 209 #replace matching amino acid symbols with dots |
210 rec = replace_matching_aa_with_dot(record) | 210 rec = replace_matching_aa_with_dot(record) |
211 mod_list.append(rec) #populate a list of modified records | 211 mod_list.append(rec) #populate a list of modified records |
212 segregated_lists[listname] = mod_list | 212 segregated_lists[listname] = mod_list |
213 print "\n'%s' List (Amino Acids identical to Reference Masked): " % (listname) | 213 print("\n'%s' List (Amino Acids identical to Reference Masked): " % (listname)) |
214 #output the list to csv as non-aggregated linelist | 214 #output the list to csv as non-aggregated linelist |
215 output_linelist(segregated_lists[listname]) | 215 output_linelist(segregated_lists[listname]) |
216 | 216 |
217 extrAntigMapFile.close() | 217 extrAntigMapFile.close() |
218 refMapFile.close() | 218 refMapFile.close() |