comparison linelisting.py @ 2:141cbefca027 draft default tip

planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 6f09c69c51ec3d6bd7487f55384b97155355c456
author public-health-bioinformatics
date Mon, 04 Feb 2019 18:35:20 -0500
parents bda72dec1f55
children
comparison
equal deleted inserted replaced
1:8a4ee4be0b7e 2:141cbefca027
170 numPos = len(positions) 170 numPos = len(positions)
171 empty_indicesLine = ',' * numPos 171 empty_indicesLine = ',' * numPos
172 #print column headers for sample sequences 172 #print column headers for sample sequences
173 row4 = "Sequence Name,N,Clade,Extra Substitutions," + empty_indicesLine + "Number of Amino Acid Substitutions in Antigenic Sites,% Identity of Antigenic Site Residues\n" 173 row4 = "Sequence Name,N,Clade,Extra Substitutions," + empty_indicesLine + "Number of Amino Acid Substitutions in Antigenic Sites,% Identity of Antigenic Site Residues\n"
174 lineListFile.write(row4) 174 lineListFile.write(row4)
175 print ("\nREFERENCE ANTIGENIC MAP: '%s' (%i amino acids)" % (record.id, len(record))) 175 print("\nREFERENCE ANTIGENIC MAP: '%s' (%i amino acids)" % (record.id, len(record)))
176 176
177 with open(cladeDefinitionFile,'r') as cladeFile: 177 with open(cladeDefinitionFile,'r') as cladeFile:
178 """Read clade definition file and store clade names in a list.""" 178 """Read clade definition file and store clade names in a list."""
179 #remove whitespace from the end of each line and split elements at commas 179 #remove whitespace from the end of each line and split elements at commas
180 for line in cladeFile: 180 for line in cladeFile:
188 for record in SeqIO.parse(extrAntigMapFile, "fasta", alphabet=IUPAC.protein): 188 for record in SeqIO.parse(extrAntigMapFile, "fasta", alphabet=IUPAC.protein):
189 record = record.upper() 189 record = record.upper()
190 seqList.append(record) #add Seq to list of Sequences 190 seqList.append(record) #add Seq to list of Sequences
191 191
192 #print number of sequences to be processed as user check 192 #print number of sequences to be processed as user check
193 print "\nCOMPARING %i flu antigenic map sequences to the reference..." % len(seqList) 193 print("\nCOMPARING %i flu antigenic map sequences to the reference..." % len(seqList))
194 #parse each antigenic map sequence object 194 #parse each antigenic map sequence object
195 for record in seqList: 195 for record in seqList:
196 #assign Sequence to dictionaries according to location in name 196 #assign Sequence to dictionaries according to location in name
197 sort_by_location(record) 197 sort_by_location(record)
198 #sort dictionary keys that access province-segregated lists 198 #sort dictionary keys that access province-segregated lists
199 sorted_segregated_list_keys = sorted(segregated_lists.keys()) 199 sorted_segregated_list_keys = sorted(segregated_lists.keys())
200 print "\nSequence Lists Sorted by Province: " 200 print("\nSequence Lists Sorted by Province: ")
201 #process each province-segregated SeqRecord list 201 #process each province-segregated SeqRecord list
202 for listname in sorted_segregated_list_keys: 202 for listname in sorted_segregated_list_keys:
203 #access list of sequences by the listname key 203 #access list of sequences by the listname key
204 a_list = segregated_lists[listname] 204 a_list = segregated_lists[listname]
205 # sort original SeqRecords by record id (i.e. name) 205 # sort original SeqRecords by record id (i.e. name)
208 for record in a_list: 208 for record in a_list:
209 #replace matching amino acid symbols with dots 209 #replace matching amino acid symbols with dots
210 rec = replace_matching_aa_with_dot(record) 210 rec = replace_matching_aa_with_dot(record)
211 mod_list.append(rec) #populate a list of modified records 211 mod_list.append(rec) #populate a list of modified records
212 segregated_lists[listname] = mod_list 212 segregated_lists[listname] = mod_list
213 print "\n'%s' List (Amino Acids identical to Reference Masked): " % (listname) 213 print("\n'%s' List (Amino Acids identical to Reference Masked): " % (listname))
214 #output the list to csv as non-aggregated linelist 214 #output the list to csv as non-aggregated linelist
215 output_linelist(segregated_lists[listname]) 215 output_linelist(segregated_lists[listname])
216 216
217 extrAntigMapFile.close() 217 extrAntigMapFile.close()
218 refMapFile.close() 218 refMapFile.close()