Mercurial > repos > public-health-bioinformatics > linelisting
comparison linelisting.py @ 2:141cbefca027 draft default tip
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 6f09c69c51ec3d6bd7487f55384b97155355c456
| author | public-health-bioinformatics |
|---|---|
| date | Mon, 04 Feb 2019 18:35:20 -0500 |
| parents | bda72dec1f55 |
| children |
comparison
equal
deleted
inserted
replaced
| 1:8a4ee4be0b7e | 2:141cbefca027 |
|---|---|
| 170 numPos = len(positions) | 170 numPos = len(positions) |
| 171 empty_indicesLine = ',' * numPos | 171 empty_indicesLine = ',' * numPos |
| 172 #print column headers for sample sequences | 172 #print column headers for sample sequences |
| 173 row4 = "Sequence Name,N,Clade,Extra Substitutions," + empty_indicesLine + "Number of Amino Acid Substitutions in Antigenic Sites,% Identity of Antigenic Site Residues\n" | 173 row4 = "Sequence Name,N,Clade,Extra Substitutions," + empty_indicesLine + "Number of Amino Acid Substitutions in Antigenic Sites,% Identity of Antigenic Site Residues\n" |
| 174 lineListFile.write(row4) | 174 lineListFile.write(row4) |
| 175 print ("\nREFERENCE ANTIGENIC MAP: '%s' (%i amino acids)" % (record.id, len(record))) | 175 print("\nREFERENCE ANTIGENIC MAP: '%s' (%i amino acids)" % (record.id, len(record))) |
| 176 | 176 |
| 177 with open(cladeDefinitionFile,'r') as cladeFile: | 177 with open(cladeDefinitionFile,'r') as cladeFile: |
| 178 """Read clade definition file and store clade names in a list.""" | 178 """Read clade definition file and store clade names in a list.""" |
| 179 #remove whitespace from the end of each line and split elements at commas | 179 #remove whitespace from the end of each line and split elements at commas |
| 180 for line in cladeFile: | 180 for line in cladeFile: |
| 188 for record in SeqIO.parse(extrAntigMapFile, "fasta", alphabet=IUPAC.protein): | 188 for record in SeqIO.parse(extrAntigMapFile, "fasta", alphabet=IUPAC.protein): |
| 189 record = record.upper() | 189 record = record.upper() |
| 190 seqList.append(record) #add Seq to list of Sequences | 190 seqList.append(record) #add Seq to list of Sequences |
| 191 | 191 |
| 192 #print number of sequences to be processed as a user check | 192 #print number of sequences to be processed as a user check |
| 193 print "\nCOMPARING %i flu antigenic map sequences to the reference..." % len(seqList) | 193 print("\nCOMPARING %i flu antigenic map sequences to the reference..." % len(seqList)) |
| 194 #parse each antigenic map sequence object | 194 #parse each antigenic map sequence object |
| 195 for record in seqList: | 195 for record in seqList: |
| 196 #assign Sequence to dictionaries according to location in name | 196 #assign Sequence to dictionaries according to location in name |
| 197 sort_by_location(record) | 197 sort_by_location(record) |
| 198 #sort dictionary keys that access province-segregated lists | 198 #sort dictionary keys that access province-segregated lists |
| 199 sorted_segregated_list_keys = sorted(segregated_lists.keys()) | 199 sorted_segregated_list_keys = sorted(segregated_lists.keys()) |
| 200 print "\nSequence Lists Sorted by Province: " | 200 print("\nSequence Lists Sorted by Province: ") |
| 201 #process each province-segregated SeqRecord list | 201 #process each province-segregated SeqRecord list |
| 202 for listname in sorted_segregated_list_keys: | 202 for listname in sorted_segregated_list_keys: |
| 203 #access list of sequences by the listname key | 203 #access list of sequences by the listname key |
| 204 a_list = segregated_lists[listname] | 204 a_list = segregated_lists[listname] |
| 205 # sort original SeqRecords by record id (i.e. name) | 205 # sort original SeqRecords by record id (i.e. name) |
| 208 for record in a_list: | 208 for record in a_list: |
| 209 #replace matching amino acid symbols with dots | 209 #replace matching amino acid symbols with dots |
| 210 rec = replace_matching_aa_with_dot(record) | 210 rec = replace_matching_aa_with_dot(record) |
| 211 mod_list.append(rec) #populate a list of modified records | 211 mod_list.append(rec) #populate a list of modified records |
| 212 segregated_lists[listname] = mod_list | 212 segregated_lists[listname] = mod_list |
| 213 print "\n'%s' List (Amino Acids identical to Reference Masked): " % (listname) | 213 print("\n'%s' List (Amino Acids identical to Reference Masked): " % (listname)) |
| 214 #output the list to csv as non-aggregated linelist | 214 #output the list to csv as non-aggregated linelist |
| 215 output_linelist(segregated_lists[listname]) | 215 output_linelist(segregated_lists[listname]) |
| 216 | 216 |
| 217 extrAntigMapFile.close() | 217 extrAntigMapFile.close() |
| 218 refMapFile.close() | 218 refMapFile.close() |
