aggregate_linelisting: aggregate_linelisting.py comparison

comparison aggregate_linelisting.py @ 3:45a01281f796 draft default tip

planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 6f09c69c51ec3d6bd7487f55384b97155355c456

author	public-health-bioinformatics
date	Mon, 04 Feb 2019 18:33:05 -0500
parents	91c7d74bc709
children

comparison

equal deleted inserted replaced

-:eb0701da22d1
+:45a01281f796
 csv_seq = ",".join(sequence) +","
 comma_sep_output = name_part + N_part + clade_part + col + csv_seq + substitutions_part + percID_part + "\n"
 #write first member of unique sequence list to csv
 agg_lineListFile.write(comma_sep_output)
 #print sequence records in sequevar to console
-print "\n\t\t%i SeqRecords matching Sequevar: %s" % (len(listOfSeqs), u)
+print("\n\t\t%i SeqRecords matching Sequevar: %s" % (len(listOfSeqs), u))
 #to uncollapse sequevar group, print each member of the sequevar list to csv output
 '''for i in range(1,len(listOfSeqs)):
 currentRec = listOfSeqs[i]
 province = extract_province(currentRec)
 numPos = len(positions)
 empty_indicesLine = ',' * numPos
 #print column headers for sample sequences
 row4 = "Sequence Name,N,Clade,Extra Substitutions," + empty_indicesLine + "Number of Amino Acid Substitutions in Antigenic Sites,% Identity of Antigenic Site Residues\n"
 agg_lineListFile.write(row4)
-print ("\nREFERENCE ANTIGENIC MAP: '%s' (%i amino acids)" % (record.id, len(record)))
+print("\nREFERENCE ANTIGENIC MAP: '%s' (%i amino acids)" % (record.id, len(record)))
 with open(cladeDefinitionFile,'r') as cladeFile:
 """Read clade definition file and store clade names in list."""
 #remove whitespace from the end of each line and split elements at commas
 for line in cladeFile:
 for record in SeqIO.parse(extrAntigMapFile, "fasta", alphabet=IUPAC.protein):
 record = record.upper()
 seqList.append(record) #add Seq to list of Sequences
 #print number of sequences to be processed as user check
-print "\nCOMPARING %i flu antigenic map sequences to the reference..." % len(seqList)
+print("\nCOMPARING %i flu antigenic map sequences to the reference..." % len(seqList))
 for record in seqList:
 #assign SeqRecords to province-specific dictionaries
 sort_by_location(record)
 #access prov segregated lists in order
 sorted_prov_keys = sorted(prov_lists.keys())
-print "\nSequence Lists Sorted by Province: "
+print("\nSequence Lists Sorted by Province: ")
 for prov in sorted_prov_keys:
 current_list = prov_lists[prov]
 #mask AA's identical to reference sequence with dot
 masked_list = [] # empty temporary list to park masked sequences
 for record in current_list:
 #group sequences in province-sorted list into clades
 for prov in sorted_prov_keys:
 prov_list = prov_lists[prov]
 by_clades_dict = {} #empty dict for clade:seqRecord list groups
-print "\n'%s' List (Amino Acids identical to Reference are Masked): " % (prov)
+print("\n'%s' List (Amino Acids identical to Reference are Masked): " % (prov))
 for rec in prov_list:
 clade = extract_clade(rec)
 if clade in by_clades_dict:
 #if clade already in dict as key, append record to list (value)
 by_clades_dict[clade].append(rec)
 else: #add clade as key to dict, value is list of 1 SeqRecord
 by_clades_dict[clade] = [rec]
 #get list of alphabetically sorted clade keys
 sorted_clade_keys = sorted(by_clades_dict.keys())
-print "\tNumber of clades: ", len(by_clades_dict)
+print("\tNumber of clades: ", len(by_clades_dict))
 #group each list of sequences in clade by sequevars
 for key in sorted_clade_keys:
-print "\n\tCLADE: %s Number of Members: %i" % (key, len(by_clades_dict[key]))
+print("\n\tCLADE: %s Number of Members: %i" % (key, len(by_clades_dict[key])))
 a_list = by_clades_dict[key]
 for seqrec in a_list:
-print "\t %s: %s" %(seqrec.id,str(seqrec.seq))
+print("\t %s: %s" %(seqrec.id,str(seqrec.seq)))
 #output the list to csv as aggregated linelist
 output_aggregated_linelist(a_list)
 print("Aggregated Linelist written to file: '%s\n'"  % (outFileHandle))
 extrAntigMapFile.close()

Mercurial > repos > public-health-bioinformatics > aggregate_linelisting

comparison aggregate_linelisting.py @ 3:45a01281f796 draft default tip