Mercurial > repos > public-health-bioinformatics > aggregate_linelisting
diff aggregate_linelisting.py @ 3:45a01281f796 draft default tip
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 6f09c69c51ec3d6bd7487f55384b97155355c456
author | public-health-bioinformatics |
---|---|
date | Mon, 04 Feb 2019 18:33:05 -0500 |
parents | 91c7d74bc709 |
children |
line wrap: on
line diff
--- a/aggregate_linelisting.py Thu Jan 17 19:08:33 2019 -0500
+++ b/aggregate_linelisting.py Mon Feb 04 18:33:05 2019 -0500
@@ -160,7 +160,7 @@
 #write first member of unique sequence list to csv
 agg_lineListFile.write(comma_sep_output)
 #print sequence records in sequevar to console
- print "\n\t\t%i SeqRecords matching Sequevar: %s" % (len(listOfSeqs), u)
+ print("\n\t\t%i SeqRecords matching Sequevar: %s" % (len(listOfSeqs), u))
 #to uncollapse sequevar group, print each member of the sequevar list to csv output
 '''for i in range(1,len(listOfSeqs)):
@@ -214,7 +214,7 @@
 #print column headers for sample sequences
 row4 = "Sequence Name,N,Clade,Extra Substitutions," + empty_indicesLine + "Number of Amino Acid Substitutions in Antigenic Sites,% Identity of Antigenic Site Residues\n"
 agg_lineListFile.write(row4)
- print ("\nREFERENCE ANTIGENIC MAP: '%s' (%i amino acids)" % (record.id, len(record)))
+ print("\nREFERENCE ANTIGENIC MAP: '%s' (%i amino acids)" % (record.id, len(record)))
 with open(cladeDefinitionFile,'r') as cladeFile:
 """Read clade definition file and store clade names in list."""
@@ -232,14 +232,14 @@
 seqList.append(record) #add Seq to list of Sequences
 #print number of sequences to be processed as user check
-print "\nCOMPARING %i flu antigenic map sequences to the reference..." % len(seqList)
+print("\nCOMPARING %i flu antigenic map sequences to the reference..." % len(seqList))
 for record in seqList:
 #assign SeqRecords to province-specific dictionaries
 sort_by_location(record)
 #access prov segregated lists in order
 sorted_prov_keys = sorted(prov_lists.keys())
-print "\nSequence Lists Sorted by Province: "
+print("\nSequence Lists Sorted by Province: ")
 for prov in sorted_prov_keys:
 current_list = prov_lists[prov]
 #mask AA's identical to reference sequence with dot
@@ -253,7 +253,7 @@
 for prov in sorted_prov_keys:
 prov_list = prov_lists[prov]
 by_clades_dict = {} #empty dict for clade:seqRecord list groups
- print "\n'%s' List (Amino Acids identical to Reference are Masked): " % (prov)
+ print("\n'%s' List (Amino Acids identical to Reference are Masked): " % (prov))
 for rec in prov_list:
 clade = extract_clade(rec)
 if clade in by_clades_dict:
@@ -263,13 +263,13 @@
 by_clades_dict[clade] = [rec]
 #get list of alphabetically sorted clade keys
 sorted_clade_keys = sorted(by_clades_dict.keys())
- print "\tNumber of clades: ", len(by_clades_dict)
+ print("\tNumber of clades: ", len(by_clades_dict))
 #group each list of sequences in clade by sequevars
 for key in sorted_clade_keys:
- print "\n\tCLADE: %s Number of Members: %i" % (key, len(by_clades_dict[key]))
+ print("\n\tCLADE: %s Number of Members: %i" % (key, len(by_clades_dict[key])))
 a_list = by_clades_dict[key]
 for seqrec in a_list:
- print "\t %s: %s" %(seqrec.id,str(seqrec.seq))
+ print("\t %s: %s" %(seqrec.id,str(seqrec.seq)))
 #output the list to csv as aggregated linelist
 output_aggregated_linelist(a_list)