# HG changeset patch # User public-health-bioinformatics # Date 1549323254 18000 # Node ID bb1cdfafee5957ef2c5ec02b34f43d54937e50f6 # Parent 0d3dad15541307fb79a7a2c988c15fab6cbbdf9e planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 6f09c69c51ec3d6bd7487f55384b97155355c456 diff -r 0d3dad155413 -r bb1cdfafee59 assign_clades.py --- a/assign_clades.py Thu Jan 17 19:09:30 2019 -0500 +++ b/assign_clades.py Mon Feb 04 18:34:14 2019 -0500 @@ -27,8 +27,8 @@ '''Searches record for required amino acids at defined positions. If found, assigns clade name to sequence name by appending underscore and clade name to record id.''' def call_clade(record): - print "---------------------------------------------------------------------" - print "Parsing %s for matching flu clade definitions..." % (record.id) + print("---------------------------------------------------------------------") + print("Parsing %s for matching flu clade definitions..." % (record.id)) matchList = [] #empty list to hold clades that match 100% #iterate over each tuple in the clade list for clade in cladeList: @@ -38,7 +38,7 @@ shouldFind = len(sites) #number of sites that should match found = 0 #a counter to hold matches to antigenic sites #iterate over each position in sites dictionary - for pos, aa in sites.iteritems(): + for pos, aa in sites.items(): #translate pos to corresponding index in target sequence index = int(pos) - 1 #if record at index has same amino acid as 'aa', increment 'found' @@ -87,11 +87,11 @@ #add the clade info as a tuple to the cladeList[] oneClade =(name, depth, sites) cladeList.append(oneClade) - print "The List of Clades:" + print("The List of Clades:") for clade in cladeList: - print "Clade Name: %s Depth: %i Antigenic Sites: %i" % (clade[0], clade[1], len(clade[2])) - for pos, aa in clade[2].iteritems(): - print "Pos: %s\tAA: %s" % (pos,aa) + print("Clade Name: %s Depth: %i Antigenic Sites: %i" % (clade[0], clade[1], len(clade[2]))) + for pos, aa in clade[2].items(): + print("Pos: %s\tAA: %s" % (pos,aa)) '''opens readable input file of sequences to parse using filename from cmd line, instantiates as AA Sequence objects, with ppercase sequences''' @@ -100,7 +100,7 @@ for record in SeqIO.parse(inFile, "fasta", alphabet=IUPAC.protein): record = record.upper() seqList.append(record) #add Seq to list of Sequences - print "\n%i flu HA sequences will be compared to current clade definitions..." % len(seqList) + print("\n%i flu HA sequences will be compared to current clade definitions..." % len(seqList)) #parse each target sequence object for record in seqList: clade_call = '' #empty variale for final clade call on sequence @@ -116,10 +116,10 @@ #empty list return, no matches else: clade_call = "No_Match" - print clade_call + print(clade_call) seq_name = record.id mod_name = seq_name + "_" + clade_call - print "New Sequence Name: " + mod_name + print("New Sequence Name: " + mod_name) record.id = mod_name diff -r 0d3dad155413 -r bb1cdfafee59 test-data/input_fasta.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input_fasta.fasta Mon Feb 04 18:34:14 2019 -0500 @@ -0,0 +1,2 @@ +>test +QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERNKAYSSCYPYDVPDYASLRSLVASSGTLEFNNESFNWAGVTQNGTSSSCIRGSKSSFFSRLNWLTHLNSKYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQISLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKQSTLKLATGMRNVPERQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHNVYRDEALNNRFQIKGVELKSGYKDW diff -r 0d3dad155413 -r bb1cdfafee59 test-data/output.fa --- a/test-data/output.fa Thu Jan 17 19:09:30 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ ->test_3C.3a test -QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILD -GENCTLIDALLGDPQCDGFQNKKWDLFVERNKAYSSCYPYDVPDYASLRSLVASSGTLEF -NNESFNWAGVTQNGTSSSCIRGSKSSFFSRLNWLTHLNSKYPALNVTMPNNEQFDKLYIW -GVHHPGTDKDQISLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPG -DILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRI -TYGACPRYVKQSTLKLATGMRNVPERQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEG -RGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDL -WSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGS -IRNGTYDHNVYRDEALNNRFQIKGVELKSGYKDW diff -r 0d3dad155413 -r bb1cdfafee59 test-data/output.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.fasta Mon Feb 04 18:34:14 2019 -0500 @@ -0,0 +1,10 @@ +>test_3C.3a test +QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILD +GENCTLIDALLGDPQCDGFQNKKWDLFVERNKAYSSCYPYDVPDYASLRSLVASSGTLEF +NNESFNWAGVTQNGTSSSCIRGSKSSFFSRLNWLTHLNSKYPALNVTMPNNEQFDKLYIW +GVHHPGTDKDQISLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPG +DILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRI +TYGACPRYVKQSTLKLATGMRNVPERQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEG +RGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDL +WSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGS +IRNGTYDHNVYRDEALNNRFQIKGVELKSGYKDW diff -r 0d3dad155413 -r bb1cdfafee59 test-data/test_input.fasta --- a/test-data/test_input.fasta Thu Jan 17 19:09:30 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ ->test -QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERNKAYSSCYPYDVPDYASLRSLVASSGTLEFNNESFNWAGVTQNGTSSSCIRGSKSSFFSRLNWLTHLNSKYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQISLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKQSTLKLATGMRNVPERQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHNVYRDEALNNRFQIKGVELKSGYKDW