Mercurial > repos > public-health-bioinformatics > assign_clades
diff assign_clades.py @ 3:bb1cdfafee59 draft default tip
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 6f09c69c51ec3d6bd7487f55384b97155355c456
author | public-health-bioinformatics |
---|---|
date | Mon, 04 Feb 2019 18:34:14 -0500 |
parents | 1f113d9db8ba |
children |
line wrap: on
line diff
--- a/assign_clades.py Thu Jan 17 19:09:30 2019 -0500 +++ b/assign_clades.py Mon Feb 04 18:34:14 2019 -0500 @@ -27,8 +27,8 @@ '''Searches record for required amino acids at defined positions. If found, assigns clade name to sequence name by appending underscore and clade name to record id.''' def call_clade(record): - print "---------------------------------------------------------------------" - print "Parsing %s for matching flu clade definitions..." % (record.id) + print("---------------------------------------------------------------------") + print("Parsing %s for matching flu clade definitions..." % (record.id)) matchList = [] #empty list to hold clades that match 100% #iterate over each tuple in the clade list for clade in cladeList: @@ -38,7 +38,7 @@ shouldFind = len(sites) #number of sites that should match found = 0 #a counter to hold matches to antigenic sites #iterate over each position in sites dictionary - for pos, aa in sites.iteritems(): + for pos, aa in sites.items(): #translate pos to corresponding index in target sequence index = int(pos) - 1 #if record at index has same amino acid as 'aa', increment 'found' @@ -87,11 +87,11 @@ #add the clade info as a tuple to the cladeList[] oneClade =(name, depth, sites) cladeList.append(oneClade) - print "The List of Clades:" + print("The List of Clades:") for clade in cladeList: - print "Clade Name: %s Depth: %i Antigenic Sites: %i" % (clade[0], clade[1], len(clade[2])) - for pos, aa in clade[2].iteritems(): - print "Pos: %s\tAA: %s" % (pos,aa) + print("Clade Name: %s Depth: %i Antigenic Sites: %i" % (clade[0], clade[1], len(clade[2]))) + for pos, aa in clade[2].items(): + print("Pos: %s\tAA: %s" % (pos,aa)) '''opens readable input file of sequences to parse using filename from cmd line, instantiates as AA Sequence objects, with ppercase sequences''' @@ -100,7 +100,7 @@ for record in SeqIO.parse(inFile, "fasta", alphabet=IUPAC.protein): record = record.upper() seqList.append(record) #add Seq to list of Sequences - print "\n%i flu HA sequences will be compared to current clade definitions..." % len(seqList) + print("\n%i flu HA sequences will be compared to current clade definitions..." % len(seqList)) #parse each target sequence object for record in seqList: clade_call = '' #empty variale for final clade call on sequence @@ -116,10 +116,10 @@ #empty list return, no matches else: clade_call = "No_Match" - print clade_call + print(clade_call) seq_name = record.id mod_name = seq_name + "_" + clade_call - print "New Sequence Name: " + mod_name + print("New Sequence Name: " + mod_name) record.id = mod_name