changeset 3:f5522fa90c03 draft default tip

planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 6f09c69c51ec3d6bd7487f55384b97155355c456
author public-health-bioinformatics
date Mon, 04 Feb 2019 18:33:43 -0500
parents f879bf22aa12
children
files antigenic_site_extraction.py test-data/FluA_H1_antigenic_aa_indices.csv test-data/FluB_Victoria_antigenic_aa_indices.csv test-data/FluB_Yamagata_antigenic_aa_indices.csv test-data/output.fasta
diffstat 5 files changed, 64 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/antigenic_site_extraction.py	Thu Jan 17 19:08:56 2019 -0500
+++ b/antigenic_site_extraction.py	Mon Feb 04 18:33:43 2019 -0500
@@ -54,8 +54,8 @@
     for item in positionList:
         indexArray.append(int(item))
     #print number of amino acids to extract and array to console as user check
-    print "Amino Acid positions to extract: %i " %(len(indexArray))
-    print indexArray
+    print("Amino Acid positions to extract: %i " %(len(indexArray)))
+    print(indexArray)
 
 with open(args.inFileHandle1,'r') as inFile:
     '''Open fasta of amino acid sequences to parse, uppercase and add to protein Sequence list.'''
@@ -64,14 +64,14 @@
         record = record.upper()
         seqList.append(record) #add Seq to list of Sequences
     #print number of sequences to be process as user check
-    print "\n%i flu sequences will be extracted for antigenic sites..." % len(seqList)
+    print("\n%i flu sequences will be extracted for antigenic sites..." % len(seqList))
     #parse each target sequence object
     for record in seqList:
         extract_aa_from_sequence(record)
 
 #print original and extracted sequence
 for x in range(0, len(seqList)):
-    print "Original %s: %i amino acids,\tExtracted: %i" % (seqList[x].id,len(seqList[x]),len(extractedSeqList[x]))
+    print("Original %s: %i amino acids,\tExtracted: %i" % (seqList[x].id,len(seqList[x]),len(extractedSeqList[x])))
 
 #determine if output format is fasta (default) or csv
 if args.csv:
@@ -82,7 +82,7 @@
         sequence = str(record.seq).strip()
         csv_seq = ",".join(sequence)
         comma_separated_sequence = name_part + csv_seq + "\n"
-        print comma_separated_sequence
+        print(comma_separated_sequence)
         outFile.write(comma_separated_sequence)
 else:
     #write fasta file of extracted antigenic sites
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/FluA_H1_antigenic_aa_indices.csv	Mon Feb 04 18:33:43 2019 -0500
@@ -0,0 +1,1 @@
+70,71,72,73,74,75,124,125,137,138,139,140,141,142,153,154,155,156,157,159,160,161,162,163,164,166,167,168,169,170,184,185,186,187,188,189,190,191,192,193,194,195,203,204,205,221,222,235,236,237
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/FluB_Victoria_antigenic_aa_indices.csv	Mon Feb 04 18:33:43 2019 -0500
@@ -0,0 +1,1 @@
+73,74,75,76,77,78,79,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,141,142,143,144,145,146,147,148,149,150,162,163,164,165,166,167,168,169,170,197,198,199,200,201,202,203,204,205,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/FluB_Yamagata_antigenic_aa_indices.csv	Mon Feb 04 18:33:43 2019 -0500
@@ -0,0 +1,1 @@
+73,74,75,76,77,78,79,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,141,142,143,144,145,146,147,148,149,150,162,163,164,165,166,167,168,169,196,197,198,199,200,201,202,203,204,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.fasta	Mon Feb 04 18:33:43 2019 -0500
@@ -0,0 +1,56 @@
+>Seq1(3C.2a.3)
+QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVKQNTSAIKSSSSRNTHLNYTAL
+NTMNNEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIQSSPGKKSEF
+VRIACRYVKHS
+>Seq2(3C.2a.4)
+QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTKDSNTGVTQNTSAIRSKSSRNTHLNYTAL
+NTMNNEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRNSPGKKSEF
+VRIACRYVKHS
+>Seq3(3C.2a.3)
+QNSSIEINSQLENIQGQNKKLFVSKYNVPRTNNSNTGVKQNTSAIKSSSSRNTHLNYTAL
+NTMNNEQFDKLIVGTDKDISLAQSRTKRSAVIPNIGSIPSRIKGILNSTIQSSPGKKSEF
+VRIACRYVKHS
+>Seq4(3C.2a.2)
+QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVTQNTSAMRSSSSRNTHLNYTAL
+NTMNNEQFDKLIVGTDKDIFLAKSRTKRSAVIPNIGSIPSRIKGILNSTIQSSPGKKSEF
+VRIACRYVKHS
+>Seq5(3C.2a.3)
+QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVKQNTSAIKSSSSRNTHLNYTAL
+NTMNNEQFDKLIVGTDKDISLAQSRTKRSAVIPNIGSIPSRIKGILNSTIQSSPGKKSEF
+VRIACRYVKHS
+>Seq5(3C.2a.4)
+QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTKNSNTGVTQNTSAIRSKSSRNTHLNYTAL
+NTMNNEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGYIPSRIKGILNSTIRSSPGKKSEF
+VRIACRYVKHS
+>Seq6(3C.3a)
+QNSSIEIDSQLENIQGQNKKLFVNKYSVPRTNNSNAGVTQNTSSIGSKSSRNTHLNSKAL
+NTMNNEQFDKLIVGTDKDISLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF
+VRIACRYVKQS
+>Seq7(3C.3a)
+QNSSIEIDSQLENIQGQNKKLFVNKYNVPRTNNSNAGVTQNTSSIGSKSSRNTHLNSKAL
+NTMNNEQFDKLIVGTDKDISLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF
+VRIACRYVKQS
+>Seq8(3C.2a.1)
+QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTKNSNTGVTQNKSAIRSSSSRNTHLNYTAL
+NTMNKEQFDKLIVGTDKDIFLAQPRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF
+VRIACRYVKHS
+>Clade_3C.2a_A/Hong_Kong/5738/2014
+QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVTQNTSAIRSSSSRNTHLNYTAL
+NTMNNEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF
+VRIACRYVKH?
+>Clade_3C.3a_A/Switzerland/9715293/2013
+QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNAGVTQNTSSIGSNSSRNTHLNSKAL
+NTMNNEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF
+VRIACRYVKQS
+>Seq9(3C.2a.1)
+QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVTQNTSAIRSSSSRNTHLNYTAL
+NTMNKEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF
+VRIACRYVKHS
+>Seq10(3C.2a.1)
+QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTKNSNTGVTQNKSAIRSSSSRNTHLNYTAL
+NTMNKEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF
+VRIACRYVKHS
+>Clade_3C.2a.1_A/Bolzano/7/2016
+QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVTQNTSAIRSSSSRNTHLNYTAL
+NTMNKEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF
+VRIACRYVKHS