Mercurial > repos > public-health-bioinformatics > antigenic_site_extraction
changeset 3:f5522fa90c03 draft default tip
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 6f09c69c51ec3d6bd7487f55384b97155355c456
author | public-health-bioinformatics |
---|---|
date | Mon, 04 Feb 2019 18:33:43 -0500 |
parents | f879bf22aa12 |
children | |
files | antigenic_site_extraction.py test-data/FluA_H1_antigenic_aa_indices.csv test-data/FluB_Victoria_antigenic_aa_indices.csv test-data/FluB_Yamagata_antigenic_aa_indices.csv test-data/output.fasta |
diffstat | 5 files changed, 64 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/antigenic_site_extraction.py Thu Jan 17 19:08:56 2019 -0500 +++ b/antigenic_site_extraction.py Mon Feb 04 18:33:43 2019 -0500 @@ -54,8 +54,8 @@ for item in positionList: indexArray.append(int(item)) #print number of amino acids to extract and array to console as user check - print "Amino Acid positions to extract: %i " %(len(indexArray)) - print indexArray + print("Amino Acid positions to extract: %i " %(len(indexArray))) + print(indexArray) with open(args.inFileHandle1,'r') as inFile: '''Open fasta of amino acid sequences to parse, uppercase and add to protein Sequence list.''' @@ -64,14 +64,14 @@ record = record.upper() seqList.append(record) #add Seq to list of Sequences #print number of sequences to be process as user check - print "\n%i flu sequences will be extracted for antigenic sites..." % len(seqList) + print("\n%i flu sequences will be extracted for antigenic sites..." % len(seqList)) #parse each target sequence object for record in seqList: extract_aa_from_sequence(record) #print original and extracted sequence for x in range(0, len(seqList)): - print "Original %s: %i amino acids,\tExtracted: %i" % (seqList[x].id,len(seqList[x]),len(extractedSeqList[x])) + print("Original %s: %i amino acids,\tExtracted: %i" % (seqList[x].id,len(seqList[x]),len(extractedSeqList[x]))) #determine if output format is fasta (default) or csv if args.csv: @@ -82,7 +82,7 @@ sequence = str(record.seq).strip() csv_seq = ",".join(sequence) comma_separated_sequence = name_part + csv_seq + "\n" - print comma_separated_sequence + print(comma_separated_sequence) outFile.write(comma_separated_sequence) else: #write fasta file of extracted antigenic sites
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/FluA_H1_antigenic_aa_indices.csv Mon Feb 04 18:33:43 2019 -0500 @@ -0,0 +1,1 @@ +70,71,72,73,74,75,124,125,137,138,139,140,141,142,153,154,155,156,157,159,160,161,162,163,164,166,167,168,169,170,184,185,186,187,188,189,190,191,192,193,194,195,203,204,205,221,222,235,236,237
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/FluB_Victoria_antigenic_aa_indices.csv Mon Feb 04 18:33:43 2019 -0500 @@ -0,0 +1,1 @@ +73,74,75,76,77,78,79,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,141,142,143,144,145,146,147,148,149,150,162,163,164,165,166,167,168,169,170,197,198,199,200,201,202,203,204,205,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/FluB_Yamagata_antigenic_aa_indices.csv Mon Feb 04 18:33:43 2019 -0500 @@ -0,0 +1,1 @@ +73,74,75,76,77,78,79,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,141,142,143,144,145,146,147,148,149,150,162,163,164,165,166,167,168,169,196,197,198,199,200,201,202,203,204,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.fasta Mon Feb 04 18:33:43 2019 -0500 @@ -0,0 +1,56 @@ +>Seq1(3C.2a.3) +QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVKQNTSAIKSSSSRNTHLNYTAL +NTMNNEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIQSSPGKKSEF +VRIACRYVKHS +>Seq2(3C.2a.4) +QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTKDSNTGVTQNTSAIRSKSSRNTHLNYTAL +NTMNNEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRNSPGKKSEF +VRIACRYVKHS +>Seq3(3C.2a.3) +QNSSIEINSQLENIQGQNKKLFVSKYNVPRTNNSNTGVKQNTSAIKSSSSRNTHLNYTAL +NTMNNEQFDKLIVGTDKDISLAQSRTKRSAVIPNIGSIPSRIKGILNSTIQSSPGKKSEF +VRIACRYVKHS +>Seq4(3C.2a.2) +QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVTQNTSAMRSSSSRNTHLNYTAL +NTMNNEQFDKLIVGTDKDIFLAKSRTKRSAVIPNIGSIPSRIKGILNSTIQSSPGKKSEF +VRIACRYVKHS +>Seq5(3C.2a.3) +QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVKQNTSAIKSSSSRNTHLNYTAL +NTMNNEQFDKLIVGTDKDISLAQSRTKRSAVIPNIGSIPSRIKGILNSTIQSSPGKKSEF +VRIACRYVKHS +>Seq5(3C.2a.4) +QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTKNSNTGVTQNTSAIRSKSSRNTHLNYTAL +NTMNNEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGYIPSRIKGILNSTIRSSPGKKSEF +VRIACRYVKHS +>Seq6(3C.3a) +QNSSIEIDSQLENIQGQNKKLFVNKYSVPRTNNSNAGVTQNTSSIGSKSSRNTHLNSKAL +NTMNNEQFDKLIVGTDKDISLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF +VRIACRYVKQS +>Seq7(3C.3a) +QNSSIEIDSQLENIQGQNKKLFVNKYNVPRTNNSNAGVTQNTSSIGSKSSRNTHLNSKAL +NTMNNEQFDKLIVGTDKDISLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF +VRIACRYVKQS +>Seq8(3C.2a.1) +QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTKNSNTGVTQNKSAIRSSSSRNTHLNYTAL +NTMNKEQFDKLIVGTDKDIFLAQPRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF +VRIACRYVKHS +>Clade_3C.2a_A/Hong_Kong/5738/2014 +QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVTQNTSAIRSSSSRNTHLNYTAL +NTMNNEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF +VRIACRYVKH? +>Clade_3C.3a_A/Switzerland/9715293/2013 +QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNAGVTQNTSSIGSNSSRNTHLNSKAL +NTMNNEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF +VRIACRYVKQS +>Seq9(3C.2a.1) +QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVTQNTSAIRSSSSRNTHLNYTAL +NTMNKEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF +VRIACRYVKHS +>Seq10(3C.2a.1) +QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTKNSNTGVTQNKSAIRSSSSRNTHLNYTAL +NTMNKEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF +VRIACRYVKHS +>Clade_3C.2a.1_A/Bolzano/7/2016 +QNSSIEIDSQLENIQGQNKKLFVSKYNVPRTNNSNTGVTQNTSAIRSSSSRNTHLNYTAL +NTMNKEQFDKLIVGTDKDIFLAQSRTKRSAVIPNIGSIPSRIKGILNSTIRSSPGKKSEF +VRIACRYVKHS