comparison antigenic_site_extraction.py @ 3:f5522fa90c03 draft default tip

planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 6f09c69c51ec3d6bd7487f55384b97155355c456
author public-health-bioinformatics
date Mon, 04 Feb 2019 18:33:43 -0500
parents 89deee583d6d
children
comparison
equal deleted inserted replaced
2:f879bf22aa12 3:f5522fa90c03
52 positionList = strippedLine.split(',') 52 positionList = strippedLine.split(',')
53 #Convert string items in positionList from strings to int and add to indexArray 53 #Convert string items in positionList from strings to int and add to indexArray
54 for item in positionList: 54 for item in positionList:
55 indexArray.append(int(item)) 55 indexArray.append(int(item))
56 #print number of amino acids to extract and array to console as user check 56 #print number of amino acids to extract and array to console as user check
57 print "Amino Acid positions to extract: %i " %(len(indexArray)) 57 print("Amino Acid positions to extract: %i " %(len(indexArray)))
58 print indexArray 58 print(indexArray)
59 59
60 with open(args.inFileHandle1,'r') as inFile: 60 with open(args.inFileHandle1,'r') as inFile:
61 '''Open fasta of amino acid sequences to parse, uppercase and add to protein Sequence list.''' 61 '''Open fasta of amino acid sequences to parse, uppercase and add to protein Sequence list.'''
62 #read in Sequences from fasta file, uppercase and add to seqList 62 #read in Sequences from fasta file, uppercase and add to seqList
63 for record in SeqIO.parse(inFile, "fasta", alphabet=IUPAC.protein): 63 for record in SeqIO.parse(inFile, "fasta", alphabet=IUPAC.protein):
64 record = record.upper() 64 record = record.upper()
65 seqList.append(record) #add Seq to list of Sequences 65 seqList.append(record) #add Seq to list of Sequences
66 #print number of sequences to be processed as user check 66 #print number of sequences to be processed as user check
67 print "\n%i flu sequences will be extracted for antigenic sites..." % len(seqList) 67 print("\n%i flu sequences will be extracted for antigenic sites..." % len(seqList))
68 #parse each target sequence object 68 #parse each target sequence object
69 for record in seqList: 69 for record in seqList:
70 extract_aa_from_sequence(record) 70 extract_aa_from_sequence(record)
71 71
72 #print original and extracted sequence 72 #print original and extracted sequence
73 for x in range(0, len(seqList)): 73 for x in range(0, len(seqList)):
74 print "Original %s: %i amino acids,\tExtracted: %i" % (seqList[x].id,len(seqList[x]),len(extractedSeqList[x])) 74 print("Original %s: %i amino acids,\tExtracted: %i" % (seqList[x].id,len(seqList[x]),len(extractedSeqList[x])))
75 75
76 #determine if output format is fasta (default) or csv 76 #determine if output format is fasta (default) or csv
77 if args.csv: 77 if args.csv:
78 #write csv file of extracted antigenic sites 78 #write csv file of extracted antigenic sites
79 for record in extractedSeqList: 79 for record in extractedSeqList:
80 #outFile.write(record.id),"," 80 #outFile.write(record.id),","
81 name_part = (record.id).rstrip() + ',' 81 name_part = (record.id).rstrip() + ','
82 sequence = str(record.seq).strip() 82 sequence = str(record.seq).strip()
83 csv_seq = ",".join(sequence) 83 csv_seq = ",".join(sequence)
84 comma_separated_sequence = name_part + csv_seq + "\n" 84 comma_separated_sequence = name_part + csv_seq + "\n"
85 print comma_separated_sequence 85 print(comma_separated_sequence)
86 outFile.write(comma_separated_sequence) 86 outFile.write(comma_separated_sequence)
87 else: 87 else:
88 #write fasta file of extracted antigenic sites 88 #write fasta file of extracted antigenic sites
89 SeqIO.write(extractedSeqList,outFile,"fasta") 89 SeqIO.write(extractedSeqList,outFile,"fasta")
90 90