Mercurial > repos > public-health-bioinformatics > antigenic_site_extraction
comparison antigenic_site_extraction.py @ 3:f5522fa90c03 draft default tip
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 6f09c69c51ec3d6bd7487f55384b97155355c456
author | public-health-bioinformatics |
---|---|
date | Mon, 04 Feb 2019 18:33:43 -0500 |
parents | 89deee583d6d |
children |
comparison
equal
deleted
inserted
replaced
2:f879bf22aa12 | 3:f5522fa90c03 |
---|---|
52 positionList = strippedLine.split(',') | 52 positionList = strippedLine.split(',') |
53 #Convert string items in positionList from strings to int and add to indexArray | 53 #Convert string items in positionList from strings to int and add to indexArray |
54 for item in positionList: | 54 for item in positionList: |
55 indexArray.append(int(item)) | 55 indexArray.append(int(item)) |
56 #print number of amino acids to extract and array to console as user check | 56 #print number of amino acids to extract and array to console as user check |
57 print "Amino Acid positions to extract: %i " %(len(indexArray)) | 57 print("Amino Acid positions to extract: %i " %(len(indexArray))) |
58 print indexArray | 58 print(indexArray) |
59 | 59 |
60 with open(args.inFileHandle1,'r') as inFile: | 60 with open(args.inFileHandle1,'r') as inFile: |
61 '''Open fasta of amino acid sequences to parse, uppercase and add to protein Sequence list.''' | 61 '''Open fasta of amino acid sequences to parse, uppercase and add to protein Sequence list.''' |
62 #read in Sequences from fasta file, uppercase and add to seqList | 62 #read in Sequences from fasta file, uppercase and add to seqList |
63 for record in SeqIO.parse(inFile, "fasta", alphabet=IUPAC.protein): | 63 for record in SeqIO.parse(inFile, "fasta", alphabet=IUPAC.protein): |
64 record = record.upper() | 64 record = record.upper() |
65 seqList.append(record) #add Seq to list of Sequences | 65 seqList.append(record) #add Seq to list of Sequences |
66 #print number of sequences to be processed as user check | 66 #print number of sequences to be processed as user check |
67 print "\n%i flu sequences will be extracted for antigenic sites..." % len(seqList) | 67 print("\n%i flu sequences will be extracted for antigenic sites..." % len(seqList)) |
68 #parse each target sequence object | 68 #parse each target sequence object |
69 for record in seqList: | 69 for record in seqList: |
70 extract_aa_from_sequence(record) | 70 extract_aa_from_sequence(record) |
71 | 71 |
72 #print original and extracted sequence | 72 #print original and extracted sequence |
73 for x in range(0, len(seqList)): | 73 for x in range(0, len(seqList)): |
74 print "Original %s: %i amino acids,\tExtracted: %i" % (seqList[x].id,len(seqList[x]),len(extractedSeqList[x])) | 74 print("Original %s: %i amino acids,\tExtracted: %i" % (seqList[x].id,len(seqList[x]),len(extractedSeqList[x]))) |
75 | 75 |
76 #determine if output format is fasta (default) or csv | 76 #determine if output format is fasta (default) or csv |
77 if args.csv: | 77 if args.csv: |
78 #write csv file of extracted antigenic sites | 78 #write csv file of extracted antigenic sites |
79 for record in extractedSeqList: | 79 for record in extractedSeqList: |
80 #outFile.write(record.id),"," | 80 #outFile.write(record.id),"," |
81 name_part = (record.id).rstrip() + ',' | 81 name_part = (record.id).rstrip() + ',' |
82 sequence = str(record.seq).strip() | 82 sequence = str(record.seq).strip() |
83 csv_seq = ",".join(sequence) | 83 csv_seq = ",".join(sequence) |
84 comma_separated_sequence = name_part + csv_seq + "\n" | 84 comma_separated_sequence = name_part + csv_seq + "\n" |
85 print comma_separated_sequence | 85 print(comma_separated_sequence) |
86 outFile.write(comma_separated_sequence) | 86 outFile.write(comma_separated_sequence) |
87 else: | 87 else: |
88 #write fasta file of extracted antigenic sites | 88 #write fasta file of extracted antigenic sites |
89 SeqIO.write(extractedSeqList,outFile,"fasta") | 89 SeqIO.write(extractedSeqList,outFile,"fasta") |
90 | 90 |