Mercurial > repos > public-health-bioinformatics > antigenic_site_extraction
annotate antigenic_site_extraction.py @ 2:f879bf22aa12 draft
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit e6d31e4b3666b5e1d322e22f44526f23c66692fb
author | public-health-bioinformatics |
---|---|
date | Thu, 17 Jan 2019 19:08:56 -0500 |
parents | 89deee583d6d |
children | f5522fa90c03 |
rev | line source |
---|---|
0
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
1 #!/usr/bin/env python |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
2 |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
3 '''Accepts fasta files of amino acid sequence, extracts specific amino acids (defined in a csv index array), |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
4 and outputs extracted sequences - representing flu antigenic sites - to fasta (default) or csv.''' |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
5 |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
6 '''Author: Diane Eisler, Molecular Microbiology & Genomics, BCCDC Public Health Laboratory,Sept 2017''' |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
7 |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
8 import sys,string,os, time, Bio, argparse |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
9 from Bio import Seq, SeqIO, SeqUtils, Alphabet, SeqRecord |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
10 from Bio.SeqRecord import SeqRecord |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
11 from Bio.Alphabet import IUPAC |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
12 from Bio.Seq import Seq |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
13 |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
14 #parse command line arguments |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
15 parser = argparse.ArgumentParser() |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
16 parser.add_argument("-c","--csv",help="export extracted antigenic sites to csv file",action="store_true") |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
17 parser.add_argument("inFileHandle1") #batch fasta file with sequences to be parsed |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
18 parser.add_argument("inFileHandle2") # .csv file containing positions of aa's to extract |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
19 parser.add_argument("outFileHandle") #user-specified name for output file of extracted aa seq's |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
20 args = parser.parse_args() |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
21 |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
22 #inFileHandle1 = sys.argv[1] #batch fasta file with sequences to be parsed |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
23 #inFileHandle2 = sys.argv[2] # .csv file containing positions of aa's to extract |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
24 #outFileHandle = sys.argv[3] #user-specified name for output file of extracted aa seq's |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
25 |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
26 outFile= open(args.outFileHandle,'w') #open output file for writing (mode 'w' truncates any existing content) |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
27 localtime = time.asctime(time.localtime(time.time())) #date and time of analysis |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
28 seqList = [] #list of aa sequence objects to parse for oligo sequences |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
29 indexArray = [] # .csv list of aa's corresponding to antigenic site positions |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
30 extractedSeqList = [] #list of extracted antigenic sites extracted from seqList |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
31 |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
32 def extract_aa_from_sequence(record): |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
33 """Extract specific amino acids from SeqRecord, create new SeqRecord and append to list.""" |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
34 original_sequence = str(record.seq) #pull out the SeqRecord's Seq object and ToString it |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
35 new_sequence = "" #set variable to empty |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
36 new_id = record.id #store the same sequence id as the original sequence |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
37 #iterate over each position in index array, extract corresponding aa and add to string |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
38 for pos in indexArray: |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
39 char = original_sequence[pos-1] #convert 1-based aa position to zero-based string index |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
40 new_sequence = new_sequence + char |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
41 rec = SeqRecord(Seq(new_sequence,IUPAC.protein), id = record.id, name = "", description = "") |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
42 extractedSeqList.append(rec) #add new SeqRecord object to the list |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
43 |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
44 with open (args.inFileHandle2,'r') as inFile2: |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
45 '''Open csv file containing amino acid positions to extract and add to list.''' |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
46 #read comma-separated items into position list |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
47 positionList = "" |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
48 for line in inFile2: |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
49 #remove whitespace from the end of each line |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
50 strippedLine = line.rstrip() |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
51 #split the line at commas and assign the returned list to positionList |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
52 positionList = strippedLine.split(',') |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
53 #Convert string items in positionList from strings to int and add to indexArray |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
54 for item in positionList: |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
55 indexArray.append(int(item)) |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
56 #print number of amino acids to extract and array to console as user check |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
57 print "Amino Acid positions to extract: %i " %(len(indexArray)) |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
58 print indexArray |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
59 |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
60 with open(args.inFileHandle1,'r') as inFile: |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
61 '''Open fasta of amino acid sequences to parse, uppercase and add to protein Sequence list.''' |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
62 #read in Sequences from fasta file, uppercase and add to seqList |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
63 for record in SeqIO.parse(inFile, "fasta", alphabet=IUPAC.protein): |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
64 record = record.upper() |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
65 seqList.append(record) #add Seq to list of Sequences |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
66 #print number of sequences to be processed as user check |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
67 print "\n%i flu sequences will be extracted for antigenic sites..." % len(seqList) |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
68 #parse each target sequence object |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
69 for record in seqList: |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
70 extract_aa_from_sequence(record) |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
71 |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
72 #print original and extracted sequence |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
73 for x in range(0, len(seqList)): |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
74 print "Original %s: %i amino acids,\tExtracted: %i" % (seqList[x].id,len(seqList[x]),len(extractedSeqList[x])) |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
75 |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
76 #determine if output format is fasta (default) or csv |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
77 if args.csv: |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
78 #write csv file of extracted antigenic sites |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
79 for record in extractedSeqList: |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
80 #outFile.write(record.id),"," |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
81 name_part = (record.id).rstrip() + ',' |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
82 sequence = str(record.seq).strip() |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
83 csv_seq = ",".join(sequence) |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
84 comma_separated_sequence = name_part + csv_seq + "\n" |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
85 print comma_separated_sequence |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
86 outFile.write(comma_separated_sequence) |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
87 else: |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
88 #write fasta file of extracted antigenic sites |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
89 SeqIO.write(extractedSeqList,outFile,"fasta") |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
90 |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
91 print("\n%i Sequences Extracted to Output file: %s" % ((len(extractedSeqList),args.outFileHandle))) |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
92 inFile.close() |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
93 inFile2.close() |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
94 outFile.close() |
89deee583d6d
planemo upload for repository https://github.com/Public-Health-Bioinformatics/flu_classification_suite commit 561cde8c8bd4a6164b1bef19ecff9809ac3340e0
public-health-bioinformatics
parents:
diff
changeset
|
95 |