|
1
|
1 import requests
|
|
|
2 import time
|
|
|
3 import sys
|
|
|
4
|
|
|
5 baseurl="http://www.ncbi.nlm.nih.gov/blast/Blast.cgi"
|
|
|
6
|
|
|
7 OTUfile=sys.argv[0]
|
|
|
8 BLASTfile=sys.argv[1]
|
|
|
9 fastafile=sys.argv[2]
|
|
|
10
|
|
|
11
|
|
|
12 def make_url(seq):
|
|
|
13 return baseurl+"?DATABASE=nr&PERC_IDENT=97&EXCLUDE_SEQ_UNCULT=on&HITLIST_SIZE=10&FILTER=L&FILTER=m&FILTER=R&EXPECT=10&FORMAT_TYPE=HTML&PROGRAM=blastn&CLIENT=web&SERVICE=megablast&PAGE=Nucleotides&CMD=Put&QUERY="+seq.lower()
|
|
|
14
|
|
|
15 def make_RIDlink(RID):
|
|
|
16 return "<a target=\"_blank\" href=\""+baseurl+"?CMD=Get&RID="+RID+"\">view results</a>"
|
|
|
17
|
|
|
18 def make_rerun_link(seq):
|
|
|
19 return "<a target=\"_blank\" href=\""+baseurl+"?DATABASE=nr&HITLIST_SIZE=10&EXCLUDE_SEQ_UNCULT=true&FILTER=L&EXPECT=10&FORMAT_TYPE=HTML&PROGRAM=blastn&CLIENT=web&SERVICE=megablast&PAGE=Nucleotides&CMD=Put&QUERY="+seq.lower()+"\">resubmit query</a>"
|
|
|
20
|
|
|
21
|
|
|
22
|
|
|
23 ### for each fasta sequence create blast search
|
|
|
24 sequences = [line.rstrip('\n').replace('-','') for line in open(fastafile) if '>' not in line]
|
|
|
25 urls = [make_url(seq) for seq in sequences]
|
|
|
26
|
|
|
27 RIDs = []
|
|
|
28 for url in urls:
|
|
|
29 r=requests.get(url)
|
|
|
30 RID = r.text[r.text.find("RID"):r.text.find("RTOE")]
|
|
|
31 RID = RID[6:-3].lstrip().rstrip()
|
|
|
32 RIDs.append(RID)
|
|
|
33 print "Submitted request, RID: "+ RID
|
|
|
34 time.sleep(3) # be nice to the server
|
|
|
35
|
|
|
36
|
|
|
37
|
|
|
38 ### Get top hits from local BLAST results file, add to OTUtable file
|
|
|
39 blastf = open(BLASTfile, "r")
|
|
|
40 otuf = open(OTUfile, "r")
|
|
|
41 outfile = open("newtable.tsv","w+")
|
|
|
42
|
|
|
43 linenum=0
|
|
|
44 for line in otuf:
|
|
|
45 if linenum == 0:
|
|
|
46 outfile.write( line.rstrip()+"\tBLAST Top Hit\n" )
|
|
|
47 else:
|
|
|
48 outfile.write( line.rstrip() +"\t"+ blastf.readline().strip().split("\t")[-1]+"\n" )
|
|
|
49 linenum +=1
|
|
|
50
|
|
|
51 blastf.close()
|
|
|
52 otuf.close()
|
|
|
53 outfile.close()
|
|
|
54
|
|
|
55
|
|
|
56
|
|
|
57 ### Add RID link and rerun link to table
|
|
|
58 otuf = open("newtable.tsv","r")
|
|
|
59 outfile = open("newtable2.tsv","w+")
|
|
|
60
|
|
|
61 print len(sequences)
|
|
|
62 print len (RIDs)
|
|
|
63 linenum=-1
|
|
|
64 for line in otuf:
|
|
|
65 if linenum == -1:
|
|
|
66 outfile.write( line.rstrip()+"\tBLAST result\tBLAST resubmit\n" )
|
|
|
67 else:
|
|
|
68 outfile.write( line.rstrip() +"\t"+ make_RIDlink(RIDs[linenum]) + "\t" + make_rerun_link(sequences[linenum])+"\n" )
|
|
|
69 linenum +=1
|
|
|
70
|
|
|
71
|