annotate OTUtable_addblast.py @ 1:4afa63644ac3 draft default tip

Uploaded
author saskia-hiltemann
date Mon, 09 Nov 2015 09:50:15 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
1 import requests
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
2 import time
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
3 import sys
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
4
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
# Endpoint of the NCBI BLAST CGI; used both for submitting queries and for
# building the result / resubmit links written into the output table.
baseurl = "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi"

# Command line: <OTU table> <BLAST results> <fasta>.
# sys.argv[0] is the path of this script itself, so the three input files
# start at index 1.
if len(sys.argv) < 4:
    sys.exit("usage: %s <OTU table> <BLAST results> <fasta>" % sys.argv[0])
OTUfile = sys.argv[1]
BLASTfile = sys.argv[2]
fastafile = sys.argv[3]
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
10
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
11
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
def make_url(seq):
    """Return the BLAST submission URL (CMD=Put) for one nucleotide sequence.

    The query string requests a megablast/blastn search against the ``nr``
    database; ``seq`` is lower-cased and appended as the QUERY value.
    """
    query_prefix = (
        "?DATABASE=nr&PERC_IDENT=97&EXCLUDE_SEQ_UNCULT=on&HITLIST_SIZE=10"
        "&FILTER=L&FILTER=m&FILTER=R&EXPECT=10&FORMAT_TYPE=HTML"
        "&PROGRAM=blastn&CLIENT=web&SERVICE=megablast&PAGE=Nucleotides"
        "&CMD=Put&QUERY="
    )
    submission_url = baseurl + query_prefix
    return submission_url + seq.lower()
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
14
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
def make_RIDlink(RID):
    """Return an HTML anchor that opens the BLAST results for ``RID``.

    The link targets ``baseurl`` with ``CMD=Get`` and opens in a new tab.
    """
    href = baseurl + "?CMD=Get&RID=" + RID
    opening_tag = '<a target="_blank" href="' + href + '">'
    return opening_tag + "view results</a>"
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
17
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
def make_rerun_link(seq):
    """Return an HTML anchor that resubmits ``seq`` to BLAST in a new tab.

    ``seq`` is lower-cased and appended as the QUERY value of a CMD=Put URL.
    """
    # NOTE(review): this parameter set is not the same as the one in
    # make_url (no PERC_IDENT, a single FILTER, EXCLUDE_SEQ_UNCULT=true);
    # confirm whether the difference is intentional.
    query_prefix = (
        "?DATABASE=nr&HITLIST_SIZE=10&EXCLUDE_SEQ_UNCULT=true&FILTER=L"
        "&EXPECT=10&FORMAT_TYPE=HTML&PROGRAM=blastn&CLIENT=web"
        "&SERVICE=megablast&PAGE=Nucleotides&CMD=Put&QUERY="
    )
    href = baseurl + query_prefix + seq.lower()
    return '<a target="_blank" href="' + href + '">resubmit query</a>'
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
20
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
21
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
22
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
### for each fasta sequence create blast search
import re  # used only in this section, to pull the request id out of the reply

# Every line that is not a '>' header is treated as one sequence, with
# alignment gaps ('-') removed.  Blank lines are skipped so that they are not
# submitted as empty queries.  The file is closed as soon as it has been read.
with open(fastafile) as fasta:
    sequences = [line.rstrip('\n').replace('-', '')
                 for line in fasta
                 if '>' not in line and line.strip()]
urls = [make_url(seq) for seq in sequences]

# One RID per sequence, in the same order as `sequences`; later sections index
# both lists with the same row number, so an entry is appended even on failure.
RIDs = []
for url in urls:
    r = requests.get(url)
    # The request id is expected in the reply as "RID = <id>"; take the first
    # such occurrence instead of slicing between "RID" and "RTOE" by offset.
    match = re.search(r"RID = (\S+)", r.text)
    if match:
        RID = match.group(1)
        print("Submitted request, RID: " + RID)
    else:
        RID = ""
        print("Warning: could not find an RID in the BLAST response")
    RIDs.append(RID)
    time.sleep(3)  # be nice to the server
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
35
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
36
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
37
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
### Get top hits from local BLAST results file, add to OTUtable file
# Writes newtable.tsv: the OTU table with one extra column.  The header row
# gets the column title; every following row gets the last tab-separated field
# of the next line of the BLAST results file (empty if that file runs out).
# The context manager closes all three files even if a read or write fails.
with open(BLASTfile, "r") as blastf, \
        open(OTUfile, "r") as otuf, \
        open("newtable.tsv", "w+") as outfile:
    for linenum, line in enumerate(otuf):
        if linenum == 0:
            outfile.write(line.rstrip() + "\tBLAST Top Hit\n")
        else:
            top_hit = blastf.readline().strip().split("\t")[-1]
            outfile.write(line.rstrip() + "\t" + top_hit + "\n")
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
54
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
55
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
56
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
### Add RID link and rerun link to table
# Writes newtable2.tsv: newtable.tsv with two extra columns holding the
# "view results" and "resubmit query" links.  The context manager makes sure
# both files are flushed and closed when the loop finishes or fails.
with open("newtable.tsv", "r") as otuf, \
        open("newtable2.tsv", "w+") as outfile:
    # Diagnostic output: these two counts should match.
    print(len(sequences))
    print(len(RIDs))
    # Start counting at -1 so the header row is -1 and the first data row is
    # index 0 into `RIDs` / `sequences`.
    for linenum, line in enumerate(otuf, -1):
        if linenum == -1:
            outfile.write(line.rstrip() + "\tBLAST result\tBLAST resubmit\n")
        else:
            result_link = make_RIDlink(RIDs[linenum])
            rerun_link = make_rerun_link(sequences[linenum])
            outfile.write(line.rstrip() + "\t" + result_link + "\t" + rerun_link + "\n")
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
70
4afa63644ac3 Uploaded
saskia-hiltemann
parents:
diff changeset
71