Mercurial > repos > saskia-hiltemann > testrepo
comparison OTUtable_addblast.py @ 1:4afa63644ac3 draft default tip
Uploaded
| author | saskia-hiltemann |
|---|---|
| date | Mon, 09 Nov 2015 09:50:15 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:a8b089f5a429 | 1:4afa63644ac3 |
|---|---|
| 1 import requests | |
| 2 import time | |
| 3 import sys | |
| 4 | |
| 5 baseurl="http://www.ncbi.nlm.nih.gov/blast/Blast.cgi" | |
| 6 | |
| 7 OTUfile=sys.argv[0] | |
| 8 BLASTfile=sys.argv[1] | |
| 9 fastafile=sys.argv[2] | |
| 10 | |
| 11 | |
| 12 def make_url(seq): | |
| 13 return baseurl+"?DATABASE=nr&PERC_IDENT=97&EXCLUDE_SEQ_UNCULT=on&HITLIST_SIZE=10&FILTER=L&FILTER=m&FILTER=R&EXPECT=10&FORMAT_TYPE=HTML&PROGRAM=blastn&CLIENT=web&SERVICE=megablast&PAGE=Nucleotides&CMD=Put&QUERY="+seq.lower() | |
| 14 | |
| 15 def make_RIDlink(RID): | |
| 16 return "<a target=\"_blank\" href=\""+baseurl+"?CMD=Get&RID="+RID+"\">view results</a>" | |
| 17 | |
| 18 def make_rerun_link(seq): | |
| 19 return "<a target=\"_blank\" href=\""+baseurl+"?DATABASE=nr&HITLIST_SIZE=10&EXCLUDE_SEQ_UNCULT=true&FILTER=L&EXPECT=10&FORMAT_TYPE=HTML&PROGRAM=blastn&CLIENT=web&SERVICE=megablast&PAGE=Nucleotides&CMD=Put&QUERY="+seq.lower()+"\">resubmit query</a>" | |
| 20 | |
| 21 | |
| 22 | |
| 23 ### for each fasta sequence create blast search | |
| 24 sequences = [line.rstrip('\n').replace('-','') for line in open(fastafile) if '>' not in line] | |
| 25 urls = [make_url(seq) for seq in sequences] | |
| 26 | |
| 27 RIDs = [] | |
| 28 for url in urls: | |
| 29 r=requests.get(url) | |
| 30 RID = r.text[r.text.find("RID"):r.text.find("RTOE")] | |
| 31 RID = RID[6:-3].lstrip().rstrip() | |
| 32 RIDs.append(RID) | |
| 33 print "Submitted request, RID: "+ RID | |
| 34 time.sleep(3) # be nice to the server | |
| 35 | |
| 36 | |
| 37 | |
| 38 ### Get top hits from local BLAST results file, add to OTUtable file | |
| 39 blastf = open(BLASTfile, "r") | |
| 40 otuf = open(OTUfile, "r") | |
| 41 outfile = open("newtable.tsv","w+") | |
| 42 | |
| 43 linenum=0 | |
| 44 for line in otuf: | |
| 45 if linenum == 0: | |
| 46 outfile.write( line.rstrip()+"\tBLAST Top Hit\n" ) | |
| 47 else: | |
| 48 outfile.write( line.rstrip() +"\t"+ blastf.readline().strip().split("\t")[-1]+"\n" ) | |
| 49 linenum +=1 | |
| 50 | |
| 51 blastf.close() | |
| 52 otuf.close() | |
| 53 outfile.close() | |
| 54 | |
| 55 | |
| 56 | |
| 57 ### Add RID link and rerun link to table | |
| 58 otuf = open("newtable.tsv","r") | |
| 59 outfile = open("newtable2.tsv","w+") | |
| 60 | |
| 61 print len(sequences) | |
| 62 print len (RIDs) | |
| 63 linenum=-1 | |
| 64 for line in otuf: | |
| 65 if linenum == -1: | |
| 66 outfile.write( line.rstrip()+"\tBLAST result\tBLAST resubmit\n" ) | |
| 67 else: | |
| 68 outfile.write( line.rstrip() +"\t"+ make_RIDlink(RIDs[linenum]) + "\t" + make_rerun_link(sequences[linenum])+"\n" ) | |
| 69 linenum +=1 | |
| 70 | |
| 71 |
