Mercurial > repos > saskia-hiltemann > testrepo
view OTUtable_addblast.py @ 1:4afa63644ac3 draft default tip
Uploaded
| author | saskia-hiltemann |
|---|---|
| date | Mon, 09 Nov 2015 09:50:15 -0500 |
| parents | |
| children |
line wrap: on
line source
import requests import time import sys baseurl="http://www.ncbi.nlm.nih.gov/blast/Blast.cgi" OTUfile=sys.argv[0] BLASTfile=sys.argv[1] fastafile=sys.argv[2] def make_url(seq): return baseurl+"?DATABASE=nr&PERC_IDENT=97&EXCLUDE_SEQ_UNCULT=on&HITLIST_SIZE=10&FILTER=L&FILTER=m&FILTER=R&EXPECT=10&FORMAT_TYPE=HTML&PROGRAM=blastn&CLIENT=web&SERVICE=megablast&PAGE=Nucleotides&CMD=Put&QUERY="+seq.lower() def make_RIDlink(RID): return "<a target=\"_blank\" href=\""+baseurl+"?CMD=Get&RID="+RID+"\">view results</a>" def make_rerun_link(seq): return "<a target=\"_blank\" href=\""+baseurl+"?DATABASE=nr&HITLIST_SIZE=10&EXCLUDE_SEQ_UNCULT=true&FILTER=L&EXPECT=10&FORMAT_TYPE=HTML&PROGRAM=blastn&CLIENT=web&SERVICE=megablast&PAGE=Nucleotides&CMD=Put&QUERY="+seq.lower()+"\">resubmit query</a>" ### for each fasta sequence create blast search sequences = [line.rstrip('\n').replace('-','') for line in open(fastafile) if '>' not in line] urls = [make_url(seq) for seq in sequences] RIDs = [] for url in urls: r=requests.get(url) RID = r.text[r.text.find("RID"):r.text.find("RTOE")] RID = RID[6:-3].lstrip().rstrip() RIDs.append(RID) print "Submitted request, RID: "+ RID time.sleep(3) # be nice to the server ### Get top hits from local BLAST results file, add to OTUtable file blastf = open(BLASTfile, "r") otuf = open(OTUfile, "r") outfile = open("newtable.tsv","w+") linenum=0 for line in otuf: if linenum == 0: outfile.write( line.rstrip()+"\tBLAST Top Hit\n" ) else: outfile.write( line.rstrip() +"\t"+ blastf.readline().strip().split("\t")[-1]+"\n" ) linenum +=1 blastf.close() otuf.close() outfile.close() ### Add RID link and rerun link to table otuf = open("newtable.tsv","r") outfile = open("newtable2.tsv","w+") print len(sequences) print len (RIDs) linenum=-1 for line in otuf: if linenum == -1: outfile.write( line.rstrip()+"\tBLAST result\tBLAST resubmit\n" ) else: outfile.write( line.rstrip() +"\t"+ make_RIDlink(RIDs[linenum]) + "\t" + make_rerun_link(sequences[linenum])+"\n" ) linenum +=1
