annotate libs/special_gene_test_assemblies.py @ 2:381e1e7109fc draft default tip

planemo upload commit 464b391afaa5819bc681452e85bea9d882730eb6-dirty
author charles_s_test
date Sun, 12 Nov 2017 02:26:17 -0500
parents 8db411fab3e1
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
1 #just an possible use, we can use it to replace H**.py, treat fliC and fljB as the target genes?
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
2
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
3 from __future__ import division
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
4 import sys
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
5 import os
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
6 from Bio.Blast import NCBIXML
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
7 from Bio import SeqIO
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
8
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
9 Makebltdb="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/makeblastdb"
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
10 Blastnpth="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/blastn"
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
11
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
12 def special_gene(target_fie,database,gene_list):
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
13 database=database.split("/")[-1]##########1/27/2015
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
14 os.system(Makebltdb+' -in database/'+database+' -out '+database+'_db -dbtype nucl')##########1/28/2015
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
15 os.system(Blastnpth+' -query '+target_file+' -db '+database+'_db -out '+database+'_vs_'+target_file+'.xml '+'-outfmt 5')##########1/28/2015
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
16 xml_file=database+'_vs_'+target_file+'.xml'
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
17 result_handle=open(xml_file)
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
18 blast_record=NCBIXML.parse(result_handle)
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
19 records=list(blast_record)
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
20 E_thresh=1e-10
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
21 for x in gene_list:
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
22 handle=SeqIO.parse("database/"+database,"fasta")##########1/28/2015
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
23 length_list=[]
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
24 for y in handle:
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
25 if x in y.description:
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
26 length_x=len(y.seq)
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
27 length_list.append(length_x)
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
28 aver_len=float(sum(length_list))/len(length_list)
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
29 hspbit=[]
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
30 alignmentlist=[]
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
31 for record in records:
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
32 for alignment in record.alignments:
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
33 if x in alignment.hit_def: #multi gene database, so...
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
34 print x,"got a hit, evaluating the hit quality..."
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
35 score=0
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
36 for hsp in alignment.hsps:
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
37 if hsp.expect<E_thresh:
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
38 score+=hsp.bits
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
39 alignment=alignment.hit_def+':'+str(score)
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
40 hspbit.append(score)
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
41 alignmentlist.append(alignment)
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
42 scorelist=dict(zip(alignmentlist,hspbit))
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
43 score=0
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
44 for Htype in scorelist:
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
45 if scorelist[Htype]>score:
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
46 First_Choice=Htype
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
47 score=scorelist[Htype]
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
48 if float(score)>=0.1*aver_len:
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
49 print "$$$",First_Choice,"got a hit, score:",score
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
50 else:
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
51 print "$$$No ",x,"exists"
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
52 os.system("rm "+database+"_db.*")##########1/28/2015
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
53 os.system("rm "+xml_file)##########1/28/2015
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
54
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
55 database=sys.argv[1]
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
56 target_file=sys.argv[2]
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
57 gene_list=[]
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
58 a=1
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
59 i=3
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
60 while a==1:
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
61 try:
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
62 gene_list.append(sys.argv[i])
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
63 i+=1
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
64 except:
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
65 a=0
8db411fab3e1 planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff changeset
66 special_gene(target_file,database,gene_list)