Mercurial > repos > charles_s_test > seqsero2
annotate libs/special_gene_test_assemblies.py @ 16:ed79f56d2184 draft default tip
planemo upload commit a4fb57231f274270afbfebd47f67df05babffa4a-dirty
author | charles_s_test |
---|---|
date | Thu, 30 Nov 2017 16:58:25 -0500 |
parents | 53efef402c51 |
children |
rev | line source |
---|---|
0
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
1 #just a possible use: we could use it to replace H**.py and treat fliC and fljB as the target genes? |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
2 |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
3 from __future__ import division |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
4 import sys |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
5 import os |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
6 from Bio.Blast import NCBIXML |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
7 from Bio import SeqIO |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
8 |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
# Locations of the BLAST+ executables used by special_gene().
# The defaults are the original site-specific install paths; set the
# SEQSERO_MAKEBLASTDB / SEQSERO_BLASTN environment variables to point at a
# different installation without editing this file.
Makebltdb = os.environ.get(
    "SEQSERO_MAKEBLASTDB",
    "/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/makeblastdb",
)
Blastnpth = os.environ.get(
    "SEQSERO_BLASTN",
    "/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/blastn",
)
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
11 |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
def special_gene(target_fie,database,gene_list):
    """BLAST an assembly against a gene database and report which genes hit.

    The database FASTA (looked up as ``database/<basename of database>``) is
    turned into a nucleotide BLAST database, ``target_fie`` is queried
    against it with blastn (XML output), and for every name in ``gene_list``
    the best-scoring alignment whose hit definition contains that name is
    evaluated.  A gene is reported as present when the summed bit score of
    its significant HSPs (e-value below 1e-10) is at least 10% of the
    average length of the database sequences whose description contains the
    gene name.  Results are printed to stdout; nothing is returned.

    Parameters:
        target_fie: path of the query FASTA file.  The (misspelled) name is
            kept so existing callers are unaffected.
        database: path or file name of the gene database; only the part
            after the last "/" is used.
        gene_list: iterable of gene names to look for.

    Raises:
        subprocess.CalledProcessError: if makeblastdb or blastn exits with a
            non-zero status.
    """
    # Imported locally so the block does not depend on file-level imports
    # that the original module did not have.
    import glob
    import subprocess

    # The original body read the module-level global ``target_file`` instead
    # of its own argument; bind the argument explicitly.
    target_file = target_fie
    database = database.split("/")[-1]
    db_fasta = "database/" + database
    db_prefix = database + "_db"
    # Use only the file name of the query so the temporary XML always lands
    # in the working directory, even when the query is given with a path.
    xml_file = database + "_vs_" + os.path.basename(target_file) + ".xml"
    e_thresh = 1e-10

    try:
        # Argument lists (no shell) keep paths with spaces or shell
        # metacharacters from being misinterpreted.
        subprocess.check_call(
            [Makebltdb, "-in", db_fasta, "-out", db_prefix, "-dbtype", "nucl"]
        )
        subprocess.check_call(
            [Blastnpth, "-query", target_file, "-db", db_prefix,
             "-out", xml_file, "-outfmt", "5"]
        )

        with open(xml_file) as result_handle:
            records = list(NCBIXML.parse(result_handle))

        # Read the database once instead of re-parsing it for every gene.
        references = [
            (entry.description, len(entry.seq))
            for entry in SeqIO.parse(db_fasta, "fasta")
        ]

        for gene in gene_list:
            lengths = [size for description, size in references
                       if gene in description]
            if not lengths:
                # No reference sequence for this gene: it cannot be matched
                # (previously this raised ZeroDivisionError).
                print("$$$No  %s exists" % gene)
                continue
            aver_len = float(sum(lengths)) / len(lengths)

            best_label = None
            best_score = 0
            for record in records:
                for alignment in record.alignments:
                    if gene not in alignment.hit_def:  # multi-gene database
                        continue
                    print("%s got a hit, evaluating the hit quality..." % gene)
                    score = sum(hsp.bits for hsp in alignment.hsps
                                if hsp.expect < e_thresh)
                    # Strict ">" keeps the first alignment on ties, which
                    # makes the choice deterministic.
                    if score > best_score:
                        best_score = score
                        best_label = alignment.hit_def + ":" + str(score)

            if float(best_score) >= 0.1 * aver_len:
                print("$$$ %s got a hit, score: %s" % (best_label, best_score))
            else:
                print("$$$No  %s exists" % gene)
    finally:
        # Always clean up the temporary BLAST database and XML report.
        for leftover in glob.glob(db_prefix + ".*") + [xml_file]:
            if os.path.exists(leftover):
                os.remove(leftover)
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
54 |
6895de35a263
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
charles_s_test
parents:
diff
changeset
|
# Command line: <database> <target_file> <gene> [<gene> ...]
database = sys.argv[1]
target_file = sys.argv[2]
# Every remaining argument is a gene name.  A slice replaces the former
# index loop that relied on a bare ``except`` to detect the end of argv.
gene_list = sys.argv[3:]
special_gene(target_file, database, gene_list)