comparison libs/deletion_compare.py @ 0:6895de35a263 draft

planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
author charles_s_test
date Thu, 19 Oct 2017 18:16:51 -0400
parents
children 0d65b71ff8df
comparison
equal deleted inserted replaced
-1:000000000000 0:6895de35a263
1
2 import os
3 from Bio import SeqIO
4 import sys
5 from Initial_functions import Uniq
6 from Bio.Blast import NCBIXML
7
8 BwaPath="/nfs/sw/apps/bwa/bwa-0.7.15/bwa"
9 SamTlsPth="/nfs/sw/apps/samtools/samtools-1.3.1/bin/samtools"
10 Makebltdb="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/makeblastdb"
11 Blastnpth="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/blastn"
12
13 target=sys.argv[1] #should be sra format
14 test_gene=sys.argv[2]
15 mapping_mode=sys.argv[3]
16 if sys.argv[4] not in ("1","2","3"):
17 additional_file=sys.argv[4]
18 file_mode=sys.argv[5]
19 else:
20 additional_file=""
21 file_mode=sys.argv[4]
22
23
24
25
26 def Copenhagen(sra_name,additional_file,mapping_mode,file_mode):
27 if file_mode=="1":#interleaved
28 if sra_name[-3:]=="sra":
29 del_fastq=1
30 for_fq=sra_name.replace(".sra","_1.fastq")
31 rev_fq=sra_name.replace(".sra","_2.fastq")
32 for_sai=sra_name.replace(".sra","_1.sai")
33 rev_sai=sra_name.replace(".sra","_2.sai")
34 sam=sra_name.replace(".sra",".sam")
35 bam=sra_name.replace(".sra",".bam")
36 else:
37 del_fastq=0
38 core_id=sra_name.split(".fastq")[0]
39 for_fq=core_id+"-read1.fastq"
40 rev_fq=core_id+"-read2.fastq"
41 for_sai=core_id+"_1.sai"
42 rev_sai=core_id+"_2.sai"
43 sam=core_id+".sam"
44 bam=core_id+".bam"
45 elif file_mode=="2":#seperated
46 forword_seq=sra_name
47 reverse_seq=additional_file
48 for_core_id=forword_seq.split(".fastq")[0]
49 re_core_id=reverse_seq.split(".fastq")[0]
50 for_fq=for_core_id+".fastq"
51 rev_fq=re_core_id+".fastq"
52 for_sai=for_core_id+".sai"
53 rev_sai=re_core_id+".sai"
54 sam=for_core_id+".sam"
55 bam=sam.replace(".sam",".bam")
56 elif file_mode=="3":#single-end
57 if sra_name[-3:]=="sra":
58 del_fastq=1
59 for_fq=sra_name.replace(".sra","_1.fastq")
60 rev_fq=sra_name.replace(".sra","_2.fastq")
61 for_sai=sra_name.replace(".sra","_1.sai")
62 rev_sai=sra_name.replace(".sra","_2.sai")
63 sam=sra_name.replace(".sra",".sam")
64 bam=sra_name.replace(".sra",".bam")
65 else:
66 del_fastq=0
67 core_id=sra_name.split(".fastq")[0]
68 for_fq=core_id+".fastq"
69 rev_fq=core_id+".fastq"
70 for_sai=core_id+"_1.sai"
71 rev_sai=core_id+"_2.sai"
72 sam=core_id+".sam"
73 bam=core_id+".bam"
74
75 database="complete_oafA.fasta"
76 os.system(BwaPath+" index database/"+database)###01/28/2015
77 if mapping_mode=="mem":
78 os.system(BwaPath+" mem database/"+database+" "+for_fq+" "+rev_fq+" > "+sam) #2014/12/23
79 elif mapping_mode=="sam":
80 os.system(BwaPath+" aln database/"+database+" "+for_fq+" > "+for_sai)
81 os.system(BwaPath+" aln database/"+database+" "+rev_fq+" > "+rev_sai)
82 os.system(BwaPath+" sampe database/"+database+" "+for_sai+" "+ rev_sai+" "+for_fq+" "+rev_fq+" > "+sam)
83 os.system(SamTlsPth+" view -F 4 -Sbh "+sam+" > "+bam)
84 os.system(SamTlsPth+" view -h -o "+sam+" "+bam)
85 os.system("cat "+sam+"|awk '{if ($5>0) {print $10}}'>"+sam+"_seq.txt")
86 os.system("cat "+sam+"|awk '{if ($5>0) {print $1}}'>"+sam+"_title.txt")
87 file1=open(sam+"_title.txt","r")
88 file2=open(sam+"_seq.txt","r")
89 file1=file1.readlines()
90 file2=file2.readlines()
91 file=open(sam+".fasta","w")
92 for i in range(len(file1)):
93 title=">"+file1[i]
94 seq=file2[i]
95 if len(seq)>40 and (len(title)>5 or ("@" not in title)):
96 file.write(title)
97 file.write(seq)
98 file.close()
99 database2="oafA_of_O4_O5.fasta"
100 os.system(Makebltdb+' -in database/'+database2+' -out '+database2+'_db '+'-dbtype nucl')
101 os.system(Blastnpth+" -query "+sam+".fasta"+" -db "+database2+"_db -out "+sam+"_vs_O45.xml -outfmt 5")
102 handle=open(sam+"_vs_O45.xml")
103 handle=NCBIXML.parse(handle)
104 handle=list(handle)
105 O9_bigger=0
106 O2_bigger=0
107 for x in handle:
108 O9_score=0
109 O2_score=0
110 try:
111 if 'O-4_full' in x.alignments[0].hit_def:
112 O9_score=x.alignments[0].hsps[0].bits
113 O2_score=x.alignments[1].hsps[0].bits
114 elif 'O-4_5-' in x.alignments[0].hit_def:
115 O9_score=x.alignments[1].hsps[0].bits
116 O2_score=x.alignments[0].hsps[0].bits
117 if O9_score>O2_score:
118 O9_bigger+=1
119 if O9_score<O2_score:
120 O2_bigger+=1
121 except:
122 continue
123 print "$$$Genome:",sra_name
124 if O9_bigger>O2_bigger:
125 print "$$$Typhimurium"
126 elif O9_bigger<O2_bigger:
127 print "$$$Typhimurium_O5-"
128 else:
129 print "$$$Typhimurium, even no 7 bases difference"
130 print "O-4 number is:",O9_bigger
131 print "O-4_5- number is:",O2_bigger
132 os.system("rm "+sam+"_title.txt")###01/28/2015
133 os.system("rm "+sam+"_seq.txt")###01/28/2015
134 os.system("rm "+sam+".fasta")###01/28/2015
135 os.system("rm "+database2+"_db.*")###01/28/2015
136 os.system("rm "+sam+"_vs_O45.xml")###01/28/2015
137
138 if test_gene=="oafA":
139 Copenhagen(target,additional_file,mapping_mode,file_mode)