Mercurial > repos > charles_s_test > seqsero2
comparison libs/deletion_compare.py @ 0:6895de35a263 draft
planemo upload commit 844a891e4eaf732830043204ac636907eefb011d-dirty
| author | charles_s_test |
|---|---|
| date | Thu, 19 Oct 2017 18:16:51 -0400 |
| parents | |
| children | 0d65b71ff8df |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:6895de35a263 |
|---|---|
| 1 | |
| 2 import os | |
| 3 from Bio import SeqIO | |
| 4 import sys | |
| 5 from Initial_functions import Uniq | |
| 6 from Bio.Blast import NCBIXML | |
| 7 | |
| 8 BwaPath="/nfs/sw/apps/bwa/bwa-0.7.15/bwa" | |
| 9 SamTlsPth="/nfs/sw/apps/samtools/samtools-1.3.1/bin/samtools" | |
| 10 Makebltdb="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/makeblastdb" | |
| 11 Blastnpth="/nfs/sw/apps/blast/ncbi-blast-2.6.0+/bin/blastn" | |
| 12 | |
| 13 target=sys.argv[1] #should be sra format | |
| 14 test_gene=sys.argv[2] | |
| 15 mapping_mode=sys.argv[3] | |
| 16 if sys.argv[4] not in ("1","2","3"): | |
| 17 additional_file=sys.argv[4] | |
| 18 file_mode=sys.argv[5] | |
| 19 else: | |
| 20 additional_file="" | |
| 21 file_mode=sys.argv[4] | |
| 22 | |
| 23 | |
| 24 | |
| 25 | |
| 26 def Copenhagen(sra_name,additional_file,mapping_mode,file_mode): | |
| 27 if file_mode=="1":#interleaved | |
| 28 if sra_name[-3:]=="sra": | |
| 29 del_fastq=1 | |
| 30 for_fq=sra_name.replace(".sra","_1.fastq") | |
| 31 rev_fq=sra_name.replace(".sra","_2.fastq") | |
| 32 for_sai=sra_name.replace(".sra","_1.sai") | |
| 33 rev_sai=sra_name.replace(".sra","_2.sai") | |
| 34 sam=sra_name.replace(".sra",".sam") | |
| 35 bam=sra_name.replace(".sra",".bam") | |
| 36 else: | |
| 37 del_fastq=0 | |
| 38 core_id=sra_name.split(".fastq")[0] | |
| 39 for_fq=core_id+"-read1.fastq" | |
| 40 rev_fq=core_id+"-read2.fastq" | |
| 41 for_sai=core_id+"_1.sai" | |
| 42 rev_sai=core_id+"_2.sai" | |
| 43 sam=core_id+".sam" | |
| 44 bam=core_id+".bam" | |
| 45 elif file_mode=="2":#seperated | |
| 46 forword_seq=sra_name | |
| 47 reverse_seq=additional_file | |
| 48 for_core_id=forword_seq.split(".fastq")[0] | |
| 49 re_core_id=reverse_seq.split(".fastq")[0] | |
| 50 for_fq=for_core_id+".fastq" | |
| 51 rev_fq=re_core_id+".fastq" | |
| 52 for_sai=for_core_id+".sai" | |
| 53 rev_sai=re_core_id+".sai" | |
| 54 sam=for_core_id+".sam" | |
| 55 bam=sam.replace(".sam",".bam") | |
| 56 elif file_mode=="3":#single-end | |
| 57 if sra_name[-3:]=="sra": | |
| 58 del_fastq=1 | |
| 59 for_fq=sra_name.replace(".sra","_1.fastq") | |
| 60 rev_fq=sra_name.replace(".sra","_2.fastq") | |
| 61 for_sai=sra_name.replace(".sra","_1.sai") | |
| 62 rev_sai=sra_name.replace(".sra","_2.sai") | |
| 63 sam=sra_name.replace(".sra",".sam") | |
| 64 bam=sra_name.replace(".sra",".bam") | |
| 65 else: | |
| 66 del_fastq=0 | |
| 67 core_id=sra_name.split(".fastq")[0] | |
| 68 for_fq=core_id+".fastq" | |
| 69 rev_fq=core_id+".fastq" | |
| 70 for_sai=core_id+"_1.sai" | |
| 71 rev_sai=core_id+"_2.sai" | |
| 72 sam=core_id+".sam" | |
| 73 bam=core_id+".bam" | |
| 74 | |
| 75 database="complete_oafA.fasta" | |
| 76 os.system(BwaPath+" index database/"+database)###01/28/2015 | |
| 77 if mapping_mode=="mem": | |
| 78 os.system(BwaPath+" mem database/"+database+" "+for_fq+" "+rev_fq+" > "+sam) #2014/12/23 | |
| 79 elif mapping_mode=="sam": | |
| 80 os.system(BwaPath+" aln database/"+database+" "+for_fq+" > "+for_sai) | |
| 81 os.system(BwaPath+" aln database/"+database+" "+rev_fq+" > "+rev_sai) | |
| 82 os.system(BwaPath+" sampe database/"+database+" "+for_sai+" "+ rev_sai+" "+for_fq+" "+rev_fq+" > "+sam) | |
| 83 os.system(SamTlsPth+" view -F 4 -Sbh "+sam+" > "+bam) | |
| 84 os.system(SamTlsPth+" view -h -o "+sam+" "+bam) | |
| 85 os.system("cat "+sam+"|awk '{if ($5>0) {print $10}}'>"+sam+"_seq.txt") | |
| 86 os.system("cat "+sam+"|awk '{if ($5>0) {print $1}}'>"+sam+"_title.txt") | |
| 87 file1=open(sam+"_title.txt","r") | |
| 88 file2=open(sam+"_seq.txt","r") | |
| 89 file1=file1.readlines() | |
| 90 file2=file2.readlines() | |
| 91 file=open(sam+".fasta","w") | |
| 92 for i in range(len(file1)): | |
| 93 title=">"+file1[i] | |
| 94 seq=file2[i] | |
| 95 if len(seq)>40 and (len(title)>5 or ("@" not in title)): | |
| 96 file.write(title) | |
| 97 file.write(seq) | |
| 98 file.close() | |
| 99 database2="oafA_of_O4_O5.fasta" | |
| 100 os.system(Makebltdb+' -in database/'+database2+' -out '+database2+'_db '+'-dbtype nucl') | |
| 101 os.system(Blastnpth+" -query "+sam+".fasta"+" -db "+database2+"_db -out "+sam+"_vs_O45.xml -outfmt 5") | |
| 102 handle=open(sam+"_vs_O45.xml") | |
| 103 handle=NCBIXML.parse(handle) | |
| 104 handle=list(handle) | |
| 105 O9_bigger=0 | |
| 106 O2_bigger=0 | |
| 107 for x in handle: | |
| 108 O9_score=0 | |
| 109 O2_score=0 | |
| 110 try: | |
| 111 if 'O-4_full' in x.alignments[0].hit_def: | |
| 112 O9_score=x.alignments[0].hsps[0].bits | |
| 113 O2_score=x.alignments[1].hsps[0].bits | |
| 114 elif 'O-4_5-' in x.alignments[0].hit_def: | |
| 115 O9_score=x.alignments[1].hsps[0].bits | |
| 116 O2_score=x.alignments[0].hsps[0].bits | |
| 117 if O9_score>O2_score: | |
| 118 O9_bigger+=1 | |
| 119 if O9_score<O2_score: | |
| 120 O2_bigger+=1 | |
| 121 except: | |
| 122 continue | |
| 123 print "$$$Genome:",sra_name | |
| 124 if O9_bigger>O2_bigger: | |
| 125 print "$$$Typhimurium" | |
| 126 elif O9_bigger<O2_bigger: | |
| 127 print "$$$Typhimurium_O5-" | |
| 128 else: | |
| 129 print "$$$Typhimurium, even no 7 bases difference" | |
| 130 print "O-4 number is:",O9_bigger | |
| 131 print "O-4_5- number is:",O2_bigger | |
| 132 os.system("rm "+sam+"_title.txt")###01/28/2015 | |
| 133 os.system("rm "+sam+"_seq.txt")###01/28/2015 | |
| 134 os.system("rm "+sam+".fasta")###01/28/2015 | |
| 135 os.system("rm "+database2+"_db.*")###01/28/2015 | |
| 136 os.system("rm "+sam+"_vs_O45.xml")###01/28/2015 | |
| 137 | |
| 138 if test_gene=="oafA": | |
| 139 Copenhagen(target,additional_file,mapping_mode,file_mode) |
