1
|
1 #!/usr/bin/env
|
|
2
|
|
3 import sys
|
|
4 import glob
|
|
5 from decimal import Decimal
|
|
6
|
|
7 allHash = {}
|
|
8
|
|
9 # Read in Profile Database
|
|
10 profiles = open(sys.argv[1],"r")
|
|
11
|
|
12 # Minimum mean percent coverage for reporting profile
|
|
13 min_per=float(sys.argv[2])
|
|
14 # Maximum mean mismatch for reporting profile
|
|
15 max_mis=float(sys.argv[3])
|
|
16
|
|
17 # Read in Allele Scores
|
|
18 # Scores should be in srts2*.scores file
|
|
19 # Column 0:Allele, 1:Score, 2:Avg Depth, 5:% Coverage, 7:Mismatches, 8:Indels
|
|
20 scoreFile = open(glob.glob("srst2*.scores")[0],"r")
|
|
21 scoreFile.readline()
|
|
22
|
|
23 for line in scoreFile.readlines() :
|
|
24 els = line.split("\t")
|
|
25 vals = els[0].split("_")
|
|
26 allHash.update({els[0]:line})
|
|
27
|
|
28
|
|
29 # Allele names in first row
|
|
30 als = profiles.readline().rstrip()
|
|
31 filehead = als + str("\tMean_Score\tMean_Depth\tMean_%_Coverage\tTotal_Mismatches\tTotal_Indels")
|
|
32 print(filehead)
|
|
33
|
|
34 genes = als.split("\t")
|
|
35
|
|
36 for line in profiles.readlines() :
|
|
37 line = line.rstrip()
|
|
38 els = line.split("\t")
|
|
39 alleles = []
|
|
40 for i in range(1,len(genes)) :
|
|
41 alleles.append(genes[i] + "_" + els[i])
|
|
42 mscore=0
|
|
43 mdepth=0
|
|
44 mcover=0
|
|
45 mmisma=0
|
|
46 mindel=0
|
|
47 for i in alleles :
|
|
48 if i in allHash :
|
|
49 vals=str(allHash[i]).split("\t")
|
|
50 mscore+=float(vals[1])
|
|
51 mdepth+=float(vals[2])
|
|
52 mcover+=float(vals[5])
|
|
53 mmisma+=int(vals[7])
|
|
54 mindel+=int(vals[8])
|
|
55
|
|
56 mscore=round(Decimal(mscore/(len(genes)-1)),5)
|
|
57 mdepth=round(Decimal(mdepth/(len(genes)-1)),2)
|
|
58 mcover=round(Decimal(mcover/(len(genes)-1)),2)
|
|
59
|
|
60 if mmisma<=max_mis and mcover>=min_per :
|
|
61 print(line + "\t" + str(mscore) + "\t" + str(mdepth) + "\t" + str(mcover) + "\t" + str(mmisma) + "\t" + str(mindel))
|
|
62
|