annotate scoreProfiles.py @ 1:16c9fccf550d draft

planemo upload
author estrain
date Sat, 02 Dec 2017 13:42:43 -0500
parents
children a98ed567ea48
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
16c9fccf550d planemo upload
estrain
parents:
diff changeset
1 #!/usr/bin/env
16c9fccf550d planemo upload
estrain
parents:
diff changeset
2
16c9fccf550d planemo upload
estrain
parents:
diff changeset
3 import sys
16c9fccf550d planemo upload
estrain
parents:
diff changeset
4 import glob
16c9fccf550d planemo upload
estrain
parents:
diff changeset
5 from decimal import Decimal
16c9fccf550d planemo upload
estrain
parents:
diff changeset
6
16c9fccf550d planemo upload
estrain
parents:
diff changeset
7 allHash = {}
16c9fccf550d planemo upload
estrain
parents:
diff changeset
8
16c9fccf550d planemo upload
estrain
parents:
diff changeset
9 # Read in Profile Database
16c9fccf550d planemo upload
estrain
parents:
diff changeset
10 profiles = open(sys.argv[1],"r")
16c9fccf550d planemo upload
estrain
parents:
diff changeset
11
16c9fccf550d planemo upload
estrain
parents:
diff changeset
12 # Minimum mean percent coverage for reporting profile
16c9fccf550d planemo upload
estrain
parents:
diff changeset
13 min_per=float(sys.argv[2])
16c9fccf550d planemo upload
estrain
parents:
diff changeset
14 # Maximum mean mismatch for reporting profile
16c9fccf550d planemo upload
estrain
parents:
diff changeset
15 max_mis=float(sys.argv[3])
16c9fccf550d planemo upload
estrain
parents:
diff changeset
16
16c9fccf550d planemo upload
estrain
parents:
diff changeset
17 # Read in Allele Scores
16c9fccf550d planemo upload
estrain
parents:
diff changeset
18 # Scores should be in srts2*.scores file
16c9fccf550d planemo upload
estrain
parents:
diff changeset
19 # Column 0:Allele, 1:Score, 2:Avg Depth, 5:% Coverage, 7:Mismatches, 8:Indels
16c9fccf550d planemo upload
estrain
parents:
diff changeset
20 scoreFile = open(glob.glob("srst2*.scores")[0],"r")
16c9fccf550d planemo upload
estrain
parents:
diff changeset
21 scoreFile.readline()
16c9fccf550d planemo upload
estrain
parents:
diff changeset
22
16c9fccf550d planemo upload
estrain
parents:
diff changeset
23 for line in scoreFile.readlines() :
16c9fccf550d planemo upload
estrain
parents:
diff changeset
24 els = line.split("\t")
16c9fccf550d planemo upload
estrain
parents:
diff changeset
25 vals = els[0].split("_")
16c9fccf550d planemo upload
estrain
parents:
diff changeset
26 allHash.update({els[0]:line})
16c9fccf550d planemo upload
estrain
parents:
diff changeset
27
16c9fccf550d planemo upload
estrain
parents:
diff changeset
28
16c9fccf550d planemo upload
estrain
parents:
diff changeset
29 # Allele names in first row
16c9fccf550d planemo upload
estrain
parents:
diff changeset
30 als = profiles.readline().rstrip()
16c9fccf550d planemo upload
estrain
parents:
diff changeset
31 filehead = als + str("\tMean_Score\tMean_Depth\tMean_%_Coverage\tTotal_Mismatches\tTotal_Indels")
16c9fccf550d planemo upload
estrain
parents:
diff changeset
32 print(filehead)
16c9fccf550d planemo upload
estrain
parents:
diff changeset
33
16c9fccf550d planemo upload
estrain
parents:
diff changeset
34 genes = als.split("\t")
16c9fccf550d planemo upload
estrain
parents:
diff changeset
35
16c9fccf550d planemo upload
estrain
parents:
diff changeset
36 for line in profiles.readlines() :
16c9fccf550d planemo upload
estrain
parents:
diff changeset
37 line = line.rstrip()
16c9fccf550d planemo upload
estrain
parents:
diff changeset
38 els = line.split("\t")
16c9fccf550d planemo upload
estrain
parents:
diff changeset
39 alleles = []
16c9fccf550d planemo upload
estrain
parents:
diff changeset
40 for i in range(1,len(genes)) :
16c9fccf550d planemo upload
estrain
parents:
diff changeset
41 alleles.append(genes[i] + "_" + els[i])
16c9fccf550d planemo upload
estrain
parents:
diff changeset
42 mscore=0
16c9fccf550d planemo upload
estrain
parents:
diff changeset
43 mdepth=0
16c9fccf550d planemo upload
estrain
parents:
diff changeset
44 mcover=0
16c9fccf550d planemo upload
estrain
parents:
diff changeset
45 mmisma=0
16c9fccf550d planemo upload
estrain
parents:
diff changeset
46 mindel=0
16c9fccf550d planemo upload
estrain
parents:
diff changeset
47 for i in alleles :
16c9fccf550d planemo upload
estrain
parents:
diff changeset
48 if i in allHash :
16c9fccf550d planemo upload
estrain
parents:
diff changeset
49 vals=str(allHash[i]).split("\t")
16c9fccf550d planemo upload
estrain
parents:
diff changeset
50 mscore+=float(vals[1])
16c9fccf550d planemo upload
estrain
parents:
diff changeset
51 mdepth+=float(vals[2])
16c9fccf550d planemo upload
estrain
parents:
diff changeset
52 mcover+=float(vals[5])
16c9fccf550d planemo upload
estrain
parents:
diff changeset
53 mmisma+=int(vals[7])
16c9fccf550d planemo upload
estrain
parents:
diff changeset
54 mindel+=int(vals[8])
16c9fccf550d planemo upload
estrain
parents:
diff changeset
55
16c9fccf550d planemo upload
estrain
parents:
diff changeset
56 mscore=round(Decimal(mscore/(len(genes)-1)),5)
16c9fccf550d planemo upload
estrain
parents:
diff changeset
57 mdepth=round(Decimal(mdepth/(len(genes)-1)),2)
16c9fccf550d planemo upload
estrain
parents:
diff changeset
58 mcover=round(Decimal(mcover/(len(genes)-1)),2)
16c9fccf550d planemo upload
estrain
parents:
diff changeset
59
16c9fccf550d planemo upload
estrain
parents:
diff changeset
60 if mmisma<=max_mis and mcover>=min_per :
16c9fccf550d planemo upload
estrain
parents:
diff changeset
61 print(line + "\t" + str(mscore) + "\t" + str(mdepth) + "\t" + str(mcover) + "\t" + str(mmisma) + "\t" + str(mindel))
16c9fccf550d planemo upload
estrain
parents:
diff changeset
62