diff scoreProfiles.py @ 3:a98ed567ea48 draft

planemo upload
author estrain
date Sat, 02 Dec 2017 21:23:44 -0500
parents 16c9fccf550d
children
line wrap: on
line diff
--- a/scoreProfiles.py	Sat Dec 02 13:52:16 2017 -0500
+++ b/scoreProfiles.py	Sat Dec 02 21:23:44 2017 -0500
@@ -1,18 +1,38 @@
 #!/usr/bin/env
 
+## Generate basic summary stats for SRST2 (v2) allele score output. A summary is computed for each profile defined in the database; only profiles meeting the coverage and mismatch thresholds are reported.
+## author: errol strain, estrain@gmail.com
+
+from argparse import (ArgumentParser, FileType)
 import sys
 import glob 
 from decimal import Decimal
 
+def parse_args():
+  "Parse the command-line arguments and return the argparse Namespace; use '-h' for help."
+
+  parser = ArgumentParser(description='Generate Summary Scores for SRST2 Allele Score Output')
+
+  # nargs=1 stores each required path as a one-element list; the two thresholds are kept as strings
+  parser.add_argument('--mlst_definitions', type=str, required=True, nargs=1, help='MLST Definitions')
+  parser.add_argument('--output', type=str, required=True, nargs=1, help='Output File for Profile Summary Scores')
+  parser.add_argument('--profile_cov', type=str, required=False, help='Minimum Average Coverage to Report ST Profile',default="98")
+  parser.add_argument('--profile_max_mismatch', type=str, required=False, help='Maximum Number of Mismatches (SNPs)', default="1")
+
+  return parser.parse_args()
+
+args =parse_args()
+
 allHash = {}
 
 # Read in Profile Database
-profiles = open(sys.argv[1],"r")
+profiles = open(args.mlst_definitions[0],"r")
+output = open(args.output[0],"w")
 
 # Minimum mean percent coverage for reporting profile
-min_per=float(sys.argv[2])
+min_per=float(args.profile_cov)
 # Maximum mean mismatch for reporting profile
-max_mis=float(sys.argv[3])
+max_mis=float(args.profile_max_mismatch)
 
 # Read in Allele Scores
 # Scores should be in srts2*.scores file
@@ -28,8 +48,8 @@
 
 # Allele names in first row
 als = profiles.readline().rstrip()
-filehead = als + str("\tMean_Score\tMean_Depth\tMean_%_Coverage\tTotal_Mismatches\tTotal_Indels")
-print(filehead)
+filehead = als + str("\tMean_Score\tMean_Depth\tMean_%_Coverage\tTotal_Mismatches\tTotal_Indels\n")
+output.write(filehead)
 
 genes = als.split("\t")
 
@@ -58,5 +78,5 @@
   mcover=round(Decimal(mcover/(len(genes)-1)),2)
 
   if mmisma<=max_mis and mcover>=min_per :
-    print(line + "\t" + str(mscore) + "\t" + str(mdepth) + "\t" + str(mcover) + "\t" + str(mmisma) + "\t" + str(mindel)) 
+    output.write(line + "\t" + str(mscore) + "\t" + str(mdepth) + "\t" + str(mcover) + "\t" + str(mmisma) + "\t" + str(mindel) + "\n")