Mercurial > repos > estrain > srst2v2

--- a/scoreProfiles.py	Sat Dec 02 13:52:16 2017 -0500
+++ b/scoreProfiles.py	Sat Dec 02 21:23:44 2017 -0500
@@ -1,18 +1,38 @@
 #!/usr/bin/env

+## Generate basic summary stats for SRST2 (v2) allele score output. Generate summaries for each profile defined in the database
+## author: errol strain, estrain@gmail.com
+
+from argparse import (ArgumentParser, FileType)
 import sys
 import glob
 from decimal import Decimal

+def parse_args():
+  "Parse the command-line options for profile scoring and return the argparse namespace; use '-h' for help."

+  parser = ArgumentParser(description='Generate Summary Scores for SRST2 Allele Score Output')

+  # File paths use nargs=1, so each is stored as a one-element list (callers index [0]); thresholds stay strings and are converted by the caller
+  parser.add_argument('--mlst_definitions', type=str, required=True, nargs=1, help='MLST Definitions')
+  parser.add_argument('--output', type=str, required=True, nargs=1, help='Output file for profile summary scores')
+  parser.add_argument('--profile_cov', type=str, required=False, help='Minimum Average Coverage to Report ST Profile',default="98")
+  parser.add_argument('--profile_max_mismatch', type=str, required=False, help='Maximum Number of Mismatches (SNPs)', default="1")

+  return parser.parse_args()
+
+args =parse_args()
+
 allHash = {}

 # Read in Profile Database
-profiles = open(sys.argv[1],"r")
+profiles = open(args.mlst_definitions[0],"r")
+output = open(args.output[0],"w")

 # Minimum mean percent coverage for reporting profile
-min_per=float(sys.argv[2])
+min_per=float(args.profile_cov)
 # Maximum mean mismatch for reporting profile
-max_mis=float(sys.argv[3])
+max_mis=float(args.profile_max_mismatch)

 # Read in Allele Scores
 # Scores should be in srts2*.scores file
@@ -28,8 +48,8 @@

 # Allele names in first row
 als = profiles.readline().rstrip()
-filehead = als + str("\tMean_Score\tMean_Depth\tMean_%_Coverage\tTotal_Mismatches\tTotal_Indels")
-print(filehead)
+filehead = als + str("\tMean_Score\tMean_Depth\tMean_%_Coverage\tTotal_Mismatches\tTotal_Indels\n")
+output.write(filehead)

 genes = als.split("\t")

@@ -58,5 +78,5 @@
   mcover=round(Decimal(mcover/(len(genes)-1)),2)

   if mmisma<=max_mis and mcover>=min_per :
-    print(line + "\t" + str(mscore) + "\t" + str(mdepth) + "\t" + str(mcover) + "\t" + str(mmisma) + "\t" + str(mindel))
+    output.write(line + "\t" + str(mscore) + "\t" + str(mdepth) + "\t" + str(mcover) + "\t" + str(mmisma) + "\t" + str(mindel) + "\n")
--- a/srst2v2.xml	Sat Dec 02 13:52:16 2017 -0500
+++ b/srst2v2.xml	Sat Dec 02 21:23:44 2017 -0500
@@ -63,9 +63,9 @@
       #else
           --other "'-p \${GALAXY_SLOTS:-1}'"
       #end if
-      ;
+
       #if $job_type.job == "mlst"
-        python $__tool_directory__/scoreProfiles.py $job_type.mlst_definitions $job_type.profile_cov $job_type.profile_max_mismatch > srst2.pscores
+        ; python $__tool_directory__/scoreProfiles.py --mlst_definitions $job_type.mlst_definitions --profile_cov $job_type.profile_cov --profile_max_mismatch $job_type.profile_max_mismatch --output srst2.pscores
       #end if

     ]]></command>
@@ -93,9 +93,9 @@
           <param type="data" name="mlst_db" label="Fasta file of MLST alleles" format="fasta" />
           <param type="data" name="mlst_definitions" label="ST definitions for MLST scheme" format="tabular" />
           <param type="text" name="mlstdelim" value="_" format="txt" label="Character(s) separating gene name from allele number in MLST database (default &apos;_&apos;)" />
-          <param type="integer" name="mlst_max_mismatch" value="10" format="txt" label="Maximum number of mismatches per read (default 10)" />
-          <param type="float" name="profile_max_mismatch" value="1" format="txt" label="Mean mismatches for reporting profile" />
-          <param type="float" name="profile_cov" value="98" format="txt" label="Mean % Coverage for reporting profile)" />
+          <param type="integer" name="mlst_max_mismatch" value="10" format="txt" label="Maximum number of mismatches per read" />
+          <param type="float" name="profile_max_mismatch" value="1" format="txt" label="Maximum number of mismatches for reporting ST profile" />
+          <param type="float" name="profile_cov" value="98" format="txt" label="Minimum mean % coverage for reporting ST profile" />

         </when>
         <when value="gene">
@@ -131,9 +131,19 @@
     </inputs>

     <outputs>
-      <data format="tabular" type="text" label="Allele Scores" name="scores" from_work_dir="*.scores"/>
-      <data format="tabular" type="text" label="Profile Scores" name="pscores" from_work_dir="*.pscores"/>
-      <data format="tabular" type="text" label="Predicted Alleles" name="alleles" from_work_dir="*results.txt"/>
+      <data format="tabular" label="SRST2 Results" name="results" from_work_dir="*.txt"/>
+      <data format="tabular" label="SRST2 Allele Scores" name="scores" from_work_dir="*.scores">
+        <filter>job_type['job'] == "mlst"</filter>
+      </data>
+      <data format="tabular" label="SRST2 Profile Scores" name="pscores" from_work_dir="*.pscores">
+        <filter> job_type['job'] == "mlst"</filter>
+      </data>
+      <data format="tabular" label="SRST2 Predicted Alleles" name="alleles" from_work_dir="*results.txt">
+        <filter> job_type['job'] == "mlst"</filter>
+      </data>
+      <data format="tabular" label="SRST2 Gene Scores" name="gscores" from_work_dir="*.scores">
+        <filter>job_type['job'] == "gene"</filter>
+      </data>
     </outputs>

     <help><![CDATA[