changeset 3:a98ed567ea48 draft

planemo upload
author estrain
date Sat, 02 Dec 2017 21:23:44 -0500
parents e372e42148de
children 2b6c54603b6e
files scoreProfiles.py srst2v2.xml
diffstat 2 files changed, 44 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/scoreProfiles.py	Sat Dec 02 13:52:16 2017 -0500
+++ b/scoreProfiles.py	Sat Dec 02 21:23:44 2017 -0500
@@ -1,18 +1,38 @@
 #!/usr/bin/env
 
+## Generate basic summary stats for SRST2 (v2) allele score output. Generate summaries for each profile defined in the database
+## author: errol strain, estrain@gmail.com
+
+from argparse import (ArgumentParser, FileType)
 import sys
 import glob 
 from decimal import Decimal
 
+def parse_args():
+  "Parse command-line arguments ('-h' for help); --mlst_definitions and --output are one-element lists (nargs=1)."
+
+  parser = ArgumentParser(description='Generate Summary Scores for SRST2 Allele Score Output')
+
+  # Required file paths first, then optional reporting thresholds (kept as strings; the caller converts them)
+  parser.add_argument('--mlst_definitions', type=str, required=True, nargs=1, help='MLST Definitions')
+  parser.add_argument('--output', type=str, required=True, nargs=1, help='Output File for Profile Scores')
+  parser.add_argument('--profile_cov', type=str, required=False, help='Minimum Average Coverage to Report ST Profile',default="98")
+  parser.add_argument('--profile_max_mismatch', type=str, required=False, help='Maximum Number of Mismatches (SNPs)', default="1")
+
+  return parser.parse_args()
+
+args =parse_args()
+
 allHash = {}
 
 # Read in Profile Database
-profiles = open(sys.argv[1],"r")
+profiles = open(args.mlst_definitions[0],"r")
+output = open(args.output[0],"w")
 
 # Minimum mean percent coverage for reporting profile
-min_per=float(sys.argv[2])
+min_per=float(args.profile_cov)
 # Maximum mean mismatch for reporting profile
-max_mis=float(sys.argv[3])
+max_mis=float(args.profile_max_mismatch)
 
 # Read in Allele Scores
 # Scores should be in srst2*.scores file
@@ -28,8 +48,8 @@
 
 # Allele names in first row
 als = profiles.readline().rstrip()
-filehead = als + str("\tMean_Score\tMean_Depth\tMean_%_Coverage\tTotal_Mismatches\tTotal_Indels")
-print(filehead)
+filehead = als + str("\tMean_Score\tMean_Depth\tMean_%_Coverage\tTotal_Mismatches\tTotal_Indels\n")
+output.write(filehead)
 
 genes = als.split("\t")
 
@@ -58,5 +78,5 @@
   mcover=round(Decimal(mcover/(len(genes)-1)),2)
 
   if mmisma<=max_mis and mcover>=min_per :
-    print(line + "\t" + str(mscore) + "\t" + str(mdepth) + "\t" + str(mcover) + "\t" + str(mmisma) + "\t" + str(mindel)) 
+    output.write(line + "\t" + str(mscore) + "\t" + str(mdepth) + "\t" + str(mcover) + "\t" + str(mmisma) + "\t" + str(mindel) + "\n") 
 
--- a/srst2v2.xml	Sat Dec 02 13:52:16 2017 -0500
+++ b/srst2v2.xml	Sat Dec 02 21:23:44 2017 -0500
@@ -63,9 +63,9 @@
       #else
           --other "'-p \${GALAXY_SLOTS:-1}'"
       #end if
-      ;
+
       #if $job_type.job == "mlst" 
-        python $__tool_directory__/scoreProfiles.py $job_type.mlst_definitions $job_type.profile_cov $job_type.profile_max_mismatch > srst2.pscores
+        ; python $__tool_directory__/scoreProfiles.py --mlst_definitions $job_type.mlst_definitions --profile_cov $job_type.profile_cov --profile_max_mismatch $job_type.profile_max_mismatch --output srst2.pscores
       #end if
 
     ]]></command>
@@ -93,9 +93,9 @@
           <param type="data" name="mlst_db" label="Fasta file of MLST alleles" format="fasta" />
           <param type="data" name="mlst_definitions" label="ST definitions for MLST scheme" format="tabular" />
           <param type="text" name="mlstdelim" value="_" format="txt" label="Character(s) separating gene name from allele number in MLST database (default &apos;_&apos;)" />
-          <param type="integer" name="mlst_max_mismatch" value="10" format="txt" label="Maximum number of mismatches per read (default 10)" />
-          <param type="float" name="profile_max_mismatch" value="1" format="txt" label="Mean mismatches for reporting profile" />
-          <param type="float" name="profile_cov" value="98" format="txt" label="Mean % Coverage for reporting profile)" />
+          <param type="integer" name="mlst_max_mismatch" value="10" format="txt" label="Maximum number of mismatches per read" />
+          <param type="float" name="profile_max_mismatch" value="1" format="txt" label="Maximum number of mismatches for reporting ST profile" />
+          <param type="float" name="profile_cov" value="98" format="txt" label="Minimum mean % coverage for reporting ST profile" />
    
         </when>
         <when value="gene">
@@ -131,9 +131,19 @@
     </inputs>
 
     <outputs>
-      <data format="tabular" type="text" label="Allele Scores" name="scores" from_work_dir="*.scores"/>
-      <data format="tabular" type="text" label="Profile Scores" name="pscores" from_work_dir="*.pscores"/>
-      <data format="tabular" type="text" label="Predicted Alleles" name="alleles" from_work_dir="*results.txt"/>
+      <data format="tabular" label="SRST2 Results" name="results" from_work_dir="*.txt"/>
+      <data format="tabular" label="SRST2 Allele Scores" name="scores" from_work_dir="*.scores">
+        <filter>job_type['job'] == "mlst"</filter>
+      </data>  
+      <data format="tabular" label="SRST2 Profile Scores" name="pscores" from_work_dir="*.pscores">
+        <filter> job_type['job'] == "mlst"</filter>
+      </data>  
+      <data format="tabular" label="SRST2 Predicted Alleles" name="alleles" from_work_dir="*results.txt">
+        <filter> job_type['job'] == "mlst"</filter>
+      </data>  
+      <data format="tabular" label="SRST2 Gene Scores" name="gscores" from_work_dir="*.scores">
+        <filter>job_type['job'] == "gene"</filter>
+      </data>  
     </outputs>
 
     <help><![CDATA[