Mercurial > repos > estrain > srst2v2
changeset 3:a98ed567ea48 draft
planemo upload
author | estrain |
---|---|
date | Sat, 02 Dec 2017 21:23:44 -0500 |
parents | e372e42148de |
children | 2b6c54603b6e |
files | scoreProfiles.py srst2v2.xml |
diffstat | 2 files changed, 44 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
--- a/scoreProfiles.py Sat Dec 02 13:52:16 2017 -0500 +++ b/scoreProfiles.py Sat Dec 02 21:23:44 2017 -0500 @@ -1,18 +1,38 @@ #!/usr/bin/env +## Generate basic summary stats for SRST2 (v2) allele score output. Generate summaries for each profile defined in the database +## author: errol strain, estrain@gmail.com + +from argparse import (ArgumentParser, FileType) import sys import glob from decimal import Decimal +def parse_args(): + "Parse the input arguments, use '-h' for help." + + parser = ArgumentParser(description='Generate Summary Scores for SRST2 Allele Score Output') + + # Read inputs + parser.add_argument('--mlst_definitions', type=str, required=True, nargs=1, help='MLST Definitions') + parser.add_argument('--output', type=str, required=True, nargs=1, help='MLST Definitions') + parser.add_argument('--profile_cov', type=str, required=False, help='Minimum Average Coverage to Report ST Profile',default="98") + parser.add_argument('--profile_max_mismatch', type=str, required=False, help='Maximum Number of Mismatches (SNPs)', default="1") + + return parser.parse_args() + +args =parse_args() + allHash = {} # Read in Profile Database -profiles = open(sys.argv[1],"r") +profiles = open(args.mlst_definitions[0],"r") +output = open(args.output[0],"w") # Minimum mean percent coverage for reporting profile -min_per=float(sys.argv[2]) +min_per=float(args.profile_cov) # Maximum mean mismatch for reporting profile -max_mis=float(sys.argv[3]) +max_mis=float(args.profile_max_mismatch) # Read in Allele Scores # Scores should be in srts2*.scores file @@ -28,8 +48,8 @@ # Allele names in first row als = profiles.readline().rstrip() -filehead = als + str("\tMean_Score\tMean_Depth\tMean_%_Coverage\tTotal_Mismatches\tTotal_Indels") -print(filehead) +filehead = als + str("\tMean_Score\tMean_Depth\tMean_%_Coverage\tTotal_Mismatches\tTotal_Indels\n") +output.write(filehead) genes = als.split("\t") @@ -58,5 +78,5 @@ mcover=round(Decimal(mcover/(len(genes)-1)),2) if mmisma<=max_mis and mcover>=min_per : - print(line + "\t" + str(mscore) + "\t" + str(mdepth) + "\t" + str(mcover) + "\t" + str(mmisma) + "\t" + str(mindel)) + output.write(line + "\t" + str(mscore) + "\t" + str(mdepth) + "\t" + str(mcover) + "\t" + str(mmisma) + "\t" + str(mindel) + "\n")
--- a/srst2v2.xml Sat Dec 02 13:52:16 2017 -0500 +++ b/srst2v2.xml Sat Dec 02 21:23:44 2017 -0500 @@ -63,9 +63,9 @@ #else --other "'-p \${GALAXY_SLOTS:-1}'" #end if - ; + #if $job_type.job == "mlst" - python $__tool_directory__/scoreProfiles.py $job_type.mlst_definitions $job_type.profile_cov $job_type.profile_max_mismatch > srst2.pscores + ; python $__tool_directory__/scoreProfiles.py --mlst_definitions $job_type.mlst_definitions --profile_cov $job_type.profile_cov --profile_max_mismatch $job_type.profile_max_mismatch --output srst2.pscores #end if ]]></command> @@ -93,9 +93,9 @@ <param type="data" name="mlst_db" label="Fasta file of MLST alleles" format="fasta" /> <param type="data" name="mlst_definitions" label="ST definitions for MLST scheme" format="tabular" /> <param type="text" name="mlstdelim" value="_" format="txt" label="Character(s) separating gene name from allele number in MLST database (default '_')" /> - <param type="integer" name="mlst_max_mismatch" value="10" format="txt" label="Maximum number of mismatches per read (default 10)" /> - <param type="float" name="profile_max_mismatch" value="1" format="txt" label="Mean mismatches for reporting profile" /> - <param type="float" name="profile_cov" value="98" format="txt" label="Mean % Coverage for reporting profile)" /> + <param type="integer" name="mlst_max_mismatch" value="10" format="txt" label="Maximum number of mismatches per read" /> + <param type="float" name="profile_max_mismatch" value="1" format="txt" label="Maximum number of mismatches for reporting ST profile" /> + <param type="float" name="profile_cov" value="98" format="txt" label="Minimum mean % coverage for reporting ST profile" /> </when> <when value="gene"> @@ -131,9 +131,19 @@ </inputs> <outputs> - <data format="tabular" type="text" label="Allele Scores" name="scores" from_work_dir="*.scores"/> - <data format="tabular" type="text" label="Profile Scores" name="pscores" from_work_dir="*.pscores"/> - <data format="tabular" type="text" label="Predicted Alleles" name="alleles" from_work_dir="*results.txt"/> + <data format="tabular" label="SRST2 Results" name="results" from_work_dir="*.txt"/> + <data format="tabular" label="SRST2 Allele Scores" name="scores" from_work_dir="*.scores"> + <filter>job_type['job'] == "mlst"</filter> + </data> + <data format="tabular" label="SRST2 Profile Scores" name="pscores" from_work_dir="*.pscores"> + <filter> job_type['job'] == "mlst"</filter> + </data> + <data format="tabular" label="SRST2 Predicted Alleles" name="alleles" from_work_dir="*results.txt"> + <filter> job_type['job'] == "mlst"</filter> + </data> + <data format="tabular" label="SRST2 Gene Scores" name="gscores" from_work_dir="*.scores"> + <filter>job_type['job'] == "gene"</filter> + </data> </outputs> <help><![CDATA[