Mercurial > repos > estrain > srst2v2
changeset 1:16c9fccf550d draft
planemo upload
author | estrain |
---|---|
date | Sat, 02 Dec 2017 13:42:43 -0500 |
parents | ffd5306daa4c |
children | e372e42148de |
files | scoreProfiles.py srst2v2.xml |
diffstat | 2 files changed, 159 insertions(+), 112 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scoreProfiles.py Sat Dec 02 13:42:43 2017 -0500 @@ -0,0 +1,62 @@ +#!/usr/bin/env + +import sys +import glob +from decimal import Decimal + +allHash = {} + +# Read in Profile Database +profiles = open(sys.argv[1],"r") + +# Minimum mean percent coverage for reporting profile +min_per=float(sys.argv[2]) +# Maximum mean mismatch for reporting profile +max_mis=float(sys.argv[3]) + +# Read in Allele Scores +# Scores should be in srts2*.scores file +# Column 0:Allele, 1:Score, 2:Avg Depth, 5:% Coverage, 7:Mismatches, 8:Indels +scoreFile = open(glob.glob("srst2*.scores")[0],"r") +scoreFile.readline() + +for line in scoreFile.readlines() : + els = line.split("\t") + vals = els[0].split("_") + allHash.update({els[0]:line}) + + +# Allele names in first row +als = profiles.readline().rstrip() +filehead = als + str("\tMean_Score\tMean_Depth\tMean_%_Coverage\tTotal_Mismatches\tTotal_Indels") +print(filehead) + +genes = als.split("\t") + +for line in profiles.readlines() : + line = line.rstrip() + els = line.split("\t") + alleles = [] + for i in range(1,len(genes)) : + alleles.append(genes[i] + "_" + els[i]) + mscore=0 + mdepth=0 + mcover=0 + mmisma=0 + mindel=0 + for i in alleles : + if i in allHash : + vals=str(allHash[i]).split("\t") + mscore+=float(vals[1]) + mdepth+=float(vals[2]) + mcover+=float(vals[5]) + mmisma+=int(vals[7]) + mindel+=int(vals[8]) + + mscore=round(Decimal(mscore/(len(genes)-1)),5) + mdepth=round(Decimal(mdepth/(len(genes)-1)),2) + mcover=round(Decimal(mcover/(len(genes)-1)),2) + + if mmisma<=max_mis and mcover>=min_per : + print(line + "\t" + str(mscore) + "\t" + str(mdepth) + "\t" + str(mcover) + "\t" + str(mmisma) + "\t" + str(mindel)) +
--- a/srst2v2.xml Tue Nov 28 22:46:01 2017 -0500 +++ b/srst2v2.xml Sat Dec 02 13:42:43 2017 -0500 @@ -1,4 +1,4 @@ -<tool id="srst2v2" name="SRST2 - Short Read Sequence Typer (v2)" version="0.2.0"> +<tool id="srst2" name="SRST2 - Short Read Sequence Typer (v2)" version="0.2.0"> <requirements> <requirement type="package" version="0.2.0">srst2</requirement> </requirements> @@ -18,10 +18,55 @@ --output srst2out --save_scores - --mlst_definitions "$mlst_definitions" - --mlst_db "$mlst_db" - --mlst_delimiter $mlstdelim - --mlst_max_mismatch $mlst_max_mismatch + + #if $job_type.job == "mlst" + --mlst_definitions $job_type.mlst_definitions + --mlst_db $job_type.mlst_db + --mlst_delimiter $job_type.mlstdelim + --mlst_max_mismatch $job_type.mlst_max_mismatch + #else if $job_type.job == "gene" + --gene_db $job_type.genedb + --gene_max_mismatch $job_type.gene_max_mismatch + #end if + + #if $options.select == "advanced" + #if $options.min_coverage + --min_coverage $options.min_coverage + #end if + #if $options.max_divergence + --max_divergence $options.max_divergence + #end if + #if $options.min_depth + --min_depth $options.min_depth + #end if + #if $options.min_edge_depth + --min_edge_depth $options.min_edge_depth + #end if + #if $options.prob_err + --prob_err $options.prob_err + #end if + #if $options.stop_after + --stop_after $options.stop_after + #end if + --other "'-p \${GALAXY_SLOTS:-1} + #if $options.maxins + --maxins $options.maxins + --minins $options.minins + #end if + '" + #if $options.mapq + --mapq $options.mapq + #end if + #if $options.baseq + --baseq $options.baseq + #end if + #else + --other "'-p \${GALAXY_SLOTS:-1}'" + #end if + ; + #if $job_type.job == "mlst" + python $__tool_directory__/scoreProfiles.py $job_type.mlst_definitions $job_type.profile_cov $job_type.profile_max_mismatch > srst2.pscores + #end if ]]></command> <inputs> @@ -39,17 +84,56 @@ </when> </conditional> - <param type="data" name="mlst_db" label="Fasta file of MLST alleles" format="fasta" /> - <param type="data" name="mlst_definitions" label="ST definitions for MLST scheme" format="tabular" /> - <param type="text" name="mlstdelim" value="_" format="txt" label="Character(s) separating gene name from allele number in MLST database (default '_')" /> - <param type="integer" name="mlst_max_mismatch" value="10" format="txt" label="Maximum number of mismatches per read for MLST allele calling (default 10)" /> + <conditional name="job_type"> + <param name="job" type="select" label="MLST or Gene Presence/Absence?"> + <option value="mlst">MLST</option> + <option value="gene">Gene</option> + </param> + <when value="mlst"> + <param type="data" name="mlst_db" label="Fasta file of MLST alleles" format="fasta" /> + <param type="data" name="mlst_definitions" label="ST definitions for MLST scheme" format="tabular" /> + <param type="text" name="mlstdelim" value="_" format="txt" label="Character(s) separating gene name from allele number in MLST database (default '_')" /> + <param type="integer" name="mlst_max_mismatch" value="10" format="txt" label="Maximum number of mismatches per read (default 10)" /> + <param type="float" name="profile_max_mismatch" value="3" format="txt" label="Mean mismatches for reporting profile" /> + <param type="float" name="profile_cov" value="90" format="txt" label="Mean % Coverage for reporting profile)" /> + + </when> + <when value="gene"> + <param name="genedb" type="data" format="fasta" label="Fasta file for gene database" /> + <param name="gene_max_mismatch" type="integer" value="10" format="txt" label="Maximum number of mistaches per read (default 10)" /> + </when> + </conditional> + + <conditional name="options"> + <param name="select" type="select" label="Options Type"> + <option value="basic">Basic</option> + <option value="advanced">Advanced</option> + </param> + <when value="advanced"> + <param name="min_coverage" type="integer" label="Minimum %coverage cutoff for gene reporting" value="90"/> + <param name="max_divergence" type="integer" label="Maximum %divergence cutoff for gene reporting" value="10"/> + <param name="min_depth" type="integer" label="Minimum mean depth to flag as dubious allele call" value="5"/> + <param name="min_edge_depth" type="integer" label="Minimum edge depth to flag as dubious allele call" value="2"/> + <param name="prob_err" type="float" label="Probability of sequencing error" value="0.01"/> + <param name="stop_after" type="integer" label="Stop mapping after this number of reads have been mapped (otherwise map all)" optional="true"/> + <param name="mapq" type="integer" label="Samtools -q parameter" value="1"/> + <param name="baseq" type="integer" label="Samtools -Q parameter" value="20"/> + <param name="minins" type="integer" label="Bowtie 2 -I parameter. The minimum fragment length for valid paired-end alignments." value="0" > + <validator type="in_range" message="Must be less than -X parameter." min="0"/> + </param> + <param name="maxins" type="integer" label="Bowtie 2 -X parameter. The maximum fragment length for valid paired-end alignments." value="1000" > + <validator type="in_range" message="Must be greater than -I parameter." min="0"/> + </param> + </when> + <when value="basic"/> + </conditional> + </inputs> <outputs> - <data format="txt" label="Scores" name="scores" from_work_dir="*.scores"/> - <data format="bam" label="bam alignment" name="bam" from_work_dir="*.sorted.bam"/> - <data format="pileup" label="pileup" name="pileup" from_work_dir="*.pileup"/> - <data format="txt" label="Alleles" name="alleles" from_work_dir="*results.txt"/> + <data format="txt" label="Allele Scores" name="scores" from_work_dir="*.scores"/> + <data format="txt" label="Profile Scores" name="pscores" from_work_dir="*.pscores"/> + <data format="txt" label="Predicted Alleles" name="alleles" from_work_dir="*results.txt"/> </outputs> <help><![CDATA[ @@ -58,105 +142,6 @@ This program is designed to take Illumina sequence data, a MLST database and/or a database of gene sequences (e.g. resistance genes, virulence genes, etc) and report the presence of STs and/or reference genes. - - -optional arguments: - -h, --help show this help message and exit - --version show program's version number and exit - --input_se INPUT_SE - Single end read file(s) for analysing (may be gzipped) - --input_pe INPUT_PE - Paired end read files for analysing (may be gzipped) - --merge_paired Switch on if all the input read sets belong to a - single sample, and you want to merge their data to get - a single result - --forward FORWARD Designator for forward reads (only used if NOT in - MiSeq format sample_S1_L001_R1_001.fastq.gz; otherwise - default is _1, i.e. expect forward reads as - sample_1.fastq.gz) - --reverse REVERSE Designator for reverse reads (only used if NOT in - MiSeq format sample_S1_L001_R2_001.fastq.gz; otherwise - default is _2, i.e. expect forward reads as - sample_2.fastq.gz - --read_type READ_TYPE - Read file type (for bowtie2; default is q=fastq; other - options: qseq=solexa, f=fasta). - --mlst_db MLST_DB Fasta file of MLST alleles (optional) - --mlst_delimiter MLST_DELIMITER - Character(s) separating gene name from allele number - in MLST database (default "-", as in arcc-1) - --mlst_definitions MLST_DEFINITIONS - ST definitions for MLST scheme (required if mlst_db - supplied and you want to calculate STs) - --mlst_max_mismatch MLST_MAX_MISMATCH - Maximum number of mismatches per read for MLST allele - calling (default 10) - --gene_db GENE_DB - Fasta file/s for gene databases (optional) - --no_gene_details Switch OFF verbose reporting of gene typing - --gene_max_mismatch GENE_MAX_MISMATCH - Maximum number of mismatches per read for gene - detection and allele calling (default 10) - --min_coverage MIN_COVERAGE - Minimum %coverage cutoff for gene reporting (default - 90) - --max_divergence MAX_DIVERGENCE - Maximum %divergence cutoff for gene reporting (default - 10) - --min_depth MIN_DEPTH - Minimum mean depth to flag as dubious allele call - (default 5) - --min_edge_depth MIN_EDGE_DEPTH - Minimum edge depth to flag as dubious allele call - (default 2) - --prob_err PROB_ERR Probability of sequencing error (default 0.01) - --truncation_score_tolerance TRUNCATION_SCORE_TOLERANCE - % increase in score allowed to choose non-truncated - allele - --stop_after STOP_AFTER - Stop mapping after this number of reads have been - mapped (otherwise map all) - --other OTHER - Other arguments to pass to bowtie2 (must be escaped, - e.g. "\\--no-mixed".) - --max_unaligned_overlap MAX_UNALIGNED_OVERLAP - Read discarded from alignment if either of its ends - has unaligned overlap with the reference that is - longer than this value (default 10) - --mapq MAPQ Samtools -q parameter (default 1) - --baseq BASEQ Samtools -Q parameter (default 20) - --samtools_args SAMTOOLS_ARGS - Other arguments to pass to samtools mpileup (must be - escaped, e.g. "\\-A"). - --output OUTPUT Prefix for srst2 output files - --log Switch ON logging to file (otherwise log to stdout) - --save_scores Switch ON verbose reporting of all scores - --report_new_consensus - If a matching alleles is not found, report the - consensus allele. Note, only SNP differences are - considered, not indels. - --report_all_consensus - Report the consensus allele for the most likely - allele. Note, only SNP differences are considered, not - indels. - --use_existing_bowtie2_sam - Use existing SAM file generated by Bowtie2 if - available, otherwise they will be generated - --use_existing_pileup - Use existing pileups if available, otherwise they will - be generated - --use_existing_scores - Use existing scores files if available, otherwise they - will be generated - --keep_interim_alignment - Keep interim files (sam & unsorted bam), otherwise - they will be deleted after sorted bam is created - --threads THREADS Use multiple threads in Bowtie and Samtools - --prev_output PREV_OUTPUT - SRST2 results files to compile (any new results from - this run will also be incorporated) - - ]]></help>