changeset 1:16c9fccf550d draft

planemo upload
author estrain
date Sat, 02 Dec 2017 13:42:43 -0500
parents ffd5306daa4c
children e372e42148de
files scoreProfiles.py srst2v2.xml
diffstat 2 files changed, 159 insertions(+), 112 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scoreProfiles.py	Sat Dec 02 13:42:43 2017 -0500
@@ -0,0 +1,62 @@
+#!/usr/bin/env
+
+import sys
+import glob 
+from decimal import Decimal
+
+allHash = {}
+
+# Read in Profile Database
+profiles = open(sys.argv[1],"r")
+
+# Minimum mean percent coverage for reporting profile
+min_per=float(sys.argv[2])
+# Maximum mean mismatch for reporting profile
+max_mis=float(sys.argv[3])
+
+# Read in Allele Scores
+# Scores should be in srts2*.scores file
+# Column 0:Allele, 1:Score, 2:Avg Depth, 5:% Coverage, 7:Mismatches, 8:Indels
+scoreFile = open(glob.glob("srst2*.scores")[0],"r")
+scoreFile.readline()
+
+for line in scoreFile.readlines() :
+  els = line.split("\t")
+  vals = els[0].split("_")
+  allHash.update({els[0]:line})
+  
+
+# Allele names in first row
+als = profiles.readline().rstrip()
+filehead = als + str("\tMean_Score\tMean_Depth\tMean_%_Coverage\tTotal_Mismatches\tTotal_Indels")
+print(filehead)
+
+genes = als.split("\t")
+
+for line in profiles.readlines() :
+  line = line.rstrip()
+  els = line.split("\t")
+  alleles = [] 
+  for i in range(1,len(genes)) :
+    alleles.append(genes[i] + "_" + els[i])  
+  mscore=0
+  mdepth=0
+  mcover=0
+  mmisma=0
+  mindel=0
+  for i in alleles :
+    if i in allHash :
+      vals=str(allHash[i]).split("\t")
+      mscore+=float(vals[1])
+      mdepth+=float(vals[2])
+      mcover+=float(vals[5])
+      mmisma+=int(vals[7])
+      mindel+=int(vals[8])
+
+  mscore=round(Decimal(mscore/(len(genes)-1)),5)
+  mdepth=round(Decimal(mdepth/(len(genes)-1)),2)
+  mcover=round(Decimal(mcover/(len(genes)-1)),2)
+
+  if mmisma<=max_mis and mcover>=min_per :
+    print(line + "\t" + str(mscore) + "\t" + str(mdepth) + "\t" + str(mcover) + "\t" + str(mmisma) + "\t" + str(mindel)) 
+
--- a/srst2v2.xml	Tue Nov 28 22:46:01 2017 -0500
+++ b/srst2v2.xml	Sat Dec 02 13:42:43 2017 -0500
@@ -1,4 +1,4 @@
-<tool id="srst2v2" name="SRST2 - Short Read Sequence Typer (v2)" version="0.2.0">
+<tool id="srst2" name="SRST2 - Short Read Sequence Typer (v2)" version="0.2.0">
     <requirements>
         <requirement type="package" version="0.2.0">srst2</requirement>
     </requirements>
@@ -18,10 +18,55 @@
 
         --output srst2out 
         --save_scores 
-        --mlst_definitions "$mlst_definitions" 
-        --mlst_db "$mlst_db" 
-        --mlst_delimiter $mlstdelim
-        --mlst_max_mismatch $mlst_max_mismatch
+
+        #if $job_type.job == "mlst"
+          --mlst_definitions $job_type.mlst_definitions
+          --mlst_db $job_type.mlst_db 
+          --mlst_delimiter $job_type.mlstdelim
+          --mlst_max_mismatch $job_type.mlst_max_mismatch
+        #else if $job_type.job == "gene"
+          --gene_db $job_type.genedb
+          --gene_max_mismatch $job_type.gene_max_mismatch
+        #end if
+
+        #if $options.select == "advanced"
+          #if $options.min_coverage
+              --min_coverage $options.min_coverage
+          #end if
+          #if $options.max_divergence
+              --max_divergence $options.max_divergence
+          #end if
+          #if $options.min_depth
+              --min_depth $options.min_depth
+          #end if
+          #if $options.min_edge_depth
+              --min_edge_depth $options.min_edge_depth
+          #end if
+          #if $options.prob_err
+              --prob_err $options.prob_err
+          #end if
+          #if $options.stop_after
+              --stop_after $options.stop_after
+          #end if
+              --other "'-p \${GALAXY_SLOTS:-1}
+          #if $options.maxins
+              --maxins $options.maxins
+              --minins $options.minins
+          #end if
+              '"
+          #if $options.mapq
+              --mapq $options.mapq
+          #end if
+          #if $options.baseq
+              --baseq $options.baseq
+          #end if
+      #else
+          --other "'-p \${GALAXY_SLOTS:-1}'"
+      #end if
+      ;
+      #if $job_type.job == "mlst" 
+        python $__tool_directory__/scoreProfiles.py $job_type.mlst_definitions $job_type.profile_cov $job_type.profile_max_mismatch > srst2.pscores
+      #end if
 
     ]]></command>
     <inputs>
@@ -39,17 +84,56 @@
         </when>
       </conditional>
 
-        <param type="data" name="mlst_db" label="Fasta file of MLST alleles" format="fasta" />
-        <param type="data" name="mlst_definitions" label="ST definitions for MLST scheme" format="tabular" />
-        <param type="text" name="mlstdelim" value="_" format="txt" label="Character(s) separating gene name from allele number in MLST database (default &apos;_&apos;)" />
-        <param type="integer" name="mlst_max_mismatch" value="10" format="txt" label="Maximum number of mismatches per read for MLST allele calling (default 10)" />
+      <conditional name="job_type">
+        <param name="job" type="select" label="MLST or Gene Presence/Absence?">
+          <option value="mlst">MLST</option>
+          <option value="gene">Gene</option>
+        </param>
+        <when value="mlst">
+          <param type="data" name="mlst_db" label="Fasta file of MLST alleles" format="fasta" />
+          <param type="data" name="mlst_definitions" label="ST definitions for MLST scheme" format="tabular" />
+          <param type="text" name="mlstdelim" value="_" format="txt" label="Character(s) separating gene name from allele number in MLST database (default &apos;_&apos;)" />
+          <param type="integer" name="mlst_max_mismatch" value="10" format="txt" label="Maximum number of mismatches per read (default 10)" />
+          <param type="float" name="profile_max_mismatch" value="3" format="txt" label="Mean mismatches for reporting profile" />
+          <param type="float" name="profile_cov" value="90" format="txt" label="Mean % Coverage for reporting profile)" />
+   
+        </when>
+        <when value="gene">
+          <param name="genedb" type="data" format="fasta" label="Fasta file for gene database" />
+          <param name="gene_max_mismatch" type="integer" value="10" format="txt" label="Maximum number of mistaches per read (default 10)" />
+        </when>
+      </conditional>
+
+      <conditional name="options">
+        <param name="select" type="select" label="Options Type">
+          <option value="basic">Basic</option>
+          <option value="advanced">Advanced</option>
+        </param>
+        <when value="advanced">
+          <param name="min_coverage" type="integer" label="Minimum %coverage cutoff for gene reporting" value="90"/>
+          <param name="max_divergence" type="integer" label="Maximum %divergence cutoff for gene reporting" value="10"/>
+          <param name="min_depth" type="integer" label="Minimum mean depth to flag as dubious allele call" value="5"/>
+          <param name="min_edge_depth" type="integer" label="Minimum edge depth to flag as dubious allele call" value="2"/>
+          <param name="prob_err" type="float" label="Probability of sequencing error" value="0.01"/>
+          <param name="stop_after" type="integer" label="Stop mapping after this number of reads have been mapped (otherwise map all)" optional="true"/>
+          <param name="mapq" type="integer" label="Samtools -q parameter" value="1"/>
+          <param name="baseq" type="integer" label="Samtools -Q parameter" value="20"/>
+          <param name="minins" type="integer" label="Bowtie 2 -I parameter. The minimum fragment length for valid paired-end alignments." value="0" >
+            <validator type="in_range" message="Must be less than -X parameter." min="0"/>
+          </param>
+          <param name="maxins" type="integer" label="Bowtie 2 -X parameter. The maximum fragment length for valid paired-end alignments." value="1000" >
+            <validator type="in_range" message="Must be greater than -I parameter." min="0"/>
+          </param>
+          </when>
+        <when value="basic"/>
+      </conditional>
+
     </inputs>
 
     <outputs>
-      <data format="txt" label="Scores" name="scores" from_work_dir="*.scores"/>
-      <data format="bam" label="bam alignment" name="bam" from_work_dir="*.sorted.bam"/>
-      <data format="pileup" label="pileup" name="pileup" from_work_dir="*.pileup"/>
-      <data format="txt" label="Alleles" name="alleles" from_work_dir="*results.txt"/>
+      <data format="txt" label="Allele Scores" name="scores" from_work_dir="*.scores"/>
+      <data format="txt" label="Profile Scores" name="pscores" from_work_dir="*.pscores"/>
+      <data format="txt" label="Predicted Alleles" name="alleles" from_work_dir="*results.txt"/>
     </outputs>
 
     <help><![CDATA[
@@ -58,105 +142,6 @@
 
 This program is designed to take Illumina sequence data, a MLST database and/or a database of gene sequences (e.g. resistance genes, virulence genes, etc) and report the presence of STs and/or reference genes.
 
-
-
-optional arguments:
-  -h, --help            show this help message and exit
-  --version             show program's version number and exit
-  --input_se INPUT_SE 
-                        Single end read file(s) for analysing (may be gzipped)
-  --input_pe INPUT_PE 
-                        Paired end read files for analysing (may be gzipped)
-  --merge_paired        Switch on if all the input read sets belong to a
-                        single sample, and you want to merge their data to get
-                        a single result
-  --forward FORWARD     Designator for forward reads (only used if NOT in
-                        MiSeq format sample_S1_L001_R1_001.fastq.gz; otherwise
-                        default is _1, i.e. expect forward reads as
-                        sample_1.fastq.gz)
-  --reverse REVERSE     Designator for reverse reads (only used if NOT in
-                        MiSeq format sample_S1_L001_R2_001.fastq.gz; otherwise
-                        default is _2, i.e. expect forward reads as
-                        sample_2.fastq.gz
-  --read_type READ_TYPE
-                        Read file type (for bowtie2; default is q=fastq; other
-                        options: qseq=solexa, f=fasta).
-  --mlst_db MLST_DB     Fasta file of MLST alleles (optional)
-  --mlst_delimiter MLST_DELIMITER
-                        Character(s) separating gene name from allele number
-                        in MLST database (default "-", as in arcc-1)
-  --mlst_definitions MLST_DEFINITIONS
-                        ST definitions for MLST scheme (required if mlst_db
-                        supplied and you want to calculate STs)
-  --mlst_max_mismatch MLST_MAX_MISMATCH
-                        Maximum number of mismatches per read for MLST allele
-                        calling (default 10)
-  --gene_db GENE_DB 
-                        Fasta file/s for gene databases (optional)
-  --no_gene_details     Switch OFF verbose reporting of gene typing
-  --gene_max_mismatch GENE_MAX_MISMATCH
-                        Maximum number of mismatches per read for gene
-                        detection and allele calling (default 10)
-  --min_coverage MIN_COVERAGE
-                        Minimum %coverage cutoff for gene reporting (default
-                        90)
-  --max_divergence MAX_DIVERGENCE
-                        Maximum %divergence cutoff for gene reporting (default
-                        10)
-  --min_depth MIN_DEPTH
-                        Minimum mean depth to flag as dubious allele call
-                        (default 5)
-  --min_edge_depth MIN_EDGE_DEPTH
-                        Minimum edge depth to flag as dubious allele call
-                        (default 2)
-  --prob_err PROB_ERR   Probability of sequencing error (default 0.01)
-  --truncation_score_tolerance TRUNCATION_SCORE_TOLERANCE
-                        % increase in score allowed to choose non-truncated
-                        allele
-  --stop_after STOP_AFTER
-                        Stop mapping after this number of reads have been
-                        mapped (otherwise map all)
-  --other OTHER        			 
-			Other arguments to pass to bowtie2 (must be escaped,
-                        e.g. "\\--no-mixed".)
-  --max_unaligned_overlap MAX_UNALIGNED_OVERLAP
-                        Read discarded from alignment if either of its ends
-                        has unaligned overlap with the reference that is
-                        longer than this value (default 10)
-  --mapq MAPQ           Samtools -q parameter (default 1)
-  --baseq BASEQ         Samtools -Q parameter (default 20)
-  --samtools_args SAMTOOLS_ARGS
-                        Other arguments to pass to samtools mpileup (must be
-                        escaped, e.g. "\\-A").
-  --output OUTPUT       Prefix for srst2 output files
-  --log                 Switch ON logging to file (otherwise log to stdout)
-  --save_scores         Switch ON verbose reporting of all scores
-  --report_new_consensus
-                        If a matching alleles is not found, report the
-                        consensus allele. Note, only SNP differences are
-                        considered, not indels.
-  --report_all_consensus
-                        Report the consensus allele for the most likely
-                        allele. Note, only SNP differences are considered, not
-                        indels.
-  --use_existing_bowtie2_sam
-                        Use existing SAM file generated by Bowtie2 if
-                        available, otherwise they will be generated
-  --use_existing_pileup
-                        Use existing pileups if available, otherwise they will
-                        be generated
-  --use_existing_scores
-                        Use existing scores files if available, otherwise they
-                        will be generated
-  --keep_interim_alignment
-                        Keep interim files (sam & unsorted bam), otherwise
-                        they will be deleted after sorted bam is created
-  --threads THREADS     Use multiple threads in Bowtie and Samtools
-  --prev_output PREV_OUTPUT 
-                        SRST2 results files to compile (any new results from
-                        this run will also be incorporated)
-
-
     ]]></help>