changeset 0:2f5a8da98896 draft

planemo upload
author alperkucukural
date Wed, 04 Nov 2015 17:05:03 -0500
parents
children 6c7fb72cc734
files compare2sequences.xml crisprseek_macros.xml offtargetanalysis.xml test_data/inputfile.fa tool_dependencies.xml
diffstat 5 files changed, 354 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/compare2sequences.xml	Wed Nov 04 17:05:03 2015 -0500
@@ -0,0 +1,116 @@
+<tool id="compare2sequences" name="compare2sequences" version="1.0.0">
+  <!-- Short description of the tool. -->
+  <description>CRISPRSeek compare2sequences</description>
+    <!-- The "requirements" and "citations" macros expanded in this file are
+         imported from crisprseek_macros.xml. -->
+    <macros>
+        <import>crisprseek_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- Runs the R script rendered from the configfile named
+         "compare2sequences" with Rscript. detect_errors="exit_code" asks
+         Galaxy to judge success from the process exit code. -->
+    <command detect_errors="exit_code">
+        Rscript "${compare2sequences}"
+    </command>
+    <configfiles>
+        <configfile name="compare2sequences"><![CDATA[
+## Cheetah template that renders the R script run by Rscript.
+## Lines starting with "##" are Cheetah comments and never reach the R script.
+##
+## Resolve every advanced setting up front. The "Advanced Options" section and
+## the nested "Find Paired gRNA Only" section only define their parameters when
+## they are switched on, so referencing them unconditionally fails while the
+## template is rendered. The fallbacks below mirror the defaults declared in
+## the <inputs> section.
+#if str($a.advanced) == "Yes"
+    #set $opt_search_direction = str($a.searchDirection)
+    #set $opt_paired_only = str($a.fg.fpg_only)
+    #if $opt_paired_only == "TRUE"
+        #set $opt_min_gap = str($a.fg.mingap)
+        #set $opt_max_gap = str($a.fg.maxgap)
+    #else
+        #set $opt_min_gap = "0"
+        #set $opt_max_gap = "20"
+    #end if
+    #set $opt_pam_size = str($a.PAMsize)
+    #set $opt_pam = str($a.PAM)
+    #set $opt_pam_pattern = str($a.PAMPattern)
+    #set $opt_grna_size = str($a.gRNAsize)
+    #set $opt_weights = str($a.weights)
+#else
+    #set $opt_search_direction = "both"
+    #set $opt_paired_only = "FALSE"
+    #set $opt_min_gap = "0"
+    #set $opt_max_gap = "20"
+    #set $opt_pam_size = "3"
+    #set $opt_pam = "NGG"
+    #set $opt_pam_pattern = "N[A_p_G]G_d_"
+    #set $opt_grna_size = "20"
+    #set $opt_weights = "0, 0, 0.014, 0, 0, 0.395, 0.317, 0, 0.389, 0.079, 0.445, 0.508, 0.613, 0.851, 0.732, 0.828, 0.615, 0.804, 0.685, 0.583"
+#end if
+## Load all required libraries quietly
+library(CRISPRseek, quietly=TRUE, warn.conflicts=FALSE, verbose=FALSE)
+
+## The form asks users to type _p_ for | and _d_ for the dollar sign; turn the
+## placeholders back into the real regular expression before calling CRISPRseek.
+## NOTE(review): __ob__/__cb__ are assumed to be what Galaxy's default text
+## sanitizer substitutes for square brackets; confirm against the Galaxy version in use.
+pam_pattern <- "${opt_pam_pattern}"
+pam_pattern <- gsub("__ob__", "[", pam_pattern, fixed = TRUE)
+pam_pattern <- gsub("__cb__", "]", pam_pattern, fixed = TRUE)
+pam_pattern <- gsub("_p_", "|", pam_pattern, fixed = TRUE)
+pam_pattern <- gsub("_d_", "\$", pam_pattern, fixed = TRUE)
+
+## The weights arrive as one comma separated string; CRISPRseek needs a numeric vector.
+off_target_weights <- as.numeric(strsplit("${opt_weights}", ",", fixed = TRUE)[[1]])
+
+compare2Sequences("${inputfile1}", "${inputfile2}",
+    inputNames = c("Seq1", "Seq2"),
+    REpatternFile = system.file('extdata', 'NEBenzymes.fa', package = 'CRISPRseek'),
+    max.mismatch = as.integer("${maxmismatch}"),
+    searchDirection = "${opt_search_direction}",
+    findPairedgRNAOnly = as.logical("${opt_paired_only}"),
+    min.gap = as.integer("${opt_min_gap}"),
+    max.gap = as.integer("${opt_max_gap}"),
+    gRNA.name.prefix = "gRNA",
+    PAM.size = as.integer("${opt_pam_size}"),
+    gRNA.size = as.integer("${opt_grna_size}"),
+    PAM = "${opt_pam}",
+    PAM.pattern = pam_pattern,
+    outputDir = "./",
+    weights = off_target_weights)
+
+     ]]></configfile>
+    </configfiles>
+  <inputs>
+    <!-- The two sequence files that are compared against each other. -->
+    <param format="fastq,fa,fasta" name="inputfile1" type="data" label="Input File 1 Path" />
+    <param format="fastq,fa,fasta" name="inputfile2" type="data" label="Input File 2 Path" />
+    <param name="maxmismatch" type="text" label="Max Mismatch" value="3" help="Maximum mismatch allowed in off target search, default 3. Warning: will be considerably slower if set > 3"/>
+    <!-- Everything below is optional; the configfile falls back to the same
+         defaults when "Advanced Options" stays on No. -->
+    <conditional name="a">
+       <param name="advanced" type="select" label="Advanced Options">
+           <option value="No" selected="True">No</option>
+           <option value="Yes">Yes</option>
+       </param>
+       <when value="Yes">
+          <param name="searchDirection" type="select" label="Search Direction">
+             <option value="both" selected="True">both</option>
+             <option value="1to2">1to2</option>
+             <option value="2to1">2to1</option>
+          </param>
+         <conditional name="fg">
+           <param name="fpg_only" type="select" label="Find Paired gRNA Only" help="Choose whether to only search for paired gRNAs in such an orientation that the first one is on minus strand called reverse gRNA and the second one is on plus strand called forward gRNA. TRUE or FALSE, default FALSE">
+              <option value="FALSE" selected="True">No</option>
+              <option value="TRUE">Yes</option>
+           </param>
+           <!-- The case value must match the option value above exactly
+                ("TRUE"); otherwise the gap parameters are never offered. -->
+           <when value="TRUE">
+              <param name="mingap" type="text" label="Min Gap" value="0" />
+              <param name="maxgap" type="text" label="Max Gap" value="20" />
+           </when>
+         </conditional>
+         <param name="PAMsize" type="text" label="PAM Size" value="3" help="PAM length, default 3"/>
+         <param name="PAM" type="text" label="PAM" value="NGG" help="PAM sequence after the gRNA, default NGG"/>
+         <param name="PAMPattern" type="text" label="PAM Pattern" value="N[A_p_G]G_d_"  help="Regular expression of protospacer-adjacent motif (PAM), default N[A|G]G$. Please use _p_ for | and _d_ for $ symbols"/>
+         <param name="gRNAsize" type="text" label="gRNA Size" value="20" help="The size of the gRNA, default 20"/>
+         <param name="weights" type="text" area="true" size="5x60" value="0, 0, 0.014, 0, 0, 0.395, 0.317, 0, 0.389, 0.079, 0.445, 0.508, 0.613, 0.851, 0.732, 0.828, 0.615, 0.804, 0.685, 0.583" label="Off Target Weights" help="a numeric vector size of gRNA length, default 0, 0, 0.014, 0, 0, 0.395, 0.317, 0, 0.389, 0.079, 0.445, 0.508, 0.613, 0.851, 0.732, 0.828, 0.615, 0.804, 0.685, 0.583 which is used in Hsu et al., 2013 cited in the reference section"/>
+       </when>
+    </conditional>
+  </inputs>
+  <!-- Declares a single history dataset named "output" with format gz.
+       NOTE(review): nothing in the command or the R configfile of this tool
+       writes to this dataset (the R call sends its results to outputDir "./");
+       confirm how the results are meant to reach the Galaxy history. -->
+  <outputs>
+    <data format="gz" name="output"/>
+  </outputs>
+  <help>
+
+**What it does**
+
+The package includes functions to find potential guide RNAs for input target
+sequences, optionally filter guide RNAs without restriction enzyme cut site, or
+without paired guide RNAs, genome-wide search for off-targets, score, rank, fetch
+flank sequence and indicate whether the target and off-targets are located in exon
+region or not. Potential guide RNAs are annotated with total score of the top5 and
+topN off-targets, detailed topN mismatch sites, restriction enzyme cut sites, and paired guide RNAs.
+
+**Description**
+
+Generate all possible guide RNAs (gRNAs) for two input sequences, or two sets of sequences and generate scores for potential off-targets in the other sequence.
+
+**Usage**
+
+   compare2Sequences(inputFile1Path, inputFile2Path, inputNames=c("Seq1", "Seq2"), format = "fasta", findgRNAsWithREcutOnly = FALSE, searchDirection=c("both","1to2", "2to1"), REpatternFile=system.file("extdata", "NEBenzymes.fa", package = "CRISPRseek"), minREpatternSize = 6, overlap.gRNA.positions = c(17, 18), findPairedgRNAOnly = FALSE, min.gap = 0, max.gap = 20, gRNA.name.prefix = "gRNA", PAM.size = 3, gRNA.size = 20, PAM = "NGG", PAM.pattern = "N[A|G]G$", max.mismatch = 3, outputDir, weights = c(0, 0, 0.014, 0, 0, 0.395, 0.317, 0, 0.389, 0.079, 0.445, 0.508, 0.613, 0.851, 0.732, 0.828, 0.615, 0.804, 0.685, 0.583), overwrite = FALSE)
+
+**Author(s)**
+
+Lihua Julie Zhu and Michael Brodsky Maintainer: julie.zhu@umassmed.edu
+
+Alper Kucukural, Galaxy Maintainer: alper.kucukural@umassmed.edu
+
+**Citation**
+
+(from within R, enter citation("CRISPRseek")):
+
+Zhu LJ, Holmes BR, Aronin N and Brodsky MH (2014). “CRISPRseek: A Bioconductor Package to Identify Target-Specific Guide RNAs for CRISPR-Cas9 Genome-Editing Systems.” PLoS one, 9(9). http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4172692/.
+
+**References**
+
+Patrick D Hsu, David A Scott, Joshua A Weinstein, F Ann Ran, Silvana Konermann, Vineeta Agarwala,
+Yinqing Li, Eli J Fine, Xuebing Wu, Ophir Shalem, Thomas J Cradick, Luciano A Marraffini,
+Gang Bao, Feng Zhang (2013) DNA targeting specificity of RNA-guided Cas9 nucleases. Nature
+Biotechnology 31:827-832
+
+Mali P, Aach J, Stranges PB, Esvelt KM, Moosburner M, Kosuri S, Yang L, Church GM. CAS9
+transcriptional activators for target specificity screening and paired nickases for cooperative genome
+engineering. Nat Biotechnol. 2013. 31(9):833-8
+
+Patrick D Hsu, David A Scott, Joshua A Weinstein, F Ann Ran, Silvana Konermann, Vineeta Agarwala,
+Yinqing Li, Eli J Fine, Xuebing Wu, Ophir Shalem, Thomas J Cradick, Luciano A Marraffini,
+Gang Bao, Feng Zhang. DNA targeting specificity of RNA-guided Cas9 nucleases. Nat Biotechnol. 2013. 31:827-832
+
+**Reference Manual and Materials**
+
+http://www.bioconductor.org/packages/release/bioc/vignettes/CRISPRseek/inst/doc/CRISPRseek.pdf
+
+http://www.bioconductor.org/packages/release/bioc/manuals/CRISPRseek/man/CRISPRseek.pdf
+
+http://www.bioconductor.org/packages/release/bioc/vignettes/CRISPRseek/inst/doc/CRISPRseek.R
+
+
+
+
+
+  </help>
+  <expand macro="citations" />
+
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/crisprseek_macros.xml	Wed Nov 04 17:05:03 2015 -0500
@@ -0,0 +1,13 @@
+<macros>
+    <!-- Packages every CRISPRseek tool needs at run time. Each requirement
+         name is written exactly like the matching <package name="..."> entry
+         in tool_dependencies.xml ("R" and "crisprseek"), so the dependency can
+         be matched by name even when the lookup is case-sensitive. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="3.2.1">R</requirement>
+            <requirement type="package" version="1.11.0">crisprseek</requirement>
+        </requirements>
+    </xml>
+    <!-- Citation shared by all CRISPRseek tools, given as a DOI. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pone.0108424</citation>
+        </citations>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/offtargetanalysis.xml	Wed Nov 04 17:05:03 2015 -0500
@@ -0,0 +1,162 @@
+<tool id="offTargetAnalysis" name="Off Target Analysis" version="1.0.0">
+  <!-- Short description of the tool. -->
+  <description>CRISPRSeek offTargetAnalysis</description>
+    <!-- The "requirements" and "citations" macros expanded in this file are
+         imported from crisprseek_macros.xml. -->
+    <macros>
+        <import>crisprseek_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- Runs the R script rendered from the configfile named
+         "offTargetAnalysis" with Rscript. detect_errors="exit_code" asks
+         Galaxy to judge success from the process exit code. -->
+    <command detect_errors="exit_code">
+        Rscript "${offTargetAnalysis}"
+    </command>
+    <configfiles>
+        <configfile name="offTargetAnalysis"><![CDATA[
+## Cheetah template that renders the R script run by Rscript.
+## Lines starting with "##" are Cheetah comments and never reach the R script.
+## The R code below deliberately avoids the R dollar operator and R "#"
+## comments because both characters are special to Cheetah.
+##
+## Resolve every advanced setting up front. The "Advanced Options" section and
+## its nested conditionals only define their parameters when switched on, so
+## referencing them unconditionally fails while the template is rendered. The
+## fallbacks mirror the defaults declared in the <inputs> section.
+#if str($a.advanced) == "Yes"
+    #set $opt_export_all = str($a.exportAllgRNAs)
+    #set $opt_paired_only = str($a.fg.fpg_only)
+    #if $opt_paired_only == "True"
+        #set $opt_min_gap = str($a.fg.mingap)
+        #set $opt_max_gap = str($a.fg.maxgap)
+    #else
+        #set $opt_min_gap = "0"
+        #set $opt_max_gap = "20"
+    #end if
+    #set $opt_pam_size = str($a.PAMsize)
+    #set $opt_pam = str($a.PAM)
+    #set $opt_pam_pattern = str($a.PAMPattern)
+    #set $opt_grna_size = str($a.gRNAsize)
+    #set $opt_min_score = str($a.minscore)
+    #set $opt_top_n = str($a.topN)
+    #set $opt_top_n_total = str($a.topNOfftargetTotalScore)
+    #set $opt_annotate_exon = str($a.annotateExon)
+    #set $opt_chrom = str($a.chromToSearch)
+    #set $opt_fetch_sequence = str($a.fs.fetchSequence)
+    #if $opt_fetch_sequence == "True"
+        #set $opt_upstream = str($a.fs.upstream)
+        #set $opt_downstream = str($a.fs.downstream)
+    #else
+        #set $opt_upstream = "200"
+        #set $opt_downstream = "200"
+    #end if
+    #set $opt_weights = str($a.weights)
+#else
+    #set $opt_export_all = "all"
+    #set $opt_paired_only = "False"
+    #set $opt_min_gap = "0"
+    #set $opt_max_gap = "20"
+    #set $opt_pam_size = "3"
+    #set $opt_pam = "NGG"
+    #set $opt_pam_pattern = "N[A_p_G]G_d_"
+    #set $opt_grna_size = "20"
+    #set $opt_min_score = "0.5"
+    #set $opt_top_n = "100"
+    #set $opt_top_n_total = "10"
+    #set $opt_annotate_exon = "true"
+    #set $opt_chrom = "all"
+    #set $opt_fetch_sequence = "True"
+    #set $opt_upstream = "200"
+    #set $opt_downstream = "200"
+    #set $opt_weights = "0, 0, 0.014, 0, 0, 0.395, 0.317, 0, 0.389, 0.079, 0.445, 0.508, 0.613, 0.851, 0.732, 0.828, 0.615, 0.804, 0.685, 0.583"
+#end if
+## Load all required libraries quietly
+library(CRISPRseek, quietly=TRUE, warn.conflicts=FALSE, verbose=FALSE)
+
+## Map the genome chosen in the form to the Bioconductor annotation packages.
+## NOTE(review): no transcript or gene symbol annotation is configured for
+## ce10 here; confirm which TxDb/org packages should be used for it.
+annotation_sets <- list(
+    hg19 = c(genome = "BSgenome.Hsapiens.UCSC.hg19", txdb = "TxDb.Hsapiens.UCSC.hg19.knownGene", orgdb = "org.Hs.eg.db", symbols = "org.Hs.egSYMBOL"),
+    mm10 = c(genome = "BSgenome.Mmusculus.UCSC.mm10", txdb = "TxDb.Mmusculus.UCSC.mm10.knownGene", orgdb = "org.Mm.eg.db", symbols = "org.Mm.egSYMBOL"),
+    ce10 = c(genome = "BSgenome.Celegans.UCSC.ce10", txdb = NA, orgdb = NA, symbols = NA)
+)
+annotation <- annotation_sets[["${bsgenomename}"]]
+library(annotation[["genome"]], character.only = TRUE)
+
+## Exon annotation needs a transcript database; switch it off when none is configured.
+annotate_exon <- as.logical("${opt_annotate_exon}")
+if (annotate_exon && is.na(annotation[["txdb"]])) {
+    message("No transcript annotation is configured for ${bsgenomename}; exon annotation is disabled.")
+    annotate_exon <- FALSE
+}
+
+## Input: a history dataset path, a path typed by the user, or pasted FASTA text.
+input_path <- "${s.inputfile}"
+#if str($s.input_source) == "Fasta"
+## Pasted sequences are text, not a path: restore the characters replaced by
+## the form sanitizer and write them to a file CRISPRseek can read.
+## NOTE(review): the __gt__/__cn__/__cr__ tokens are assumed to be Galaxy's
+## default replacements for ">", newline and carriage return; confirm.
+fasta_text <- gsub("__gt__", ">", input_path, fixed = TRUE)
+fasta_text <- gsub("__cr__", "", fasta_text, fixed = TRUE)
+fasta_text <- gsub("__cn__", "\n", fasta_text, fixed = TRUE)
+input_path <- "pasted_sequences.fa"
+writeLines(fasta_text, input_path)
+#end if
+
+## Use the output directory typed in the form, falling back to the job directory.
+output_dir <- "${outputdir}"
+if (!nzchar(output_dir)) {
+    output_dir <- "./"
+}
+
+## The form asks users to type _p_ for | and _d_ for the dollar sign; turn the
+## placeholders back into the real regular expression.
+## NOTE(review): __ob__/__cb__ are assumed to be the sanitizer's replacements
+## for square brackets; confirm against the Galaxy version in use.
+pam_pattern <- "${opt_pam_pattern}"
+pam_pattern <- gsub("__ob__", "[", pam_pattern, fixed = TRUE)
+pam_pattern <- gsub("__cb__", "]", pam_pattern, fixed = TRUE)
+pam_pattern <- gsub("_p_", "|", pam_pattern, fixed = TRUE)
+pam_pattern <- gsub("_d_", "\$", pam_pattern, fixed = TRUE)
+
+## The weights arrive as one comma separated string; CRISPRseek needs a numeric vector.
+off_target_weights <- as.numeric(strsplit("${opt_weights}", ",", fixed = TRUE)[[1]])
+
+analysis_args <- list(
+    input_path,
+    BSgenomeName = get(annotation[["genome"]]),
+    REpatternFile = system.file('extdata', 'NEBenzymes.fa', package = 'CRISPRseek'),
+    max.mismatch = as.integer("${maxmismatch}"),
+    exportAllgRNAs = "${opt_export_all}",
+    findPairedgRNAOnly = as.logical("${opt_paired_only}"),
+    min.gap = as.integer("${opt_min_gap}"),
+    max.gap = as.integer("${opt_max_gap}"),
+    gRNA.name.prefix = "gRNA",
+    PAM.size = as.integer("${opt_pam_size}"),
+    gRNA.size = as.integer("${opt_grna_size}"),
+    PAM = "${opt_pam}",
+    PAM.pattern = pam_pattern,
+    chromToSearch = "${opt_chrom}",
+    min.score = as.numeric("${opt_min_score}"),
+    topN = as.integer("${opt_top_n}"),
+    topN.OfftargetTotalScore = as.integer("${opt_top_n_total}"),
+    annotateExon = annotate_exon,
+    fetchSequence = as.logical("${opt_fetch_sequence}"),
+    upstream = as.integer("${opt_upstream}"),
+    downstream = as.integer("${opt_downstream}"),
+    outputDir = output_dir,
+    weights = off_target_weights
+)
+if (annotate_exon) {
+    library(annotation[["txdb"]], character.only = TRUE)
+    library(annotation[["orgdb"]], character.only = TRUE)
+    analysis_args[["txdb"]] <- get(annotation[["txdb"]])
+    analysis_args[["orgAnn"]] <- get(annotation[["symbols"]])
+}
+do.call(offTargetAnalysis, analysis_args)
+
+     ]]></configfile>
+    </configfiles>
+  <inputs>
+    <!-- Reference genome selection. -->
+    <param name="bsgenomename" type="select" label="BS Genome Name" help="BSgenome object. Please refer to available.genomes in BSgenome package. For example, BSgenome.Hsapiens.UCSC.hg19 for hg19, BSgenome.Mmusculus.UCSC.mm10 for mm10, BSgenome.Celegans.UCSC.ce6 for ce6, BSgenome.Rnorvegicus.UCSC.rn5 for rn5, BSgenome.Drerio.UCSC.danRer7 for Zv9, and BSgenome.Dmelanogaster.UCSC.dm3 for dm3">
+      <option value="hg19">hg19</option>
+      <option value="mm10">mm10</option>
+      <option value="ce10">ce10</option>
+    </param>
+    <!-- Sequence source: a history dataset, pasted FASTA text, or a path.
+         Every case exposes its value under the same name, "inputfile". -->
+    <conditional name="s">
+      <param name="input_source" type="select" label="Select Input File Path" help="Sequence input file path that contains sequences to be searched for potential gRNAs">
+        <option value="history">History</option>
+        <option value="Fasta">Fasta</option>
+        <option value="FullPath">Full Path</option>
+      </param>
+      <when value="history">
+        <param format="fastq,fa,fasta" name="inputfile" type="data" label="Input File Path" />
+      </when>
+      <when value="FullPath">
+        <param name="inputfile" type="text" area="true" size="2x60" label="Input File Path" help="Full path in the cluster"/>
+      </when>
+      <when value="Fasta">
+        <param name="inputfile" type="text" area="true" size="2x60" label="Input Fasta Sequence/s" help="Free text fasta sequences"/>
+      </when>
+    </conditional>
+    <!-- Settings that are always shown. -->
+    <param name="maxmismatch" type="text" label="Max Mismatch" value="3" help="Maximum mismatch allowed in off target search, default 3. Warning: will be considerably slower if set > 3"/>
+    <param name="outputdir" type="text"  area="true" size="2x60" label="Output Directory" help="Output directory in the cluster"/>
+    <!-- Optional settings, shown only when "Advanced Options" is Yes. -->
+    <conditional name="a">
+      <param name="advanced" type="select" label="Advanced Options">
+        <option value="No" selected="True">No</option>
+        <option value="Yes">Yes</option>
+      </param>
+      <when value="Yes">
+        <param name="exportAllgRNAs" type="select" label="Export All gRNAs" help="Indicate whether to output all potential gRNAs to a file in fasta format, genbank format or both. Default to both.">
+          <option value="all" selected="True">all</option>
+          <option value="fasta">fasta</option>
+          <option value="genbank">genbank</option>
+          <option value="no">no</option>
+        </param>
+        <!-- Gap limits are only offered for paired gRNA searches. -->
+        <conditional name="fg">
+          <param name="fpg_only" type="select" label="Find Paired gRNA Only" help="Choose whether to only search for paired gRNAs in such an orientation that the first one is on minus strand called reverse gRNA and the second one is on plus strand called forward gRNA. TRUE or FALSE, default FALSE">
+            <option value="False" selected="True">No</option>
+            <option value="True">Yes</option>
+          </param>
+          <when value="True">
+            <param name="mingap" type="text" label="Min Gap" value="0" />
+            <param name="maxgap" type="text" label="Max Gap" value="20" />
+          </when>
+        </conditional>
+        <param name="PAMsize" type="text" label="PAM Size" value="3" help="PAM length, default 3"/>
+        <param name="PAM" type="text" label="PAM" value="NGG" help="PAM sequence after the gRNA, default NGG"/>
+        <param name="PAMPattern" type="text" label="PAM Pattern" value="N[A_p_G]G_d_"  help="Regular expression of protospacer-adjacent motif (PAM), default N[A|G]G$. Please use _p_ for | and _d_ for $ symbols"/>
+        <param name="gRNAsize" type="text" label="gRNA Size" value="20" help="The size of the gRNA, default 20"/>
+        <param name="minscore" type="text" label="min Score" value="0.5" help="minimum score of an off target to included in the final output, default 0.5"/>
+        <param name="topN" type="text" label="topN" value="100" help="top N off targets to be included in the final output, default 100"/>
+        <param name="topNOfftargetTotalScore" type="text" label="topN OfftargetTotalScore" value="10" help="top N off target used to calculate the total off target score, default 10"/>
+        <param name="annotateExon" type="select" label="Annotate Exon" help="Choose whether or not to indicate whether the off target is inside an exon or not, default TRUE">
+          <option value="true" selected="True">True</option>
+          <option value="false">False</option>
+        </param>
+        <param name="chromToSearch" type="text" label="chromToSearch" value="all" help="Type chromosome (Ex: chrX)"/>
+
+        <!-- Flank offsets are only offered when flank sequences are fetched. -->
+        <conditional name="fs">
+          <param name="fetchSequence" type="select" label="Fetch Sequence" help="Fetch flank sequence of off target or not, default TRUE">
+            <option value="True" selected="True">True</option>
+            <option value="False">False</option>
+          </param>
+          <when value="True">
+            <param name="upstream" type="text" label="upstream" value="200" help="upstream offset from the off target start, default 200"/>
+            <param name="downstream" type="text" label="downstream" value="200" help="downstream offset from the off target end default 200"/>
+          </when>
+        </conditional>
+        <param name="weights" type="text" area="true" size="5x60" value="0, 0, 0.014, 0, 0, 0.395, 0.317, 0, 0.389, 0.079, 0.445, 0.508, 0.613, 0.851, 0.732, 0.828, 0.615, 0.804, 0.685, 0.583" label="Off Target Weights" help="a numeric vector size of gRNA length, default 0, 0, 0.014, 0, 0, 0.395, 0.317, 0, 0.389, 0.079, 0.445, 0.508, 0.613, 0.851, 0.732, 0.828, 0.615, 0.804, 0.685, 0.583 which is used in Hsu et al., 2013 cited in the reference section"/>
+      </when>
+    </conditional>
+  </inputs>
+  <!-- Declares a single history dataset named "output" with format gz.
+       NOTE(review): nothing in the command or the R configfile of this tool
+       writes to this dataset; confirm how the analysis results are meant to
+       reach the Galaxy history. -->
+  <outputs>
+    <data format="gz" name="output"/>
+  </outputs>
+  <help>
+
+**What it does**
+
+The package includes functions to find potential guide RNAs for input target
+sequences, optionally filter guide RNAs without restriction enzyme cut site, or
+without paired guide RNAs, genome-wide search for off-targets, score, rank, fetch
+flank sequence and indicate whether the target and off-targets are located in exon
+region or not. Potential guide RNAs are annotated with total score of the top5 and
+topN off-targets, detailed topN mismatch sites, restriction enzyme cut sites, and paired guide RNAs.
+
+**Description**
+
+Design of target-specific gRNAs for the CRISPR-Cas9 system by automatically finding potential
+gRNAs (paired/not paired), with/without restriction enzyme cut site(s) in a given sequence,
+searching for off targets with user defined maximum number of mismatches, calculating score of each
+off target based on mismatch positions in the off target and a penalty weight matrix, filtering off
+targets with user-defined criteria, and annotating off targets with flank sequences, whether located
+in exon or not. Summary report is also generated with gRNAs ranked by total topN off target
+score, annotated with restriction enzyme cut sites and possible paired gRNAs. Detailed paired
+gRNAs information and restriction enzyme cut sites are stored in separate files in the output directory
+specified by the user. In total, four tab delimited files are generated in the output directory:
+OfftargetAnalysis.xls (off target details), Summary.xls (gRNA summary), REcutDetails.xls (restriction
+enzyme cut sites of each gRNA), and pairedgRNAs.xls (potential paired gRNAs).
+
+**Author(s)**
+
+Lihua Julie Zhu and Michael Brodsky Maintainer: julie.zhu@umassmed.edu
+
+**Citation**
+
+(from within R, enter citation("CRISPRseek")):
+
+Zhu LJ, Holmes BR, Aronin N and Brodsky MH (2014). “CRISPRseek: A Bioconductor Package to Identify Target-Specific Guide RNAs for CRISPR-Cas9 Genome-Editing Systems.” PLoS one, 9(9). http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4172692/.
+
+**References**
+
+http://bioconductor.org/packages/2.8/bioc/vignettes/BSgenome/inst/doc/GenomeSearching.pdf
+
+Patrick D Hsu, David A Scott, Joshua A Weinstein, F Ann Ran, Silvana Konermann, Vineeta Agarwala,
+Yinqing Li, Eli J Fine, Xuebing Wu, Ophir Shalem, Thomas J Cradick, Luciano A Marraffini,
+Gang Bao, Feng Zhang (2013) DNA targeting specificity of RNA-guided Cas9 nucleases. Nature
+Biotechnology 31:827-832
+
+Mali P, Aach J, Stranges PB, Esvelt KM, Moosburner M, Kosuri S, Yang L, Church GM. CAS9
+transcriptional activators for target specificity screening and paired nickases for cooperative genome
+engineering. Nat Biotechnol. 2013. 31(9):833-8
+
+Patrick D Hsu, David A Scott, Joshua A Weinstein, F Ann Ran, Silvana Konermann, Vineeta Agarwala,
+Yinqing Li, Eli J Fine, Xuebing Wu, Ophir Shalem, Thomas J Cradick, Luciano A Marraffini,
+Gang Bao, Feng Zhang. DNA targeting specificity of RNA-guided Cas9 nucleases. Nat Biotechnol. 2013. 31:827-832
+
+**Reference Manual and Materials**
+
+http://www.bioconductor.org/packages/release/bioc/vignettes/CRISPRseek/inst/doc/CRISPRseek.pdf
+
+http://www.bioconductor.org/packages/release/bioc/manuals/CRISPRseek/man/CRISPRseek.pdf
+
+http://www.bioconductor.org/packages/release/bioc/vignettes/CRISPRseek/inst/doc/CRISPRseek.R
+
+
+
+
+
+  </help>
+  <expand macro="citations" />
+
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test_data/inputfile.fa	Wed Nov 04 17:05:03 2015 -0500
@@ -0,0 +1,54 @@
+>mRIPK1cds1_gR36f
+AATGCAACCAGACATGTCCTTGG
+>mRIPK1cds1_gR51f
+GTCCTTGGACAATATTAAGATGG
+>mRIPK1cds1_gR69f
+GATGGCATCCAGTGACCTGCTGG
+>mRIPK1cds1_gR91f
+GAGAAGACAGACCTAGACAGCGG
+>mRIPK1cds1_gR94f
+AAGACAGACCTAGACAGCGGAGG
+>mRIPK1cds1_gR100f
+GACCTAGACAGCGGAGGCTTCGG
+>mRIPK1cds1_gR101f
+ACCTAGACAGCGGAGGCTTCGGG
+>mRIPK1cds1_gR105f
+AGACAGCGGAGGCTTCGGGAAGG
+>mRIPK1cds1_gR133f
+TTGTGTTACCACAGAAGCCATGG
+>mRIPK1cds1_gR163f
+ATCCTGAAAAAAGTATACACAGG
+>mRIPK1cds1_gR164f
+TCCTGAAAAAAGTATACACAGGG
+>mRIPK1cds1_gR187f
+CCCAACCGCGCTGAGTGAGTTGG
+>mRIPK1cds1_gR188f
+CCAACCGCGCTGAGTGAGTTGGG
+>mRIPK1cds1_gR189f
+CAACCGCGCTGAGTGAGTTGGGG
+>mRIPK1cds1_gR190f
+AACCGCGCTGAGTGAGTTGGGGG
+>mRIPK1cds1_gR182r
+TGCCCCCAACTCACTCAGCGCGG
+>mRIPK1cds1_gR178r
+CCCAACTCACTCAGCGCGGTTGG
+>mRIPK1cds1_gR177r
+CCAACTCACTCAGCGCGGTTGGG
+>mRIPK1cds1_gR155r
+GCCCTGTGTATACTTTTTTCAGG
+>mRIPK1cds1_gR140r
+TTTTCAGGATGACAAATCCATGG
+>mRIPK1cds1_gR131r
+TGACAAATCCATGGCTTCTGTGG
+>mRIPK1cds1_gR121r
+ATGGCTTCTGTGGTAACACAAGG
+>mRIPK1cds1_gR92r
+TCCCGAAGCCTCCGCTGTCTAGG
+>mRIPK1cds1_gR74r
+CTAGGTCTGTCTTCTCCAGCAGG
+>mRIPK1cds1_gR67r
+TGTCTTCTCCAGCAGGTCACTGG
+>mRIPK1cds1_gR43r
+TGCCATCTTAATATTGTCCAAGG
+>mRIPK1cds1_gR33r
+ATATTGTCCAAGGACATGTCTGG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Wed Nov 04 17:05:03 2015 -0500
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<!-- Tool Shed packages that provide the dependencies of the CRISPRseek tools.
+     Each package points at the repository revision that supplies it. -->
+<tool_dependency>
+    <!-- CRISPRseek 1.11.0 -->
+    <package name="crisprseek" version="1.11.0">
+        <repository name="package_r_crisprseek_1_11_0" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="f14db4120426" />
+    </package>
+    <!-- R 3.2.1 -->
+    <package name="R" version="3.2.1">
+        <repository name="package_r_3_2_1" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" changeset_revision="e36e1db5e729" />
+    </package>
+</tool_dependency>