# HG changeset patch # User peterjc # Date 1383220661 14400 # Node ID 7b96d8a3262f098560c142c169a4956b96b80780 Uploaded v0.0.0, wrappers for the CLCbio assember and mapper only. diff -r 000000000000 -r 7b96d8a3262f tools/clc_assembly_cell/README.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/clc_assembly_cell/README.rst Thu Oct 31 07:57:41 2013 -0400 @@ -0,0 +1,121 @@ +Galaxy wrapper for the CLC Assembly Cell suite from CLCbio +========================================================== + +This wrapper is copyright 2013 by Peter Cock, The James Hutton Institute +(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. +See the licence text below. + +CLC Assembly Cell is the commercial command line assembly suite from CLCbio. +It uses SIMD instructions to parallelize and accelerate their assembly +algorithms, and is also very memory efficient making it an appealing choice +for complex genomes where the RAM requirements exclude other popular tools. + +For more information: +http://www.clcbio.com/products/clc-assembly-cell/ + +You can download the CLC Assembly Cell User Manual here, currently v4.2 +http://www.clcbio.com/files/usermanuals/CLC_Assembly_Cell_User_Manual.pdf + +There is also an online manual here: +http://clcsupport.com/clcassemblycell/current/index.php?manual=Introduction.html + +There is currently a free trial download here: +http://www.clcbio.com/?action=transfer_user&productVersion=4.2&productID=6982&productName=CLC+Assembly+Cell&nonce=db842e3f95 + +This wrapper is available from the Galaxy Tool Shed at: +http://toolshed.g2.bx.psu.edu/view/peterjc/clc_assembly_cell + +This Galaxy wrapper was written and tested using CLC Assembly Cell +version 4.10.86742 + + +Automated Installation +====================== + +This should be straightforward, Galaxy should automatically download and +install the wrapper from the Galaxy Tool Shed. 
However, you will need to +manually install the CLC Assembly Cell software, and set up the environment +variable ``$CLC_ASSEMBLY_CELL`` to the directory containing the binaries +(and in particular, the ``clc_assembler`` binary). For example: + +$ export CLC_ASSEMBLY_CELL=/opt/clcbio/clc-assembly-cell-4.1.0-linux_64/ + + +Manual Installation +=================== + +First install the CLC Assembly Cell software as described above. + +To install the wrapper copy or move the following files under the Galaxy tools +folder, e.g. in a tools/clcbio folder: + +* clc_assembler.xml (Galaxy tool definition) +* clc_mapper.xml (Galaxy tool definition) +* README.rst (this file) + +You will also need to modify the tool_conf.xml file to tell Galaxy to offer the +tools. Just add these lines, for example next to other assembly tools:: + + + + +If you wish to run the unit tests, also add this to tool_conf.xml.sample +and move/copy the test-data files under Galaxy's test-data folder. Then:: + + $ ./run_functional_tests.sh -id clc_assembler + +That's it. 
+ + +History +======= + +======= ====================================================================== +Version Changes +------- ---------------------------------------------------------------------- +v0.0.1 - Initial public release +======= ====================================================================== + + +Developers +========== + +Development is on this GitHub repository: +https://github.com/peterjc/pico_galaxy/tree/master/tools/clc_assembly_cell + +For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use +the following command from the Galaxy root folder:: + + $ tar -czf clcbio.tar.gz tools/clc_assembly_cell/README.rst tools/clc_assembly_cell/clc_assembler.xml tools/clc_assembly_cell/clc_mapper.xml + +Check this worked:: + + $ tar -tzf clcbio.tar.gz + tools/clc_assembly_cell/README.rst + tools/clc_assembly_cell/clc_assembler.xml + tools/clc_assembly_cell/clc_mapper.xml + + +Licence (MIT) +============= + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +NOTE: This is the licence for the Galaxy Wrapper only. The CLCbio tools are +commercial, and are available and licenced separately. diff -r 000000000000 -r 7b96d8a3262f tools/clc_assembly_cell/clc_assembler.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/clc_assembly_cell/clc_assembler.xml Thu Oct 31 07:57:41 2013 -0400 @@ -0,0 +1,122 @@ + + Assembles reads giving a FASTA file + + clc_assembler + + /mnt/apps/clcBio/clc-assembly-cell-4.1.0-linux_64/clc_assembler | grep -i version + /mnt/apps/clcBio/clc-assembly-cell-4.1.0-linux_64/clc_assembler +#for $rg in $read_group +##-------------------------------------- +#if str($rg.segments.type) == "paired" +-p $rg.segments.placement $rg.segments.dist_mode $rg.segments.min_size $rg.segments.max_size -q -i "$rg.segments.filename1" "$rg.segments.filename2" +#end if +##-------------------------------------- +#if str($rg.segments.type) == "interleaved" +-p $rg.segments.placement $rg.segments.dist_mode $rg.segments.min_size $rg.segments.max_size -q "$rg.segments.filename" +#end if +##-------------------------------------- +#if str($rg.segments.type) == "none" +-p no -q +#for $f in $rg.segments.filenames +"$f" +#end for +#end if +##-------------------------------------- +#end for +-o "$out_fasta" +--cpus \$GALAXY_SLOTS +-v | grep -v "^Progress: " + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Runs the ``clc_assembler`` tool giving a FASTA output file. You would then +typically map the same set of reads onto this assembly using ``clc_mapper`` +to perform any downstream analysis using the mapped reads. + + +**Citation** + +If you use this Galaxy tool in work leading to a scientific publication please +cite this wrapper as: + +Peter J.A. 
Cock (2013), Galaxy wrapper for the CLC Assembly Cell suite from CLCbio +http://toolshed.g2.bx.psu.edu/view/peterjc/clc_assembly_cell + +This wrapper is available to install into other Galaxy Instances via the Galaxy +Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/clc_assembly_cell + + + diff -r 000000000000 -r 7b96d8a3262f tools/clc_assembly_cell/clc_mapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/clc_assembly_cell/clc_mapper.xml Thu Oct 31 07:57:41 2013 -0400 @@ -0,0 +1,157 @@ + + Maps reads giving a SAM/BAM file + + clc_mapper + clc_cas_to_sam + samtools + samtools + + /mnt/apps/clcBio/clc-assembly-cell-4.1.0-linux_64/clc_mapper | grep -i version + echo Mapping reads with clc_mapper... +&& /mnt/apps/clcBio/clc-assembly-cell-4.1.0-linux_64/clc_mapper +#for $ref in $references +#if str($ref.type)=="circular" +-d -z "$ref.ref_file" +#else +-d "$ref.ref_file" +#end if +#end for +#for $rg in $read_group +##-------------------------------------- +#if str($rg.segments.type) == "paired" +-p $rg.segments.placement $rg.segments.dist_mode $rg.segments.min_size $rg.segments.max_size -q -i "$rg.segments.filename1" "$rg.segments.filename2" +#end if +##-------------------------------------- +#if str($rg.segments.type) == "interleaved" +-p $rg.segments.placement $rg.segments.dist_mode $rg.segments.min_size $rg.segments.max_size -q "$rg.segments.filename" +#end if +##-------------------------------------- +#if str($rg.segments.type) == "none" +-p no -q +#for $f in $rg.segments.filenames +"$f" +#end for +#end if +##-------------------------------------- +#end for +-o "temp_job.cas" +--cpus \$GALAXY_SLOTS +## TODO - filtering out the progress lines seems to mess up the multiple commands +## | grep -v "^Progress: " +##=========================================== +## TODO - I've required all the input in Sanger FASTQ format (or FASTA) so can +## use the offset 33, rather than the CLCbio default of 64 which is only for +## obsolete Illumina FASTQ files. 
Really need this option per input file... +&& echo Converting CAS file to BAM with clc_cas_to_sam... +&& /mnt/apps/clcBio/clc-assembly-cell-4.1.0-linux_64/clc_cas_to_sam --cas "temp_job.cas" -o "temp_job.bam" --no-progress --qualityoffset 33 +&& rm "temp_job.cas" +##=========================================== +&& echo Sorting BAM file with samtools... +&& samtools sort "temp_job.bam" "temp_sorted" +&& mv "temp_sorted.bam" "$out_bam" +&& echo Indexing BAM file with samtools... +&& samtools index "$out_bam" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Runs the CLCbio tool ``clc_mapper`` which produces a proprietary binary +CAS format file, which is immediately processed using ``clc_cas_to_sam`` +to generate a self-contained standard BAM file, which is then sorted +and indexed using ``samtools``. + + +**Citation** + +If you use this Galaxy tool in work leading to a scientific publication please +cite this wrapper as: + +Peter J.A. Cock (2013), Galaxy wrapper for the CLC Assembly Cell suite from CLCbio +http://toolshed.g2.bx.psu.edu/view/peterjc/clc_assembly_cell + +This wrapper is available to install into other Galaxy Instances via the Galaxy +Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/clc_assembly_cell + +