Mercurial > repos > bonsai > crac

<?xml version="1.0" encoding="utf-8"?>
<tool id="crac" name="CRAC" version="1.0">
  <!-- Declares the crac package (version 1.3.0) as the tool's requirement. -->
  <requirements>
    <requirement version="1.3.0" type="package">crac</requirement>
  </requirements>
  <!-- One-line summary of the tool. -->
  <description>Analyzing RNAs in high-throughput sequencing data</description>
   <!--
     Cheetah template of the command line handed to crac_wrapper.sh.
     Argument order, kept unchanged:
       1. location of the CRAC index (data-table path for a pre-built genome,
          or the extra-files directory of the crac_index dataset from the history);
       optional gz flag when compressed output was requested;
       2. extra-files directory of the output dataset;
       then the read file, k-mer length, read length and SAM output, followed by
       the optional deep-SNV, advanced and detailed-SAM options.
     File paths are double-quoted so that paths containing spaces stay a single argument.
     Explanatory notes live in this comment rather than inside the template text.
   -->
   <command interpreter="bash"> crac_wrapper.sh
              #if $Genome.which_genome == "prebuilt"
                "$Genome.prebuilt_genome.fields.path"
              #else
                "$Genome.index_input.extra_files_path"
              #end if
              #if str($condi_compressed) == "yes"
                --gz
              #end if
              "$output_name.extra_files_path"
              -r "$input" -k $kmer_length --read-length $read_length --sam "$output_name"
              #if $condi_deep_snp.deepSNP == "yes"
                 --deep-snv --nb-nucleotides-snv-comparison $condi_deep_snp.nb_nucleotides_snp_comparison
              #end if
              #if $choixSettings.settings == "experimental"
                --max-splice-length $choixSettings.max_splice_length
                --max-bio-indel $choixSettings.max_bio_indel
                --max-bases-retrieved $choixSettings.max_bases_retrieved
                --min-duplication $choixSettings.min_duplication
                --max-duplication $choixSettings.max_duplication
                --min-percent-single-loc $choixSettings.min_percent_single_loc
                --min-percent-duplication-loc $choixSettings.min_percent_duplication_loc
                --max-bases-randomly-matched $choixSettings.max_bases_randomly_matched
                --max-extension-length $choixSettings.max_extension_length
                --min-support-no-cover $choixSettings.min_support_no_cover
                --min-break-length $choixSettings.min_break_length
              #end if
              #if str($detailed_sam) == "yes"
                --detailed-sam
              #end if

   </command>

   <inputs>
      <!-- Normal Setting -->

      <!-- Reference genome source: an entry of the crac_indexes data table, or a crac_index dataset from the history. -->
      <conditional name="Genome">
         <param type="select" name="which_genome" label="Do you want to use a pre-built reference genome or a Crac-index generated genome from your history?" help="Pre-built reference genomes are generated by Crac-index.">
            <option value="prebuilt"> Use a pre-built reference genome </option>
            <option value="history"> Use a Crac-index generated genome from my history</option>
         </param>

         <!-- Pre-built index: options come from the crac_indexes data table, sorted on column 2. -->
         <when value="prebuilt">
            <param type="select" name="prebuilt_genome" label="Select a reference genome" help="if your genome of interest is not listed - contact authors">
               <options from_data_table="crac_indexes">
                  <filter type="sort_by" column="2" />
                  <validator type="no_options" message="No indexes are available" />
               </options>
            </param>
         </when>

         <!-- Index taken from a crac_index dataset in the user's history. -->
         <when value="history">
            <param type="data" name="index_input" format="crac_index" label="Reference Genome" help="Select an indexed Genome from your history"/>
         </when>
      </conditional>


      <!-- Read file handed to the wrapper with the r option. -->
      <param type="data" name="input" format="txt,raw,fastq" label="Reads File" help="Select a file"/>
      <!-- k-mer length, restricted to 12..32. NOTE(review): the default is 21 while the help text recommends 22 for human; confirm which is intended. -->
      <param type="integer" name="kmer_length" value="21" min="12" max="32" label="k-mer length">
         <help>k-mer length must be carefully chosen. A k-mer of that length must map to a unique location in the genome with a high probability. Recommended value for the human genome: 22</help>
      </param>
      <!-- Fixed read length; 0 means no read length is considered (see help text). -->
      <param type="integer" name="read_length" value="0" label="Read length" help="Set read length when all reads have the same length to dramatically increase computation speed. Default value (no read length considered) : 0. Note : If read length is set, shorter reads will be ignored, longer reads will be cut."/>
      <!--
        Plain select (not a conditional): choosing "yes" makes the command template
        add the detailed-sam flag. <when> elements belong to <conditional> only, so
        none are declared here. "no" is the first option and is marked as the
        explicit default.
      -->
      <param name="detailed_sam" type="select" label="Do you want a detailed sam output file ?" help="Detailed sam output file gives you information on the SNPs, Splice junctions, Sequencing errors, Chimeras, ..." >
         <option value="no" selected="true"> No, I do not want detailed sam output file </option>
         <option value="yes"> Yes, I want detailed sam output file </option>
      </param>
      <!-- Radio-button select: "yes" makes the command template add the gz flag. -->
      <param type="select" name="condi_compressed" display="radio" label="Compress output files?">
         <option value="yes">Yes</option>
         <option value="no">No</option>
      </param>
      <!-- Conditional 1: optional deep SNV search; its extra parameter is only shown when the search is enabled. -->
      <conditional name="condi_deep_snp">
         <param name="deepSNP" type="select" label="Search hard for SNPs?">
            <option value="no" selected="true"> No, do not search hard for SNVs </option>
            <option value="yes"> Yes, search hard for SNVs (takes more time) </option>
         </param>
         <when value="yes">
            <param type="integer" name="nb_nucleotides_snp_comparison" value="8" label="Number of nucleotides for SNV comparison" help="Default value for human genome : 8. A smaller value will find more SNVs, but will be less accurate."/>
         </when>
         <!-- Empty branch: nothing more to ask when the deep search is off. -->
         <when value="no"/>
      </conditional>
      <!-- End Conditional 1 -->

    <!-- Experimental settings -->
    <!--
      Conditional 2 (setting choice). With "normal" the command template adds no
      tuning options; with "experimental" the parameters below are exposed to the
      user for forwarding to crac.
    -->
    <conditional name="choixSettings">
        <param name="settings" type="select" label="Advanced CRAC settings to use" help="If you want full control to optimize your experience, use Advanced Settings. Be careful, these settings are experimental and one single change can make Crac fail">
            <option value="normal" selected="true"> Normal settings </option>
            <option value="experimental"> Advanced Settings </option>
        </param>
        <!-- Empty branch: normal settings need no extra parameter. -->
        <when value="normal"/>
        <when value="experimental">
           <param name="max_splice_length" type="integer" value="300000" label="Maximum splice length" help="Splices larger than this value, will not be considered as splices, but (if possible) as chimeras. Default value for human genome : 300,000 bp." />
           <param name="max_bio_indel" type="integer" value="15" label="Maximum indel length" help="Larger indels will be considered as splice junctions. Default value for human genome : 15 bp." />
           <param name="min_duplication" type="integer" value="2" label="Minimum duplication occurrence" help=" Minimum number of k-mer occurrences in the genome to be considered as duplicated. Default value for human genome : 2." />
           <param name="max_duplication" type="integer" value="9" label="Maximum duplication occurrence" help=" Maximum number of k-mer occurrences to be considered as duplicated. Default value for human genome : 9." />
           <param name="min_percent_single_loc" type="float" value="0.15" label="Minimum unique location percentage" help=" Minimal percentage of k-mers that must be unique in the genome, to consider the read as unique. Default value for human genome : 0.15." />
           <!-- NOTE(review): the CRAC 1.3.0 option listing in the help section gives 0.15 as default for this option; confirm that 0.20 is intended. -->
           <param name="min_percent_duplication_loc" type="float" value="0.20" label="Minimum duplicated location percentage" help=" Minimal percentage of k-mers that must be duplicated in the genome, to consider the read as duplicated. Default value for human genome : 0.20." />
           <!--param name="min_percent_multiple_loc" type="float" value="0.20" label="Minimum percent multiple localisation" help=" HELP. Default value for human genome : 0.20." /-->
           <param name="max_bases_randomly_matched" type="integer" value="10" label="Maximum bases randomly matched" help=" Maximum number of bases that can be considered as randomly matched. Default value for human genome : 10." />
           <!-- NOTE(review): the CRAC 1.3.0 option listing in the help section gives 15 as default for this option; also verify that the command template forwards this value. -->
           <param name="max_bases_retrieved" type="integer" value="10" label="Maximum bases retrieved" help=" Maximum number of bases retrieved from the genome when outputting deletions. Default value for human genome : 10." />
           <param name="max_extension_length" type="integer" value="10" label="Maximum extension length" help=" Maximal number of nucleotides visited to extend a break and to make sure that the location is consistent. Default value for human genome : 10." />
           <param name="min_support_no_cover" type="float" value="1.30" label="Minimum support no cover" help=" Average coverage along the read to consider it as not covered. Default value for human genome : 1.30." />
           <param name="min_break_length" type="float" value="0.5" label="Minimum break length" help=" Breaks shorter than this ratio times the k-mer length will be considered as too short and will be merged if necessary. Default value for human genome : 0.5." />
        </when>
        <!-- End "when experimental" -->
    </conditional>
    <!-- End Conditional 2 -->
   </inputs>

   <outputs>
      <!-- SAM dataset referenced as $output_name by the command template. -->
      <data format="sam" name="output_name" label="${tool.name} on ${on_string}: mapped reads" />
   </outputs>

   <tests>
      <!-- Placeholder: this test declares no inputs and no expected outputs, so it checks nothing. TODO: add a real functional test with test data. -->
      <test>
      </test>
   </tests>

   <help>
**What it does**

CRAC_ proposes a novel way of analyzing reads that integrates genomic locations
and local coverage, and delivers all of its predictions in a single
step. CRAC uses a double k-mer profiling approach to detect candidate
mutations, indels, splice or fusion junctions in each single read.

.. _CRAC: http://crac.gforge.inria.fr/

If you use this tool, please cite:
  - Philippe N., Salson M., Commes T., Rivals E., `"CRAC: an integrated approach to the analysis of RNA-seq reads"`__, Genome Biology (2013), 14:R30, doi: 10.1186/gb-2013-14-3-r30.

.. __: http://genomebiology.com/2013/14/3/R30/

------

**Input formats**

CRAC accepts files in FASTA, FASTQ or any text format (txt, raw, ...).

------

**Output**

The output is in SAM format. If you choose the detailed SAM output, CRAC adds several fields that provide more information. The details are described here: http://crac.gforge.inria.fr/index.php?id=sam-documentation


------

**Crac settings**

Main options are displayed at the top of the page. If you are an experienced user, you can choose to display
the full set of CRAC settings.  Most of the options in CRAC have been implemented here.

------

**Command-line help of the wrapped CRAC version**::

  crac 1.3.0      Compiled on Sep 13 2013.

     -h, --help           &lt;none&gt;          print this help and exit
     -f, --full-help      &lt;none&gt;          print a complete help and exit
     -v                   &lt;none&gt;          print version and exit

  Mandatory arguments
     -i                   &lt;FILE&gt;          set genome index file (without the extension filename)
     -r                   &lt;FILE&gt; [FILE2]  set read file. Specify FILE2 in case of paired-end reads
     -k                   &lt;INT&gt;           set k-mer length
     -o, --sam            &lt;FILE&gt;          set SAM output filename or print on STDOUT with "-o -" argument

  Optional arguments
    * Protocol
     --stranded           &lt;none&gt;          set the read mapping with for a strand specific library (DEFAULT non-strand specific)

    * Efficiency
     --nb-threads         &lt;INT&gt;           set the number of worker threads (DEFAULT 1)
     --read-length, -m    &lt;INT&gt;           set read length in case of all reads have the same length to optimize
                                          CPU and memory times
     --treat-multiple     &lt;none&gt;          consider alignments with multiple locations (&gt;max-duplication) rather than considering a no-alignment in the SAM file
     --max-locs           &lt;INT&gt;           set the maximum number of locations on the reference index (DEFAULT 300)

    * Accuracy
     --no-ambiguity       &lt;none&gt;          discard biological events (splice, snv, indel, chimera) which have several matches on the reference index


  Optional output arguments
     --all                              &lt;FILE&gt;     set output base filename for all causes following
     --gz                               &lt;none&gt;     all output files specified after this argument are gzipped

    * Summary and statistics
     --summary                          &lt;FILE&gt;     set output summary file
    * Mapping
     --single                           &lt;FILE&gt;     set output single file
     --duplicate                        &lt;FILE&gt;     set output duplication file
     --multiple                         &lt;FILE&gt;     set output multiple file
     --none                             &lt;FILE&gt;     set output none file
     --normal                           &lt;FILE&gt;     set output normal file
     --almost-normal                    &lt;FILE&gt;     set output almost normal file

    * Biological causes
     --snv                              &lt;FILE&gt;     set output SNV file
     --indel                            &lt;FILE&gt;     set output short indel file
     --splice                           &lt;FILE&gt;     set output splice junction file
     --weak-splice                      &lt;FILE&gt;     set output coverless splice junction file
     --chimera                          &lt;FILE&gt;     set output chimera junction file
     --paired-end-chimera               &lt;FILE&gt;     set output for paired-end chimera file
     --biological                       &lt;FILE&gt;     set output bio-undetermined file

    * Sequence errors
     --errors                           &lt;FILE&gt;     set output sequence errors file

    * Repetition
     --repeat                           &lt;FILE&gt;     set output repetition file

    * Other causes
     --undetermined                     &lt;FILE&gt;     set output undetermined file
     --nothing                          &lt;FILE&gt;     set output nothing file

  Optional process for specific research
     --deep-snv                         &lt;none&gt;     will search hard to find SNPs
     --stringent-chimera                &lt;none&gt;     will search chimeras with more accuracy (but less sensitivity)

  Optional process launcher (once must be selected)
    * Exact matching tool
     --emt                              &lt;none&gt;     launch CRAC-emt for exact mapping of short reads

    * Server tool (for debugging)
     --server                           &lt;none&gt;     launch CRAC server,the output arguments will
                                                   not be taken into account
     --input-name-server                &lt;STRING&gt;   DEFAULT classify.fifo
     --output-name-server               &lt;STRING&gt;   DEFAULT classify.out.fifo

  Additional settings for users
    * Sam output file
     --detailed-sam                     &lt;none&gt;     more informations are added in SAM output file

    * Mapping
     --min-percent-single-loc           &lt;FLOAT&gt;    DEFAULT 0.15
     --min-duplication                  &lt;INT&gt;      DEFAULT 2
     --max-duplication                  &lt;INT&gt;      DEFAULT 9
     --min-percent-duplication-loc      &lt;FLOAT&gt;    DEFAULT 0.15
     --min-percent-multiple-loc         &lt;FLOAT&gt;    DEFAULT 0.50
     --min-repetition                   &lt;INT&gt;      DEFAULT 20
     --min-percent-repetition-loc       &lt;FLOAT&gt;    DEFAULT 0.20
    * Biological causes
     --max-splice-length                &lt;INT&gt;      DEFAULT 300000
     --max-paired-end-length            &lt;INT&gt;      DEFAULT 300000
     --max-bio-indel                    &lt;INT&gt;      DEFAULT 15
     --max-bases-retrieved              &lt;INT&gt;      DEFAULT 15
    * Undetermined
     --min-support-no-cover             &lt;FLOAT&gt;    DEFAULT 1.30

  Additional settings for advanced users
    * Break verification and fusion (merging mirage breaks)
     --min-break-length                 &lt;FLOAT&gt; DEFAULT 0.50
     --max-bases-randomly-matched       &lt;INT&gt;   DEFAULT 10
     --max-extension-length             &lt;INT&gt;   DEFAULT 10

    * Threading
     --nb-tags-info-stored              &lt;INT&gt;   DEFAULT 1000

    * Deep SNV search option
     --nb-nucleotides-snv-comparison    &lt;INT&gt;   DEFAULT 8
   </help>

</tool>
author	bonsai
date	Fri, 13 Sep 2013 10:36:13 -0400
parents	4cf2808854bc
children	46d61dc5c92e