Mercurial > repos > bonsai > crac

--- a/crac/crac-index-wrapper.sh	Fri Sep 13 09:51:59 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,50 +0,0 @@
-#!/bin/sh
-
-# Recovering parameters from crac-index.xml
-###############################################################
-CRAC_INDEX_BINARY=crac-index
-BASE_FILENAME="$1"
-HTML_REPORT="$2"
-FILES_PATH="$3"
-BUCKET="$4"
-
-#echo Wrapper for crac-index
-#echo running: $0 "$@"
-
-shift 4
-
-#echo After shifting, args are:
-#echo "$@"
-
-mkdir -p "$FILES_PATH"
-
-# Execution of the command line (Submiting job to the cluster)
-###############################################################
-  CRAC_INDEX_CMD_LINE="$CRAC_INDEX_BINARY index "$FILES_PATH/$BASE_FILENAME" -b "$BUCKET" "$@""
-
-  out=`$CRAC_INDEX_CMD_LINE`
-
-  jID=`echo $out | awk {'print $3'}`
-fi
-
-cat << MARINE > "$HTML_REPORT"
-
-exit 0
-
-echo "<html>
-  <head>
-    <title>Files for Crac Index (crac_index)</title>
-  </head>
-  <body>
-    This index is composed of the following files:
-    <p/>
-    <ul>
-      <li><a href=""$BASE_FILENAME.conf"" type=""text/plain"">$BASE_FILENAME.conf</a></li>
-      <li><a href=""$BASE_FILENAME.ssa"" type=""application/binary"">$BASE_FILENAME.ssa</a></li>
-    </ul>
-  </body>
-</html>" > "$HTML_REPORT"
-
-
-MARINE
-
--- a/crac/crac-index.xml	Fri Sep 13 09:51:59 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,64 +0,0 @@
-<tool id="crac-index" name="CRAC index">
-
-   <description>Create genome indexes available to be used with CRAC mapping/annotation tool </description>
-
-   <command interpreter="bash">	<!-- Same invocation style as crac.xml: the wrapper is resolved next to this file -->
-     crac-index-wrapper.sh "$output_name" "$output" "$output.files_path" "$bucket" "$input_file"
-   </command>
-
-   <inputs>
-      <param name="input_file" type="data" label="Source file" format="fasta" help="You must choose a fasta file containing the genome"/>
-      <param name="output_name" type="text" value ="IndexOutput" label="Output name" help="Name must be different from 'index' word, otherwise CRAC-index will fail." />
-      <param name="bucket" type="integer" value="100000000" label="Bucket size" help="The size of the bucket for the index construction (default 100000000)."/>
-   </inputs>
-
-   <outputs>
-      <data name="output" format="crac_index" label="${output_name}.crac-index" />
-   </outputs>
-
-   <help>
-
-**What it does**
-
-Crac-index generates an indexed genome from a fasta file. This is especially useful for the Crac mapping/annotation tool.
-
-----------------------
-
-**Input Formats**
-
-Crac-index takes as input files any fasta or multi-fasta files.
-
-----------------------
-
-**Outputs**
-
-Crac-index on Galaxy produces a composite output named crac-index, which is made of an ssa file and a conf file. Both files are required in order to use your index.
-
-----------------------
-
-**Crac-index settings**
-
-
-Usage : ./crac-index [options] command output_file input_file
-
-  command must be :
-    index: create an index on the specified input file(s).
-
-  options can be :
-
-  -b bucket_size	 the size of the bucket for the index construction
-                  	 (default 100000000)
-  -d diff-cover 	 parameter for the index construction (default 1024)
-  -v              	 verbose mode
-
-  Examples:
-		./crac-index index myIndex sequence1.fa sequence2.fa sequence3.fa
-			You can specify FASTA or MultiFASTA file(s).
-			In this example, two files will be created:
-			- myIndex.ssa (index storing the compressed sequences)
-			- myIndex.conf (information on sequence names and length)
-
-   </help>
-
-
-</tool>
--- a/crac/crac.xml	Fri Sep 13 09:51:59 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,265 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<tool id="crac" name="CRAC" version="1.0">
-  <requirements>
-    <requirement type='package' version="1.3.0">crac</requirement>
-  </requirements>
-   <description>Analyzing RNAs in high-throughput sequencing data</description>
-   <command interpreter="bash"> crac_wrapper.sh
-              #if $Genome.which_genome == "prebuilt"
-   <!--1-->      "$Genome.prebuilt_genome.fields.path"
-              #else
-   <!--1-->      "$Genome.index_input.extra_files_path"
-              #end if
-   <!--2-->   $output_name.extra_files_path		<!-- Must stay second: the wrapper drops the first two arguments with shift 2 -->
-              #if $condi_compressed == "yes"
-                --gz
-              #end if
-              -r $input -k $kmer_length --read-length $read_length --sam $output_name
-              #if $condi_deep_snp.deepSNP == "yes"
-                 --deep-snv --nb-nucleotides-snv-comparison $condi_deep_snp.nb_nucleotides_snp_comparison
-              #end if
-              #if $choixSettings.settings == "experimental"
-                --max-splice-length $choixSettings.max_splice_length
-                --max-bio-indel $choixSettings.max_bio_indel
-                --max-bases-retrieved $choixSettings.max_bases_retrieved
-                --min-duplication $choixSettings.min_duplication
-                --max-duplication $choixSettings.max_duplication
-                --min-percent-single-loc $choixSettings.min_percent_single_loc
-                --min-percent-duplication-loc $choixSettings.min_percent_duplication_loc
-                --max-bases-randomly-matched $choixSettings.max_bases_randomly_matched
-                --max-extension-length $choixSettings.max_extension_length
-                --min-support-no-cover $choixSettings.min_support_no_cover
-                --min-break-length $choixSettings.min_break_length
-              #end if
-              #if str($detailed_sam) == "yes"
-                --detailed-sam
-              #end if
-   </command>
-
-   <inputs>
-      <!-- Normal Setting -->
-
-      <conditional name="Genome">			<!-- Conditional 3 (Which genome) -->
-      <param name="which_genome" type="select" label="Do you want to use a pre-built reference genome or a Crac-index generated genome from your history?" help="Pre-built reference genomes are generated by Crac-index.">
-         <option value="prebuilt"> Use a pre-built reference genome </option>
-         <option value="history"> Use a Crac-index generated genome from my history</option>
-      </param>
-
-      <when value="prebuilt">
-         <param name="prebuilt_genome" type="select" label="Select a reference genome" help="if your genome of interest is not listed - contact authors">
-            <options from_data_table="crac_indexes">
-            <filter type="sort_by" column="2" />
-            <validator type="no_options" message="No indexes are available" />
-            </options>
-         </param>
-      </when>
-
-      <when value="history">
-         <param name="index_input" format="crac_index" type="data" label="Reference Genome" help="Select an indexed Genome from your history"/>
-      </when>
-      </conditional>
-
-
-      <param name="input" format="txt,raw,fastq" type="data" label="Reads File" help="Select a file"/>
-      <param name="kmer_length" type="integer" min="12" max="32" value="21" label="k-mer length">
-         <help>k-mer length must be carefully chosen. A k-mer of that length must map to a unique location in the genome with a high probability. Recommended value for the human genome: 22</help>
-      </param>
-      <param name="read_length" type="integer" label="Read length" value="0" help="Set read length when all reads have the same length to dramatically increase computation speed. Default value (no read length considered) : 0. Note : If read length is set, shorter reads will be ignored, longer reads will be cut."/>
-      <param name="detailed_sam" type="select" label="Do you want a detailed sam output file ?" help="Detailed sam output file gives you information on the SNPs, Splice junctions, Sequencing errors, Chimeras, ..." >
-         <option value="no"> No, I do not want detailed sam output file </option>
-         <option value="yes"> Yes, I want detailed sam output file </option>
-         <when value="yes"/>				<!-- Suppress warnings-->
-         <when value="no"/>				<!-- Suppress warnings-->
-      </param>
-      <param name="condi_compressed" type="select" display="radio" label="Compress output files?">
-        <option value="yes">Yes</option>
-        <option value="no">No</option>
-      </param>
-      <conditional name="condi_deep_snp">		<!-- Conditional 1 -->
-		<param type="select" name="deepSNP" label="Search hard for SNPs?">
-			<option value="no" selected="true"> No, do not search hard for SNVs </option>
-			<option value="yes"> Yes, search hard for SNVs (takes more time) </option>
- 		</param>
-		<when value="yes">
-                   <param name="nb_nucleotides_snp_comparison" type="integer" value="8" label="Number of nucleotides for SNV comparison" help="Default value for human genome : 8. A smaller value will find more SNVs, but will be less accurate."/>
-                </when>
-		<when value="no"/> 			<!-- Suppress warnings -->
-      </conditional>					<!-- End Conditional 1 -->
-
-    <!-- Experimental Setting-->
-    <conditional name="choixSettings">			<!-- Conditional 2 (setting choice) -->
-    	<param name="settings" type="select" label="Advanced CRAC settings to use" help="If you want full control to optimize your experience, use Advanced Settings. Be careful, these settings are experimental and one single change can make Crac fail">
-    	    <option value="normal" selected="true"> Normal settings </option>
-    	    <option value="experimental"> Advanced Settings </option>
-    	</param>
-    	<when value="normal"/>				<!-- Suppress warnings-->
-    	<when value="experimental">			<!-- Advanced settings -->
-           <param name="max_splice_length" type="integer" value="300000" label="Maximum splice length" help="Splices larger than this value, will not be considered as splices, but (if possible) as chimeras. Default value for human genome : 300,000 bp." />
-           <param name="max_bio_indel" type="integer" value="15" label="Maximum indel length. Larger indels will be considered as splice junctions" help=" HELP. Default value for human genome : 15 bp." />
-           <param name="min_duplication" type="integer" value="2" label="Minimum duplication occurrence" help=" Minimum number of k-mer occurrences in the genome to be considered as duplicated. Default value for human genome : 2." />
-           <param name="max_duplication" type="integer" value="9" label="Maximum duplication occurrence" help=" Maximum number of k-mer occurrences to be considered as duplicated. Default value for human genome : 9." />
-           <param name="min_percent_single_loc" type="float" value="0.15" label="Minimum unique location percentage" help=" Minimal percentage of k-mers that must be unique in the genome, to consider the read as unique. Default value for human genome : 0.15." />
-           <param name="min_percent_duplication_loc" type="float" value="0.20" label="Minimum duplicated location percentage" help=" Minimal percentage of k-mers that must be duplicated in the genome, to consider the read as duplicated. Default value for human genome : 0.20." />
-           <!--param name="min_percent_multiple_loc" type="float" value="0.20" label="Minimum percent multiple localisation" help=" HELP. Default value for human genome : 0.20." /-->
-           <param name="max_bases_randomly_matched" type="integer" value="10" label="Maximum bases randomly matched" help=" Maximum number of bases that can be considered as randomly matched. Default value for human genome : 10." />
-           <param name="max_bases_retrieved" type="integer" value="10" label="Maximum bases retrieved" help=" Maximum number of bases retrieved from the genome when outputting deletions. Default value for human genome : 10." />
-           <param name="max_extension_length" type="integer" value="10" label="Maximum extension length" help=" Maximal number of nucleotides visited to extend a break and to make sure that the location is consistent. Default value for human genome : 10." />
-           <param name="min_support_no_cover" type="float" value="1.30" label="Minimum support no cover" help=" Average coverage along the read to consider it as not covered. Default value for human genome : 1.30." />
-           <param name="min_break_length" type="float" value="0.5" label="Minimum break length" help=" Breaks shorter than this ratio times the k-mer length will be considered as too short and will be merged if necessary. Default value for human genome : 0.5." />
-        </when>						<!-- End "when experimental" -->
-    </conditional>					<!-- End Conditional 2 -->
-   </inputs>
-
-   <outputs>
-	<data name="output_name" format="sam" label="${tool.name} on ${on_string}: mapped reads" />
-   </outputs>
-
-   <tests>
-      <test>
-      </test>
-   </tests>
-
-   <help>
-**What it does**
-
-CRAC proposes a novel way of analyzing reads that integrates genomic locations
-and local coverage, and delivers all above mentioned predictions in a single
-step. CRAC uses a double k-mer profiling approach to detect candidate
-mutations, indels, splice or fusion junctions in each single read.
-
-.. _CRAC: http://crac.gforge.inria.fr/
-
-If you use this tool, please cite:
-  - Philippe N., Salson M., Commes T., Rivals E., `"CRAC: an integrated approach to the analysis of RNA-seq reads"`__, Genome Biology (2013), 14:R30, doi: 10.1186/gb-2013-14-3-r30.
-
-.. __: http://genomebiology.com/2013/14/3/R30/
-
-------
-
-**Input formats**
-
-CRAC accepts files in FASTA, FASTQ or any text format (txt, raw, ...).
-
-------
-
-**Output**
-
-The output is in SAM format. If you choose the detailed SAM output, CRAC adds several fields that provide additional information. You can see the details here: http://crac.gforge.inria.fr/index.php?id=sam-documentation
-
-
-------
-
-**Crac settings**
-
-Main options are displayed at the top of the page. If you are an experienced user, you can choose to display
-all of the CRAC settings.  Most of the options in CRAC have been implemented here.
-
-------
-crac 1.3.0      Compiled on Sep 13 2013.
-
-   -h, --help           <none>          print this help and exit
-   -f, --full-help      <none>          print a complete help and exit
-   -v                   <none>          print version and exit
-
-Mandatory arguments
-   -i                   <FILE>          set genome index file (without the extension filename)
-   -r                   <FILE> [FILE2]  set read file. Specify FILE2 in case of paired-end reads
-   -k                   <INT>           set k-mer length
-   -o, --sam            <FILE>          set SAM output filename or print on STDOUT with "-o -" argument
-
-Optional arguments
-  * Protocol
-   --stranded           <none>          set the read mapping with for a strand specific library (DEFAULT non-strand specific)
-
-  * Efficiency
-   --nb-threads         <INT>           set the number of worker threads (DEFAULT 1)
-   --read-length, -m    <INT>           set read length in case of all reads have the same length to optimize
-                                        CPU and memory times
-   --treat-multiple     <none>          consider alignments with multiple locations (>max-duplication) rather than considering a no-alignment in the SAM file
-   --max-locs           <INT>           set the maximum number of locations on the reference index (DEFAULT 300)
-
-  * Accuracy
-   --no-ambiguity       <none>          discard biological events (splice, snv, indel, chimera) which have several matches on the reference index
-
-
-Optional output arguments
-   --all                              <FILE>     set output base filename for all causes following
-   --gz                               <none>     all output files specified after this argument are gzipped
-
-  * Summary and statistics
-   --summary                          <FILE>     set output summary file
-  * Mapping
-   --single                           <FILE>     set output single file
-   --duplicate                        <FILE>     set output duplication file
-   --multiple                         <FILE>     set output multiple file
-   --none                             <FILE>     set output none file
-   --normal                           <FILE>     set output normal file
-   --almost-normal                    <FILE>     set output almost normal file
-
-  * Biological causes
-   --snv                              <FILE>     set output SNV file
-   --indel                            <FILE>     set output short indel file
-   --splice                           <FILE>     set output splice junction file
-   --weak-splice                      <FILE>     set output coverless splice junction file
-   --chimera                          <FILE>     set output chimera junction file
-   --paired-end-chimera               <FILE>     set output for paired-end chimera file
-   --biological                       <FILE>     set output bio-undetermined file
-
-  * Sequence errors
-   --errors                           <FILE>     set output sequence errors file
-
-  * Repetition
-   --repeat                           <FILE>     set output repetition file
-
-  * Other causes
-   --undetermined                     <FILE>     set output undetermined file
-   --nothing                          <FILE>     set output nothing file
-
-Optional process for specific research
-   --deep-snv                         <none>     will search hard to find SNPs
-   --stringent-chimera                <none>     will search chimeras with more accuracy (but less sensitivity)
-
-Optional process launcher (once must be selected)
-  * Exact matching tool
-   --emt                              <none>     launch CRAC-emt for exact mapping of short reads
-
-  * Server tool (for debugging)
-   --server                           <none>     launch CRAC server,the output arguments will
-                                                 not be taken into account
-   --input-name-server                <STRING>   DEFAULT classify.fifo
-   --output-name-server               <STRING>   DEFAULT classify.out.fifo
-
-Additional settings for users
-  * Sam output file
-   --detailed-sam                     <none>     more informations are added in SAM output file
-
-  * Mapping
-   --min-percent-single-loc           <FLOAT>    DEFAULT 0.15
-   --min-duplication                  <INT>      DEFAULT 2
-   --max-duplication                  <INT>      DEFAULT 9
-   --min-percent-duplication-loc      <FLOAT>    DEFAULT 0.15
-   --min-percent-multiple-loc         <FLOAT>    DEFAULT 0.50
-   --min-repetition                   <INT>      DEFAULT 20
-   --min-percent-repetition-loc       <FLOAT>    DEFAULT 0.20
-  * Biological causes
-   --max-splice-length                <INT>      DEFAULT 300000
-   --max-paired-end-length            <INT>      DEFAULT 300000
-   --max-bio-indel                    <INT>      DEFAULT 15
-   --max-bases-retrieved              <INT>      DEFAULT 15
-  * Undetermined
-   --min-support-no-cover             <FLOAT>    DEFAULT 1.30
-
-Additional settings for advanced users
-  * Break verification and fusion (merging mirage breaks)
-   --min-break-length                 <FLOAT> DEFAULT 0.50
-   --max-bases-randomly-matched       <INT>   DEFAULT 10
-   --max-extension-length             <INT>   DEFAULT 10
-
-  * Threading
-   --nb-tags-info-stored              <INT>   DEFAULT 1000
-
-  * Deep SNV search option
-   --nb-nucleotides-snv-comparison    <INT>   DEFAULT 8
-   </help>
-
-</tool>
--- a/crac/crac_wrapper.sh	Fri Sep 13 09:51:59 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,37 +0,0 @@
-#!/bin/sh
-
-# Recovering special parameters from crac.xml
-###############################################################
-CRAC_BINARY=crac
-INDEX_INPUT="$1"
-
-# Getting the indexed genome value
-###############################################################
-# Getting the indexed Genome name without the extension
-if [ -d "$INDEX_INPUT" ]; then				# If $INDEX_INPUT is a directory (that is to say an index from the history)
-  cpt=0
-  for fichier in $INDEX_INPUT/*.ssa
-    do
-      if [ $((++cpt)) -gt 1 ]; then 			#More than 1 '.ssa' file is not expected
-        echo "Warning:Multiple indexes found [$INDEX]" >&2
-      fi
-    INDEX=${fichier%%.ssa}				#Getting the index from history
-    done
-  else
-    INDEX="$INDEX_INPUT"				#Getting the prebuilt index
-fi
-if [ ! -f "$INDEX.ssa" -a ! -f "$INDEX.conf" ]; then	#Both '.ssa' and '.conf' files are required
-  echo "Error:Index not found [$INDEX]" >&2
-  exit 1
-fi
-
-# Execution of the command line (Submiting job to the cluster)
-###############################################################
-shift 2					#Avoiding index_input and output_name.extra_files_path
-
-CRAC_CMD_LINE=""$CRAC_BINARY" -i "$INDEX" "$@""
-
-out=`$CRAC_CMD_LINE`
-
-exit 0
-
--- a/crac/tool_dependencies.xml	Fri Sep 13 09:51:59 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,16 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="crac" version="1.3.0">
-        <install version="1.0">
-            <actions>
-                <action type="download_by_url">https://gforge.inria.fr/frs/download.php/32471/crac-1.3.0.tar.gz</action>
-                <action type="shell_command">./configure --prefix=$INSTALL_DIR</action>
-                <action type="shell_command">make</action>
-                <action type="shell_command">make check</action>
-                <action type="shell_command">make install</action>	<!-- Without an install step nothing is left in $INSTALL_DIR -->
-                <action type="set_environment"><environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable></action>
-            </actions>
-        </install>
-        <readme>CRAC requires g++ 4.3 or later.</readme>
-    </package>
-</tool_dependency>