Files for Crac Index (crac_index)

# HG changeset patch # User bonsai # Date 1379080860 14400 # Node ID 4cf2808854bcab2a651ac17db504a0c161969bb5 # Parent dbb83adec9eb5e13e2cbf8a5c8a9249e83871be7 Uploaded diff -r dbb83adec9eb -r 4cf2808854bc crac-index-wrapper.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/crac-index-wrapper.sh Fri Sep 13 10:01:00 2013 -0400 @@ -0,0 +1,50 @@ +#!/bin/sh + +# Recovering parameters from crac-index.xml +############################################################### +CRAC_INDEX_BINARY=crac-index +BASE_FILENAME="$1" +HTML_REPORT="$2" +FILES_PATH="$3" +BUCKET="$4" + +#echo Wrapper for crac-index +#echo running: $0 "$@" + +shift 4 + +#echo After shifting, args are: +#echo "$@" + +mkdir -p "$FILES_PATH" + +# Execution of the command line (Submiting job to the cluster) +############################################################### + CRAC_INDEX_CMD_LINE="$CRAC_INDEX_BINARY index "$FILES_PATH/$BASE_FILENAME" -b "$BUCKET" "$@"" + + out=`$CRAC_INDEX_CMD_LINE` + + jID=`echo $out | awk {'print $3'}` +fi + +cat << MARINE > "$HTML_REPORT" + +exit 0 + +echo " + + Files for Crac Index (crac_index) + + + This index is composed of the following files: +

$BASE_FILENAME.conf
$BASE_FILENAME.ssa

+ +" > "$HTML_REPORT" + + +MARINE + diff -r dbb83adec9eb -r 4cf2808854bc crac-index.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/crac-index.xml Fri Sep 13 10:01:00 2013 -0400 @@ -0,0 +1,64 @@ + + + Create genome indexes available to be used with CRAC mapping/annotation tool + + + crac-index-wrapper.sh "$output_name" "$output" "$output.files_path" "$bucket" "$input_file" + + + + + + + + + + + + + + +**What it does** + +Crac-index generates an indexed genome from a fasta file. This is especially useful for the Crac mapping/annotation tool. + +---------------------- + +**Input Formats** + +Crac-index accepts any fasta or multi-fasta file as input. + +---------------------- + +**Outputs** + +Crac-index on Galaxy produces a composite output named crac-index, which is made of a ssa file and a conf file. Both are required in order to use your index. + +---------------------- + +**Crac-index settings** + + +Usage : ./crac-index [options] command output_file input_file + + command must be : + index: create an index on the specified input file(s). + + options can be : + + -b bucket_size the size of the bucket for the index construction + (default 100000000) + -d diff-cover parameter for the index construction (default 1024) + -v verbose mode + + Examples: + ./crac-index index myIndex sequence1.fa sequence2.fa sequence3.fa + You can specify FASTA or MultiFASTA file(s). 
+ In this example, two files will be created: + - myIndex.ssa (index storing the compressed sequences) + - myIndex.conf (information on sequence names and length) + + + + + diff -r dbb83adec9eb -r 4cf2808854bc crac.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/crac.xml Fri Sep 13 10:01:00 2013 -0400 @@ -0,0 +1,265 @@ + + + + crac + + Analyzing RNAs in high-throughput sequencing data + crac_wrapper.sh + #if $Genome.which_genome == "prebuilt" + "$Genome.prebuilt_genome.fields.path" + #else + "$Genome.index_input.extra_files_path" + #end if + #if $condi_compressed == "yes" + --gz + #end if + $output_name.extra_files_path + -r $input -k $kmer_length --read-length $read_length --sam $output_name + #if $condi_deep_snp.deepSNP == "yes" + --deep-snv --nb-nucleotides-snv-comparison $condi_deep_snp.nb_nucleotides_snp_comparison + #end if + #if $choixSettings.settings == "experimental" + --max-splice-length $choixSettings.max_splice_length + --max-bio-indel $choixSettings.max_bio_indel + --min-duplication $choixSettings.min_duplication + --max-duplication $choixSettings.max_duplication + --min-percent-single-loc $choixSettings.min_percent_single_loc + --min-percent-duplication-loc $choixSettings.min_percent_duplication_loc + --max-bases-randomly-matched $choixSettings.max_bases_randomly_matched + --max-extension-length $choixSettings.max_extension_length + --min-support-no-cover $choixSettings.min_support_no_cover + --min-break-length $choixSettings.min_break_length + #end if + #if str($detailed_sam) == "yes" + --detailed-sam + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + k-mer length must be carefully chosen. A k-mer of that length must map to a unique location in the genome with a high probability. 
Recommended value for the human genome: 22 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +CRAC proposes a novel way of analyzing reads that integrates genomic locations +and local coverage, and delivers all above mentioned predictions in a single +step. CRAC uses a double k-mer profiling approach to detect candidate +mutations, indels, splice or fusion junctions in each single read. + +.. _CRAC: http://crac.gforge.inria.fr/ + +If you use this tool, please cite: + - Philippe N., Salson M., Commes T., Rivals E., `"CRAC: an integrated approach to the analysis of RNA-seq reads"`__, Genome Biology (2013), 14:R30, doi: 10.1186/gb-2013-14-3-r30. + +.. __: http://genomebiology.com/2013/14/3/R30/ + +------ + +**Input formats** + +CRAC accepts files in FASTA, FASTQ or any text format (txt, raw, ...). + +------ + +**Output** + +The output is in SAM format. If you choose the detailed SAM output, CRAC adds several flags that provide more information. You can see the details here: http://crac.gforge.inria.fr/index.php?id=sam-documentation + + +------ + +**Crac settings** + +Main options are displayed at the top of the page. If you're an experienced user, you can choose to display +the full set of Crac settings. Most of the options in Crac have been implemented here. + +------ +crac 1.3.0 Compiled on Sep 13 2013. + + -h, --help print this help and exit + -f, --full-help print a complete help and exit + -v print version and exit + +Mandatory arguments + -i set genome index file (without the extension filename) + -r [FILE2] set read file. 
Specify FILE2 in case of paired-end reads + -k set k-mer length + -o, --sam set SAM output filename or print on STDOUT with "-o -" argument + +Optional arguments + * Protocol + --stranded set the read mapping with for a strand specific library (DEFAULT non-strand specific) + + * Efficiency + --nb-threads set the number of worker threads (DEFAULT 1) + --read-length, -m set read length in case of all reads have the same length to optimize + CPU and memory times + --treat-multiple consider alignments with multiple locations (>max-duplication) rather than considering a no-alignment in the SAM file + --max-locs set the maximum number of locations on the reference index (DEFAULT 300) + + * Accuracy + --no-ambiguity discard biological events (splice, snv, indel, chimera) which have several matches on the reference index + + +Optional output arguments + --all set output base filename for all causes following + --gz all output files specified after this argument are gzipped + + * Summary and statistics + --summary set output summary file + * Mapping + --single set output single file + --duplicate set output duplication file + --multiple set output multiple file + --none set output none file + --normal set output normal file + --almost-normal set output almost normal file + + * Biological causes + --snv set output SNV file + --indel set output short indel file + --splice set output splice junction file + --weak-splice set output coverless splice junction file + --chimera set output chimera junction file + --paired-end-chimera set output for paired-end chimera file + --biological set output bio-undetermined file + + * Sequence errors + --errors set output sequence errors file + + * Repetition + --repeat set output repetition file + + * Other causes + --undetermined set output undetermined file + --nothing set output nothing file + +Optional process for specific research + --deep-snv will search hard to find SNPs + --stringent-chimera will search chimeras with more accuracy 
(but less sensitivity) + +Optional process launcher (once must be selected) + * Exact matching tool + --emt launch CRAC-emt for exact mapping of short reads + + * Server tool (for debugging) + --server launch CRAC server,the output arguments will + not be taken into account + --input-name-server DEFAULT classify.fifo + --output-name-server DEFAULT classify.out.fifo + +Additional settings for users + * Sam output file + --detailed-sam more informations are added in SAM output file + + * Mapping + --min-percent-single-loc DEFAULT 0.15 + --min-duplication DEFAULT 2 + --max-duplication DEFAULT 9 + --min-percent-duplication-loc DEFAULT 0.15 + --min-percent-multiple-loc DEFAULT 0.50 + --min-repetition DEFAULT 20 + --min-percent-repetition-loc DEFAULT 0.20 + * Biological causes + --max-splice-length DEFAULT 300000 + --max-paired-end-length DEFAULT 300000 + --max-bio-indel DEFAULT 15 + --max-bases-retrieved DEFAULT 15 + * Undetermined + --min-support-no-cover DEFAULT 1.30 + +Additional settings for advanced users + * Break verification and fusion (merging mirage breaks) + --min-break-length DEFAULT 0.50 + --max-bases-randomly-matched DEFAULT 10 + --max-extension-length DEFAULT 10 + + * Threading + --nb-tags-info-stored DEFAULT 1000 + + * Deep SNV search option + --nb-nucleotides-snv-comparison DEFAULT 8 + + + diff -r dbb83adec9eb -r 4cf2808854bc crac_wrapper.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/crac_wrapper.sh Fri Sep 13 10:01:00 2013 -0400 @@ -0,0 +1,37 @@ +#!/bin/sh + +# Recovering special parameters from crac.xml +############################################################### +CRAC_BINARY=crac +INDEX_INPUT="$1" + +# Getting the indexed genome value +############################################################### +# Getting the indexed Genome name without the extension +if [ -d "$INDEX_INPUT" ]; then # If $INDEX_INPUT is a directory (that is to say an index from the history) + cpt=0 + for fichier in $INDEX_INPUT/*.ssa + do + if [ $((++cpt)) -gt 1 ]; 
then #More than 1 '.ssa' file is not expected + echo "Warning:Multiple indexes found [$INDEX]" >&2 + fi + INDEX=${fichier%%.ssa} #Getting the index from history + done + else + INDEX="$INDEX_INPUT" #Getting the prebuilt index +fi +if [ ! -f "$INDEX.ssa" -a ! -f "$INDEX.conf" ]; then #Both '.ssa' and '.conf' files are required + echo "Error:Index not found [$INDEX]" >&2 + exit 1 +fi + +# Execution of the command line (Submiting job to the cluster) +############################################################### +shift 2 #Avoiding index_input and output_name.extra_files_path + +CRAC_CMD_LINE=""$CRAC_BINARY" -i "$INDEX" "$@"" + +out=`$CRAC_CMD_LINE` + +exit 0 + diff -r dbb83adec9eb -r 4cf2808854bc tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Fri Sep 13 10:01:00 2013 -0400 @@ -0,0 +1,16 @@ + + + + + + https://gforge.inria.fr/frs/download.php/32471/crac-1.3.0.tar.gz + ./configure + make + make check + + + +CRAC requires g++ 4.3 or later. + + +