Files for Crac Index (crac_index)

# HG changeset patch # User bonsai # Date 1379080839 14400 # Node ID dbb83adec9eb5e13e2cbf8a5c8a9249e83871be7 # Parent e6e516ff34a87ace5c8b072c0110d34869fc481e Deleted selected files diff -r e6e516ff34a8 -r dbb83adec9eb crac/crac-index-wrapper.sh --- a/crac/crac-index-wrapper.sh Fri Sep 13 09:51:59 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,50 +0,0 @@ -#!/bin/sh - -# Recovering parameters from crac-index.xml -############################################################### -CRAC_INDEX_BINARY=crac-index -BASE_FILENAME="$1" -HTML_REPORT="$2" -FILES_PATH="$3" -BUCKET="$4" - -#echo Wrapper for crac-index -#echo running: $0 "$@" - -shift 4 - -#echo After shifting, args are: -#echo "$@" - -mkdir -p "$FILES_PATH" - -# Execution of the command line (Submiting job to the cluster) -############################################################### - CRAC_INDEX_CMD_LINE="$CRAC_INDEX_BINARY index "$FILES_PATH/$BASE_FILENAME" -b "$BUCKET" "$@"" - - out=`$CRAC_INDEX_CMD_LINE` - - jID=`echo $out | awk {'print $3'}` -fi - -cat << MARINE > "$HTML_REPORT" - -exit 0 - -echo " - - Files for Crac Index (crac_index) - - - This index is composed of the following files: -

$BASE_FILENAME.conf
$BASE_FILENAME.ssa

- -" > "$HTML_REPORT" - - -MARINE - diff -r e6e516ff34a8 -r dbb83adec9eb crac/crac-index.xml --- a/crac/crac-index.xml Fri Sep 13 09:51:59 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,64 +0,0 @@ - - - Create genome indexes available to be used with CRAC mapping/annotation tool - - - crac-index-wrapper.sh "$output_name" "$output" "$output.files_path" "$bucket" "$input_file" - - - - - - - - - - - - - - -**What it does** - -Crac-index generates an indexed genome from a fasta file. This is especially useful for the Crac mapping/annotation tool. - ----------------------- - -**Input Formats** - -Crac-index takes as input files any fasta or multi-fasta files. - ----------------------- - -**Outputs** - -Crac-index on Galaxy produces a composite output named crac-index, which is made of a ssa file and a conf file. Both are required to the use of your index. - ----------------------- - -**Crac-index settings** - - -Usage : ./crac-index [options] command output_file input_file - - command must be : - index: create an index on the specified input file(s). - - options can be : - - -b bucket_size the size of the bucket for the index construction - (default 100000000) - -d diff-cover parameter for the index construction (default 1024) - -v verbose mode - - Examples: - ./crac-index index myIndex sequence1.fa sequence2.fa sequence3.fa - You can specify FASTA or MultiFASTA file(s). 
- In this example, two files will be created: - - myIndex.ssa (index storing the compressed sequences) - - myIndex.conf (information on sequence names and length) - - - - - diff -r e6e516ff34a8 -r dbb83adec9eb crac/crac.xml --- a/crac/crac.xml Fri Sep 13 09:51:59 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,265 +0,0 @@ - - - - crac - - Analyzing RNAs in high-throughput sequencing data - crac_wrapper.sh - #if $Genome.which_genome == "prebuilt" - "$Genome.prebuilt_genome.fields.path" - #else - "$Genome.index_input.extra_files_path" - #end if - #if $condi_compressed == "yes" - --gz - #end if - $output_name.extra_files_path - -r $input -k $kmer_length --read-length $read_length --sam $output_name - #if $condi_deep_snp.deepSNP == "yes" - --deep-snv --nb-nucleotides-snv-comparison $condi_deep_snp.nb_nucleotides_snp_comparison - #end if - #if $choixSettings.settings == "experimental" - --max-splice-length $choixSettings.max_splice_length - --max-bio-indel $choixSettings.max_bio_indel - --min-duplication $choixSettings.min_duplication - --max-duplication $choixSettings.max_duplication - --min-percent-single-loc $choixSettings.min_percent_single_loc - --min-percent-duplication-loc $choixSettings.min_percent_duplication_loc - --max-bases-randomly-matched $choixSettings.max_bases_randomly_matched - --max-extension-length $choixSettings.max_extension_length - --min-support-no-cover $choixSettings.min_support_no_cover - --min-break-length $choixSettings.min_break_length - #end if - #if str($detailed_sam) == "yes" - --detailed-sam - #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - k-mer length must be carefully chosen. A k-mer of that length must map to a unique location in the genome with a high probability. 
Recommended value for the human genome: 22 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - -CRAC proposes a novel way of analyzing reads that integrates genomic locations -and local coverage, and delivers all above mentioned predictions in a single -step. CRAC uses a double k-mer profiling approach to detect candidate -mutations, indels, splice or fusion junctions in each single read. - -.. _CRAC: http://crac.gforge.inria.fr/ - -If you use this tool, please cite: - - Philippe N., Salson M., Commes T., Rivals E., `"CRAC: an integrated approach to the analysis of RNA-seq reads"`__, Genome Biology (2013), 14:R30, doi: 10.1186/gb-2013-14-3-r30. - -.. __: http://genomebiology.com/2013/14/3/R30/ - ------- - -**Input formats** - -CRAC accepts files in FASTA, FASTQ or any text format (txt, raw, ...). - ------- - -**Output** - -The output is in SAM format. If you choose the detailed SAM output, CRAC adds several flags to tell more informations. You can see the details here: http://crac.gforge.inria.fr/index.php?id=sam-documentation - - ------- - -**Crac settings** - -Main options are displayed at the top of the page. If you're an experimented user, you can choose to display -the whole Crac setting. Most of the options in Crac have been implemented here. - ------- -crac 1.3.0 Compiled on Sep 13 2013. - - -h, --help print this help and exit - -f, --full-help print a complete help and exit - -v print version and exit - -Mandatory arguments - -i set genome index file (without the extension filename) - -r [FILE2] set read file. 
Specify FILE2 in case of paired-end reads - -k set k-mer length - -o, --sam set SAM output filename or print on STDOUT with "-o -" argument - -Optional arguments - * Protocol - --stranded set the read mapping with for a strand specific library (DEFAULT non-strand specific) - - * Efficiency - --nb-threads set the number of worker threads (DEFAULT 1) - --read-length, -m set read length in case of all reads have the same length to optimize - CPU and memory times - --treat-multiple consider alignments with multiple locations (>max-duplication) rather than considering a no-alignment in the SAM file - --max-locs set the maximum number of locations on the reference index (DEFAULT 300) - - * Accuracy - --no-ambiguity discard biological events (splice, snv, indel, chimera) which have several matches on the reference index - - -Optional output arguments - --all set output base filename for all causes following - --gz all output files specified after this argument are gzipped - - * Summary and statistics - --summary set output summary file - * Mapping - --single set output single file - --duplicate set output duplication file - --multiple set output multiple file - --none set output none file - --normal set output normal file - --almost-normal set output almost normal file - - * Biological causes - --snv set output SNV file - --indel set output short indel file - --splice set output splice junction file - --weak-splice set output coverless splice junction file - --chimera set output chimera junction file - --paired-end-chimera set output for paired-end chimera file - --biological set output bio-undetermined file - - * Sequence errors - --errors set output sequence errors file - - * Repetition - --repeat set output repetition file - - * Other causes - --undetermined set output undetermined file - --nothing set output nothing file - -Optional process for specific research - --deep-snv will search hard to find SNPs - --stringent-chimera will search chimeras with more accuracy 
(but less sensitivity) - -Optional process launcher (once must be selected) - * Exact matching tool - --emt launch CRAC-emt for exact mapping of short reads - - * Server tool (for debugging) - --server launch CRAC server,the output arguments will - not be taken into account - --input-name-server DEFAULT classify.fifo - --output-name-server DEFAULT classify.out.fifo - -Additional settings for users - * Sam output file - --detailed-sam more informations are added in SAM output file - - * Mapping - --min-percent-single-loc DEFAULT 0.15 - --min-duplication DEFAULT 2 - --max-duplication DEFAULT 9 - --min-percent-duplication-loc DEFAULT 0.15 - --min-percent-multiple-loc DEFAULT 0.50 - --min-repetition DEFAULT 20 - --min-percent-repetition-loc DEFAULT 0.20 - * Biological causes - --max-splice-length DEFAULT 300000 - --max-paired-end-length DEFAULT 300000 - --max-bio-indel DEFAULT 15 - --max-bases-retrieved DEFAULT 15 - * Undetermined - --min-support-no-cover DEFAULT 1.30 - -Additional settings for advanced users - * Break verification and fusion (merging mirage breaks) - --min-break-length DEFAULT 0.50 - --max-bases-randomly-matched DEFAULT 10 - --max-extension-length DEFAULT 10 - - * Threading - --nb-tags-info-stored DEFAULT 1000 - - * Deep SNV search option - --nb-nucleotides-snv-comparison DEFAULT 8 - - - diff -r e6e516ff34a8 -r dbb83adec9eb crac/crac_wrapper.sh --- a/crac/crac_wrapper.sh Fri Sep 13 09:51:59 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,37 +0,0 @@ -#!/bin/sh - -# Recovering special parameters from crac.xml -############################################################### -CRAC_BINARY=crac -INDEX_INPUT="$1" - -# Getting the indexed genome value -############################################################### -# Getting the indexed Genome name without the extension -if [ -d "$INDEX_INPUT" ]; then # If $INDEX_INPUT is a directory (that is to say an index from the history) - cpt=0 - for fichier in $INDEX_INPUT/*.ssa - do - if [ $((++cpt)) 
-gt 1 ]; then #More than 1 '.ssa' file is not expected - echo "Warning:Multiple indexes found [$INDEX]" >&2 - fi - INDEX=${fichier%%.ssa} #Getting the index from history - done - else - INDEX="$INDEX_INPUT" #Getting the prebuilt index -fi -if [ ! -f "$INDEX.ssa" -a ! -f "$INDEX.conf" ]; then #Both '.ssa' and '.conf' files are required - echo "Error:Index not found [$INDEX]" >&2 - exit 1 -fi - -# Execution of the command line (Submiting job to the cluster) -############################################################### -shift 2 #Avoiding index_input and output_name.extra_files_path - -CRAC_CMD_LINE=""$CRAC_BINARY" -i "$INDEX" "$@"" - -out=`$CRAC_CMD_LINE` - -exit 0 - diff -r e6e516ff34a8 -r dbb83adec9eb crac/tool_dependencies.xml --- a/crac/tool_dependencies.xml Fri Sep 13 09:51:59 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,16 +0,0 @@ - - - - - - https://gforge.inria.fr/frs/download.php/32471/crac-1.3.0.tar.gz - ./configure - make - make check - - - -CRAC requires g++ 4.3 or later. - - -