Mercurial > repos > bonsai > crac
changeset 2:dbb83adec9eb draft
Deleted selected files
author | bonsai |
---|---|
date | Fri, 13 Sep 2013 10:00:39 -0400 |
parents | e6e516ff34a8 |
children | 4cf2808854bc |
files | crac/crac-index-wrapper.sh crac/crac-index.xml crac/crac.xml crac/crac_wrapper.sh crac/tool_dependencies.xml |
diffstat | 5 files changed, 0 insertions(+), 432 deletions(-) [+] |
line wrap: on
line diff
--- a/crac/crac-index-wrapper.sh Fri Sep 13 09:51:59 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,50 +0,0 @@
-#!/bin/sh
-#
-# Galaxy wrapper for crac-index.
-#
-# Usage: crac-index-wrapper.sh BASE_FILENAME HTML_REPORT FILES_PATH BUCKET INPUT...
-#   BASE_FILENAME  index name; FILES_PATH/BASE_FILENAME is given to crac-index
-#                  as its output file
-#   HTML_REPORT    file that receives the HTML page listing the index files
-#   FILES_PATH     directory holding the index files (created if missing)
-#   BUCKET         value passed to crac-index with -b
-#   INPUT...       remaining arguments, forwarded unchanged to crac-index
-#
-# Exit status: 1 on bad usage or if FILES_PATH cannot be created, the status
-# of crac-index if it fails, 0 otherwise.
-
-# Recovering parameters from crac-index.xml
-###############################################################
-CRAC_INDEX_BINARY=crac-index
-
-if [ "$#" -lt 5 ]; then
-    echo "Usage: $0 BASE_FILENAME HTML_REPORT FILES_PATH BUCKET INPUT..." >&2
-    exit 1
-fi
-
-BASE_FILENAME="$1"
-HTML_REPORT="$2"
-FILES_PATH="$3"
-BUCKET="$4"
-
-# Drop the four wrapper-only parameters; "$@" now holds the input file(s).
-shift 4
-
-mkdir -p "$FILES_PATH" || exit 1
-
-# Execution of the command line
-###############################################################
-# The command is run directly (not through a flattened string) so that every
-# argument keeps its boundaries, and a failure of crac-index is reported to
-# the caller instead of being hidden behind an unconditional "exit 0".
-"$CRAC_INDEX_BINARY" index "$FILES_PATH/$BASE_FILENAME" -b "$BUCKET" "$@" || exit $?
-
-# HTML report listing the two files that make up the index.
-# The here-document delimiter is unquoted on purpose: $BASE_FILENAME must be
-# expanded inside the page.
-cat > "$HTML_REPORT" << MARINE
-<html>
- <head>
-  <title>Files for Crac Index (crac_index)</title>
- </head>
- <body>
-  This index is composed of the following files:
-  <p/>
-  <ul>
-   <li><a href="$BASE_FILENAME.conf" type="text/plain">$BASE_FILENAME.conf</a></li>
-   <li><a href="$BASE_FILENAME.ssa" type="application/binary">$BASE_FILENAME.ssa</a></li>
-  </ul>
- </body>
-</html>
-MARINE
-
-exit 0
--- a/crac/crac-index.xml Fri Sep 13 09:51:59 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,64 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<!-- Galaxy tool definition: builds a CRAC index (composite crac_index dataset) from a FASTA file. -->
-<tool id="crac-index" name="CRAC index">
-
-  <!-- NOTE(review): assumes the crac-index binary is provided by the same "crac" package that crac.xml requires; confirm. -->
-  <requirements>
-    <requirement type="package" version="1.3.0">crac</requirement>
-  </requirements>
-
-  <description>Create genome indexes available to be used with CRAC mapping/annotation tool </description>
-
-  <!--
-    Wrapper arguments, in order: index base name, HTML report file,
-    directory for the index files, bucket size, input FASTA file.
-    The wrapper is a script shipped next to this file, so it is run through
-    an interpreter (as crac.xml does) instead of being looked up on PATH.
-  -->
-  <command interpreter="bash">
-    crac-index-wrapper.sh "$output_name" "$output" "$output.files_path" "$bucket" "$input_file"
-  </command>
-
-  <inputs>
-    <param name="input_file" type="data" label="Source file" format="fasta" help="You must choose a fasta file containing the genome"/>
-    <param name="output_name" type="text" value="IndexOutput" label="Output name" help="Name must be different from 'index' word, otherwise CRAC-index will fail." />
-    <param name="bucket" type="integer" value="100000000" label="Bucket size" help="The size of the bucket for the index construction (default 100000000)."/>
-  </inputs>
-
-  <outputs>
-    <data name="output" format="crac_index" label="${output_name}.crac-index" />
-  </outputs>
-
-  <help>
-
-**What it does**
-
-Crac-index generates an indexed genome from a fasta file. This is especially useful for the Crac mapping/annotation tool.
-
-----------------------
-
-**Input Formats**
-
-Crac-index takes as input files any fasta or multi-fasta files.
-
-----------------------
-
-**Outputs**
-
-Crac-index on Galaxy produces a composite output named crac-index, which is made of a ssa file and a conf file. Both are required to the use of your index.
-
-----------------------
-
-**Crac-index settings**
-
-::
-
-  Usage : ./crac-index [options] command output_file input_file
-
-    command must be :
-      index: create an index on the specified input file(s).
-
-    options can be :
-
-      -b bucket_size  the size of the bucket for the index construction
-                      (default 100000000)
-      -d diff-cover   parameter for the index construction (default 1024)
-      -v              verbose mode
-
-    Examples:
-      ./crac-index index myIndex sequence1.fa sequence2.fa sequence3.fa
-      You can specify FASTA or MultiFASTA file(s).
-
-      In this example, two files will be created:
-        - myIndex.ssa (index storing the compressed sequences)
-        - myIndex.conf (information on sequence names and length)
-
-  </help>
-
-
-</tool>
--- a/crac/crac.xml Fri Sep 13 09:51:59 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,265 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<!-- Galaxy tool definition: maps reads with CRAC against a prebuilt or history crac_index and produces a SAM dataset. -->
-<tool id="crac" name="CRAC" version="1.0">
-  <requirements>
-    <requirement type='package' version="1.3.0">crac</requirement>
-  </requirements>
-  <description>Analyzing RNAs in high-throughput sequencing data</description>
-  <!--
-    crac_wrapper.sh reads argument 1 (index location), then drops arguments
-    1 and 2 with "shift 2" and forwards everything else to crac. The two
-    positional arguments must therefore come first, before any crac option;
-    the gz flag is emitted after them and before the output options.
-    NOTE(review): crac's usage text says the gz flag applies to output files
-    named after it, while the output below is declared as format "sam";
-    confirm whether the SAM file is affected.
-  -->
-  <command interpreter="bash"> crac_wrapper.sh
-    #if $Genome.which_genome == "prebuilt"
-      <!--1--> "$Genome.prebuilt_genome.fields.path"
-    #else
-      <!--1--> "$Genome.index_input.extra_files_path"
-    #end if
-    <!--2--> "$output_name.extra_files_path" <!-- Useful for submitting jobs on crac.sh-->
-    #if $condi_compressed == "yes"
-      --gz
-    #end if
-    -r "$input" -k $kmer_length --read-length $read_length --sam "$output_name"
-    #if $condi_deep_snp.deepSNP == "yes"
-      --deep-snv --nb-nucleotides-snv-comparison $condi_deep_snp.nb_nucleotides_snp_comparison
-    #end if
-    #if $choixSettings.settings == "experimental"
-      --max-splice-length $choixSettings.max_splice_length
-      --max-bio-indel $choixSettings.max_bio_indel
-      --max-bases-retrieved $choixSettings.max_bases_retrieved
-      --min-duplication $choixSettings.min_duplication
-      --max-duplication $choixSettings.max_duplication
-      --min-percent-single-loc $choixSettings.min_percent_single_loc
-      --min-percent-duplication-loc $choixSettings.min_percent_duplication_loc
-      --max-bases-randomly-matched $choixSettings.max_bases_randomly_matched
-      --max-extension-length $choixSettings.max_extension_length
-      --min-support-no-cover $choixSettings.min_support_no_cover
-      --min-break-length $choixSettings.min_break_length
-    #end if
-    #if str($detailed_sam) == "yes"
-      --detailed-sam
-    #end if
-
-  </command>
-
-  <inputs>
-    <!-- Normal Setting -->
-
-    <conditional name="Genome"> <!-- Conditional 3 (Which genome) -->
-      <param name="which_genome" type="select" label="Do you want to use a pre-built reference genome or a Crac-index generated genome from your history?"
-             help="Pre-built reference genomes are generated by Crac-index.">
-        <option value="prebuilt"> Use a pre-built reference genome </option>
-        <option value="history"> Use a Crac-index generated genome from my history</option>
-      </param>
-
-      <when value="prebuilt">
-        <param name="prebuilt_genome" type="select" label="Select a reference genome" help="if your genome of interest is not listed - contact authors">
-          <options from_data_table="crac_indexes">
-            <filter type="sort_by" column="2" />
-            <validator type="no_options" message="No indexes are available" />
-          </options>
-        </param>
-      </when>
-
-      <when value="history">
-        <param name="index_input" format="crac_index" type="data" label="Reference Genome" help="Select an indexed Genome from your history"/>
-      </when>
-    </conditional>
-
-
-    <param name="input" format="txt,raw,fastq" type="data" label="Reads File" help="Select a file"/>
-    <param name="kmer_length" type="integer" min="12" max="32" value="21" label="k-mer length">
-      <help>k-mer length must be carefully chosen. A k-mer of that length must map to a unique location in the genome with a high probability. Recommended value for the human genome: 22</help>
-    </param>
-    <param name="read_length" type="integer" label="Read length" value="0" help="Set read length when all reads have the same length to dramatically increase computation speed. Default value (no read length considered) : 0. Note : If read length is set, shorter reads will be ignored, longer reads will be cut."/>
-    <!-- Plain select (not a conditional), so it takes no "when" children. -->
-    <param name="detailed_sam" type="select" label="Do you want a detailed sam output file ?" help="Detailed sam output file gives you information on the SNPs, Splice junctions, Sequencing errors, Chimeras, ..."
-    >
-      <option value="no"> No, I do not want detailed sam output file </option>
-      <option value="yes"> Yes, I want detailed sam output file </option>
-    </param>
-    <param name="condi_compressed" type="select" display="radio" label="Compress output files?">
-      <option value="yes">Yes</option>
-      <option value="no">No</option>
-    </param>
-    <conditional name="condi_deep_snp"> <!-- Conditional 1 -->
-      <param type="select" name="deepSNP" label="Search hard for SNPs?">
-        <option value="no" selected="true"> No, do not search hard for SNVs </option>
-        <option value="yes"> Yes, search hard for SNVs (takes more time) </option>
-      </param>
-      <when value="yes">
-        <param name="nb_nucleotides_snp_comparison" type="integer" value="8" label="Number of nucleotides for SNV comparison" help="Default value for human genome : 8. A smaller value will find more SNVs, but will be less accurate."/>
-      </when>
-      <when value="no"/> <!-- Suppress warnings -->
-    </conditional> <!-- End Conditional 1 -->
-
-    <!-- Experimental Setting-->
-    <conditional name="choixSettings"> <!-- Conditional 2 (setting choice) -->
-      <param name="settings" type="select" label="Advanced CRAC settings to use" help="If you want full control to optimize your experience, use Advanced Settings. Be careful, these settings are experimental and one single change can make Crac fail">
-        <option value="normal" selected="true"> Normal settings </option>
-        <option value="experimental"> Advanced Settings </option>
-      </param>
-      <when value="normal"/> <!-- Suppress warnings-->
-      <when value="experimental">
-        <param name="max_splice_length" type="integer" value="300000" label="Maximum splice length" help="Splices larger than this value, will not be considered as splices, but (if possible) as chimeras. Default value for human genome : 300,000 bp."
-        />
-        <param name="max_bio_indel" type="integer" value="15" label="Maximum indel length. Larger indels will be considered as splice junctions" help=" HELP. Default value for human genome : 15 bp." />
-        <param name="min_duplication" type="integer" value="2" label="Minimum duplication occurrence" help=" Minimum number of k-mer occurrences in the genome to be considered as duplicated. Default value for human genome : 2." />
-        <param name="max_duplication" type="integer" value="9" label="Maximum duplication occurrence" help=" Maximum number of k-mer occurrences to be considered as duplicated. Default value for human genome : 9." />
-        <param name="min_percent_single_loc" type="float" value="0.15" label="Minimum unique location percentage" help=" Minimal percentage of k-mers that must be unique in the genome, to consider the read as unique. Default value for human genome : 0.15." />
-        <!-- NOTE(review): the crac 1.3.0 usage text in the help section lists DEFAULT 0.15 for this option, not 0.20; confirm which default is intended. -->
-        <param name="min_percent_duplication_loc" type="float" value="0.20" label="Minimum duplicated location percentage" help=" Minimal percentage of k-mers that must be duplicated in the genome, to consider the read as duplicated. Default value for human genome : 0.20." />
-        <!--param name="min_percent_multiple_loc" type="float" value="0.20" label="Minimum percent multiple localisation" help=" HELP. Default value for human genome : 0.20." /-->
-        <param name="max_bases_randomly_matched" type="integer" value="10" label="Maximum bases randomly matched" help=" Maximum number of bases that can be considered as randomly matched. Default value for human genome : 10." />
-        <!-- Default set to 15, the DEFAULT listed for this option in the crac 1.3.0 usage text, so that forwarding an untouched value gives the same result as not passing the option. -->
-        <param name="max_bases_retrieved" type="integer" value="15" label="Maximum bases retrieved" help=" Maximum number of bases retrieved from the genome when outputting deletions. Default value for human genome : 15." />
-        <param name="max_extension_length" type="integer" value="10" label="Maximum extension length" help=" Maximal number of nucleotides visited to extend a break and to make sure that the location is consistent. Default value for human genome : 10." />
-        <param name="min_support_no_cover" type="float" value="1.30" label="Minimum support no cover" help=" Average coverage along the read to consider it as not covered. Default value for human genome : 1.30." />
-        <param name="min_break_length" type="float" value="0.5" label="Minimum break length" help=" Breaks shorter than this ratio times the k-mer length will be considered as too short and will be merged if necessary. Default value for human genome : 0.5." />
-      </when> <!-- End "when experimental" -->
-    </conditional> <!-- End Conditional 2 -->
-  </inputs>
-
-  <outputs>
-    <data name="output_name" format="sam" label="${tool.name} on ${on_string}: mapped reads" />
-  </outputs>
-
-  <!-- TODO: the test below is an empty placeholder; it declares no inputs and no expected outputs. -->
-  <tests>
-    <test>
-    </test>
-  </tests>
-
-  <!-- CDATA keeps the literal <none>, <FILE>, <INT>... placeholders of the usage text from being parsed as XML tags. -->
-  <help><![CDATA[
-**What it does**
-
-CRAC proposes a novel way of analyzing reads that integrates genomic locations
-and local coverage, and delivers all above mentioned predictions in a single
-step. CRAC uses a double k-mer profiling approach to detect candidate
-mutations, indels, splice or fusion junctions in each single read.
-
-.. _CRAC: http://crac.gforge.inria.fr/
-
-If you use this tool, please cite:
-
- Philippe N., Salson M., Commes T., Rivals E., `"CRAC: an integrated approach to the analysis of RNA-seq reads"`__, Genome Biology (2013), 14:R30, doi: 10.1186/gb-2013-14-3-r30.
-
-.. __: http://genomebiology.com/2013/14/3/R30/
-
-------
-
-**Input formats**
-
-CRAC accepts files in FASTA, FASTQ or any text format (txt, raw, ...).
-
-------
-
-**Output**
-
-The output is in SAM format. If you choose the detailed SAM output, CRAC adds several flags to tell more informations. You can see the details here: http://crac.gforge.inria.fr/index.php?id=sam-documentation
-
-
-------
-
-**Crac settings**
-
-Main options are displayed at the top of the page. If you're an experimented user, you can choose to display
-the whole Crac setting. Most of the options in Crac have been implemented here.
-
-------
-
-::
-
-  crac 1.3.0 Compiled on Sep 13 2013.
-
-    -h, --help <none>                        print this help and exit
-    -f, --full-help <none>                   print a complete help and exit
-    -v <none>                                print version and exit
-
-  Mandatory arguments
-    -i <FILE>                                set genome index file (without the extension filename)
-    -r <FILE> [FILE2]                        set read file. Specify FILE2 in case of paired-end reads
-    -k <INT>                                 set k-mer length
-    -o, --sam <FILE>                         set SAM output filename or print on STDOUT with "-o -" argument
-
-  Optional arguments
-    * Protocol
-    --stranded <none>                        set the read mapping with for a strand specific library (DEFAULT non-strand specific)
-
-    * Efficiency
-    --nb-threads <INT>                       set the number of worker threads (DEFAULT 1)
-    --read-length, -m <INT>                  set read length in case of all reads have the same length to optimize
-                                             CPU and memory times
-    --treat-multiple <none>                  consider alignments with multiple locations (>max-duplication) rather than considering a no-alignment in the SAM file
-    --max-locs <INT>                         set the maximum number of locations on the reference index (DEFAULT 300)
-
-    * Accuracy
-    --no-ambiguity <none>                    discard biological events (splice, snv, indel, chimera) which have several matches on the reference index
-
-
-  Optional output arguments
-    --all <FILE>                             set output base filename for all causes following
-    --gz <none>                              all output files specified after this argument are gzipped
-
-    * Summary and statistics
-    --summary <FILE>                         set output summary file
-    * Mapping
-    --single <FILE>                          set output single file
-    --duplicate <FILE>                       set output duplication file
-    --multiple <FILE>                        set output multiple file
-    --none <FILE>                            set output none file
-    --normal <FILE>                          set output normal file
-    --almost-normal <FILE>                   set output almost normal file
-
-    * Biological causes
-    --snv <FILE>                             set output SNV file
-    --indel <FILE>                           set output short indel file
-    --splice <FILE>                          set output splice junction file
-    --weak-splice <FILE>                     set output coverless splice junction file
-    --chimera <FILE>                         set output chimera junction file
-    --paired-end-chimera <FILE>              set output for paired-end chimera file
-    --biological <FILE>                      set output bio-undetermined file
-
-    * Sequence errors
-    --errors <FILE>                          set output sequence errors file
-
-    * Repetition
-    --repeat <FILE>                          set output repetition file
-
-    * Other causes
-    --undetermined <FILE>                    set output undetermined file
-    --nothing <FILE>                         set output nothing file
-
-  Optional process for specific research
-    --deep-snv <none>                        will search hard to find SNPs
-    --stringent-chimera <none>               will search chimeras with more accuracy (but less sensitivity)
-
-  Optional process launcher (once must be selected)
-    * Exact matching tool
-    --emt <none>                             launch CRAC-emt for exact mapping of short reads
-
-    * Server tool (for debugging)
-    --server <none>                          launch CRAC server,the output arguments will
-                                             not be taken into account
-    --input-name-server <STRING>             DEFAULT classify.fifo
-    --output-name-server <STRING>            DEFAULT classify.out.fifo
-
-  Additional settings for users
-    * Sam output file
-    --detailed-sam <none>                    more informations are added in SAM output file
-
-    * Mapping
-    --min-percent-single-loc <FLOAT>         DEFAULT 0.15
-    --min-duplication <INT>                  DEFAULT 2
-    --max-duplication <INT>                  DEFAULT 9
-    --min-percent-duplication-loc <FLOAT>    DEFAULT 0.15
-    --min-percent-multiple-loc <FLOAT>       DEFAULT 0.50
-    --min-repetition <INT>                   DEFAULT 20
-    --min-percent-repetition-loc <FLOAT>     DEFAULT 0.20
-    * Biological causes
-    --max-splice-length <INT>                DEFAULT 300000
-    --max-paired-end-length <INT>            DEFAULT 300000
-    --max-bio-indel <INT>                    DEFAULT 15
-    --max-bases-retrieved <INT>              DEFAULT 15
-    * Undetermined
-    --min-support-no-cover <FLOAT>           DEFAULT 1.30
-
-  Additional settings for advanced users
-    * Break verification and fusion (merging mirage breaks)
-    --min-break-length <FLOAT>               DEFAULT 0.50
-    --max-bases-randomly-matched <INT>       DEFAULT 10
-    --max-extension-length <INT>             DEFAULT 10
-
-    * Threading
-    --nb-tags-info-stored <INT>              DEFAULT 1000
-
-    * Deep SNV search option
-    --nb-nucleotides-snv-comparison <INT>    DEFAULT 8
-
-  ]]></help>
-
-</tool>
--- a/crac/crac_wrapper.sh Fri Sep 13 09:51:59 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,37 +0,0 @@ -#!/bin/sh - -# Recovering special parameters from crac.xml -############################################################### -CRAC_BINARY=crac -INDEX_INPUT="$1" - -# Getting the indexed genome value -############################################################### -# Getting the indexed Genome name without the extension -if [ -d "$INDEX_INPUT" ]; then # If $INDEX_INPUT is a directory (that is to say an index from the history) - cpt=0 - for fichier in $INDEX_INPUT/*.ssa - do - if [ $((++cpt)) -gt 1 ]; then #More than 1 '.ssa' file is not expected - echo "Warning:Multiple indexes found [$INDEX]" >&2 - fi - INDEX=${fichier%%.ssa} #Getting the index from history - done - else - INDEX="$INDEX_INPUT" #Getting the prebuilt index -fi -if [ ! -f "$INDEX.ssa" -a ! -f "$INDEX.conf" ]; then #Both '.ssa' and '.conf' files are required - echo "Error:Index not found [$INDEX]" >&2 - exit 1 -fi - -# Execution of the command line (Submiting job to the cluster) -############################################################### -shift 2 #Avoiding index_input and output_name.extra_files_path - -CRAC_CMD_LINE=""$CRAC_BINARY" -i "$INDEX" "$@"" - -out=`$CRAC_CMD_LINE` - -exit 0 -
--- a/crac/tool_dependencies.xml Fri Sep 13 09:51:59 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,16 +0,0 @@
-<?xml version="1.0"?>
-<!-- Tool Shed recipe for the "crac" 1.3.0 package required by the CRAC tools. -->
-<tool_dependency>
-  <package name="crac" version="1.3.0">
-    <install version="1.0">
-      <actions>
-        <action type="download_by_url">https://gforge.inria.fr/frs/download.php/32471/crac-1.3.0.tar.gz</action>
-        <!-- Build, test, then install into the dependency directory managed by Galaxy. -->
-        <action type="shell_command">./configure --prefix=$INSTALL_DIR</action>
-        <action type="shell_command">make</action>
-        <action type="shell_command">make check</action>
-        <action type="shell_command">make install</action>
-        <!-- Expose the installed binaries to tools that declare this requirement. -->
-        <!-- NOTE(review): assumes "make install" places the executables in $INSTALL_DIR/bin; confirm. -->
-        <action type="set_environment">
-          <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
-        </action>
-      </actions>
-    </install>
-    <readme>
-CRAC requires g++ 4.3 or later.
-    </readme>
-  </package>
-</tool_dependency>