# HG changeset patch # User jbrayet # Date 1453985038 18000 # Node ID b044e98c81d2f26a2e19acbb9be845baab18ee4f # Parent 8f3347849abf54b3e573da78e320dfa3bbecab1d Uploaded diff -r 8f3347849abf -r b044e98c81d2 ncPRO-ANNOTATION.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ncPRO-ANNOTATION.xml Thu Jan 28 07:43:58 2016 -0500 @@ -0,0 +1,599 @@ + + of mapped reads + + institutcuriengsintegration/ncproseqgalaxy:1.6.5 + + + +ncPRO-ANNOTATION.sh + -i $input + -g $genome + -t $cond.datatype + -e $cond['database']$cond['borne']$cond['N1']"_"$cond['N2'] + -u $ucsc_track + -n $rpm + -l $outlog + #if str($cond.datatype) == "matmir": + #if str($cond['allmiRNA']) == "True": + -o $out_mirna + -a $out_all_mirna + #else + -o $out_mirna + #end if + #elif str($cond.datatype) == 'trna': + -o $out_trna + #elif str($cond.datatype) == 'premir': + #if str($cond['allmiRNA']) == "True": + -o $out_premirna + -a $out_all_premirna + #else + -o $out_premirna + #end if + #elif str($cond.datatype) == 'rfam': + -o $out_rfam + #elif str($cond.datatype) == 'rmsk': + -o $out_rmsk + #end if + + #if str($ucsc_track) == 'True': + -v $out_ucsc + #end if + -r ${__root_dir__} + -p $projectName + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Fill this field with +/- value + + + Fill this field with +/- value + + + + + + + + + + + + + + + + + + Fill this field with +/- value + + + Fill this field with +/- value + + + + + + + + + + + + + + + + Fill this field with +/- value + + + Fill this field with +/- value + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Fill this field with +/- value + + + Fill this field with +/- value + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Fill this field with +/- value + + + Fill this field with +/- value + + + + + + + + + + + + + + + + (str(cond['datatype']) == "matmir") + + + ((str(cond['datatype']) == "matmir") and (cond['allmiRNA'] == True)) + + + (str(cond['datatype']) == "premir") + + + ((str(cond['datatype']) == "premir") and (cond['allmiRNA'] == True)) + + + (str(cond['datatype']) == "trna") + + + (str(cond['datatype']) == "rfam") + + + (str(cond['datatype']) == "rmsk") + + + + (str(ucsc_track) == 'True') + + + + + + + +**What ncPRO-seq does ?** + +------ + +ncPRO-seq is a tool for annotation and profiling of ncRNAs from smallRNA sequencing data. It aims to interrogate and perform detailed analysis on small RNAs derived from annotated non-coding regions in miRBase, Rfam and repeatMasker, and regions defined by users. A command line version and an online version are available at http://ncpro.curie.fr. +If you use the ncPRO-seq tool for your analysis, please cite the following paper : +Chen C., Servant N., Toedling J., Sarazin A., Marchais A., Duvernois-Berthet E., Cognat V., Colot V., Voinnet O., Heard E., Ciaudo C. and Barillot E. (2012) ncPRO-seq: a tool for annotation and profiling analysis of ncRNAs from small RNA-seq.Bioinformatics.28(23):3147-9. + +# Copyleft ↄ⃝ 2012 Institut Curie +# Author(s): Jocelyn Brayet, Laurene Syx, Chongjian Chen, Nicolas Servant(Institut Curie) 2012 - 2015 +# Contact: bioinfo.ncproseq@curie.fr +# This software is distributed without any guarantee under the terms of the GNU General +# Public License, either Version 2, June 1991 or Version 3, June 2007. + +------ + +**Input Formats** + +Aligned file (BAM) is allowed. 
See ncPRO-seq Alignment and quality control for more details. + +------ + +**Annotation** + +To find overlaps between read alignments and genomic annotations, the intersectBed tool from +BEDTools is used. Only read alignments that have 100% overlap with annotations are reported; +this is achieved by setting the -f option of intersectBed to 1. + +-Reads with multiple mapping sites +A major challenge when using NGS data is the annotation of reads aligned at multiple +locations. Most of the available frameworks resolve this situation by discarding these reads or by +providing random annotations. Here, we propose to keep all the reads aligned to the genome, and to +weight them by the number of mapping sites. For example, if a read can be aligned 5 times to the genome, +then for each mapping site the read is counted as 0.2, i.e. 1/5. + +-Annotation +There are four types of extended items that can be used to modify annotation coordinates: +1- shorten [+-]N1 bp at 5' end, [+-]N2 bp at 3' end +2- extend [+-]N1 bp at 5' end, [+-]N2 bp at 3' end +3- get coordinates for sub-region from position N1 to N2 indexed from 5' end +4- get coordinates for sub-region from position N1 to N2 indexed from 3' end + +For instance, due to the inaccurate processing of precursor miRNAs by Dicer or to downstream miRNA remodelling, mature miRNAs often have end heterogeneities compared to their annotations in miRBase. Thus, when analyzing mature miRNAs, it is necessary to extend the miRNA annotation by several bases (e.g. 2 bases) in both the upstream and downstream directions. + + +