# HG changeset patch
# User yhoogstrate
# Date 1395839550 14400
# Node ID cada2d2f0a7c057becc8617120876b82295e9d0b
Uploaded
diff -r 000000000000 -r cada2d2f0a7c featurecounts.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/featurecounts.xml Wed Mar 26 09:12:30 2014 -0400
@@ -0,0 +1,237 @@
+
+
+ Measure gene expression in RNA-Seq experiments from SAM or BAM files.
+
+ featurecounts
+ featurecounts2bed
+
+
+
+
+
+ ## Builds the featureCounts shell command. Two sanity checks run first; when
+ ## either one fails, only an error message is echoed to stderr.
+ ## Check 1: when the annotation is looked up by dbkey, all alignments must share exactly one dbkey.
+ #if $reference_gene_sets_source.source_select == "attribute" and len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) != 1
+ echo "Invalid number of dbkeys are found: ${ len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) }, while only one should be used. Make sure that the alignments are done on the same reference genome and that 'tool-data/gene_sets.loc' is configured properly!" >&2
+ #else
+
+ ## Check 2: all alignments must have the same datatype extension.
+ #if len({ alignment.extension:True for alignment in $alignments }.keys()) != 1
+ echo "Either all files must be SAM or all files must be BAM, no mixture is allowed." >&2
+ #else
+ featureCounts
+ -a
+ #if $reference_gene_sets_source.source_select == "indexed_filtered"
+ "$reference_gene_sets_source.reference_gene_sets"
+ #else if $reference_gene_sets_source.source_select == "indexed_all"
+ "$reference_gene_sets_source.reference_gene_sets"
+ #else if $reference_gene_sets_source.source_select == "history"
+ "$reference_gene_sets_source.reference_gene_sets"
+ #else
+
+ ## Remaining case: take the third field of the first 'gene_sets' table row whose first field equals the shared dbkey.
+ "${ filter( lambda x: str( x[0] ) == str( { alignment.metadata.dbkey:True for alignment in $alignments }.keys()[0] ), $__app__.tool_data_tables[ 'gene_sets' ].get_fields() )[0][2] }"
+ #end if
+
+ -o "$output"
+ -T $threads
+
+ #if $extended_parameters.parameters == "extended"
+ -t $extended_parameters.gff_feature_type
+ -g $extended_parameters.gff_feature_attribute
+ $extended_parameters.summarization_level
+ $extended_parameters.contribute_to_multiple_features
+ $extended_parameters.protocol
+ $extended_parameters.multimapping_counts
+ -Q $extended_parameters.mapping_quality
+ $extended_parameters.fragment_counting
+ $extended_parameters.check_distance
+ -d $extended_parameters.minimum_fragment_length
+ -D $extended_parameters.maximum_fragment_length
+ $extended_parameters.only_both_ends
+ $extended_parameters.exclude_chimerics
+ $extended_parameters.namesort
+ #end if
+
+ #for $alignment in $alignments
+ ${alignment}
+ #end for
+
+ 2>&1
+
+ ## Reduce the table to the requested layout: drop '#' lines and keep
+ ## columns 1,7,6 (tabdel_default) or columns 1,7 (tabdel_short).
+ #if $format == "tabdel_default" or $format.value == "tabdel_default"
+ ; cp $output tmp.txt
+ ; egrep -v "^#" tmp.txt > tmp2.txt
+ ; cut -f 1,7 tmp2.txt > tmp_left.txt
+ ; cut -f 6 tmp2.txt > tmp_right.txt
+ ; paste tmp_left.txt tmp_right.txt > $output
+ #elif $format == "tabdel_short" or $format.value == "tabdel_short"
+ ; cp $output tmp.txt
+ ; egrep -v "^#" tmp.txt | cut -f 1,7 > $output
+ #end if
+
+ #if $format.value == "bed"
+ ## Convert to BED exactly once. Doing this inside the per-alignment loop
+ ## would feed already converted BED lines back into the converter as soon
+ ## as more than one alignment is selected. The converter drops the header
+ ## lines, so no filename replacement is needed for this format.
+ ; featurecounts2bed.sh -f "$output" > tmp.txt
+ ; mv tmp.txt $output
+ #else
+ ## For every alignment, replace its filename for: "hid: sample name"
+ #for $alignment in $alignments
+ #set $alignment_escaped = str($alignment).replace('/', '\/').replace('.', '\.')
+ #set $alignment_name_escaped = str(alignment.hid)+": "+str($alignment.name).replace('\t',' ').replace('\\','\\\\').replace("'","\\'").replace('/','\/')
+
+ ## The reduced tables have one header line; the full table has two.
+ #if $format.value == "tabdel_default" or $format.value == "tabdel_short"
+ ; sed -e '1 s/$alignment_escaped/${alignment_name_escaped}/g' $output > tmp.txt
+ #else
+ ; sed -e '1,2 s/$alignment_escaped/${alignment_name_escaped}/g' $output > tmp.txt
+ #end if
+
+ ; mv tmp.txt $output
+ #end for
+ #end if
+ #end if
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+featureCounts::
+**Overview**
+
+FeatureCounts is a light-weight read counting program written entirely in the C programming language. It can be used to count both gDNA-seq and RNA-seq reads for genomic features in SAM/BAM files.
+It has a variety of advanced parameters but its major strength is its outstanding performance: analysis of a 10GB BAM file takes about 7 minutes on a single average CPU (Homo Sapiens genome)!
+Liao Y, Smyth GK and Shi W. featureCounts: an efficient general-purpose program for assigning sequence reads to genomic features. Bioinformatics, Advance Access, accepted on Nov 7, 2013
+
+featureCounts is part of a bigger analysis suite called subread:
+http://subread.sourceforge.net/
+Liao Y, Smyth GK and Shi W. The Subread aligner: fast, accurate and scalable read mapping by seed-and-vote. Nucleic Acids Research, 41(10):e108, 2013
+
+**Input formats**
+
+Alignments should be provided in either:
+* SAM format - http://samtools.sourceforge.net/samtools.shtml#5
+* BAM format
+
+Gene regions should be provided in the GFF/GTF format:
+* http://genome.ucsc.edu/FAQ/FAQformat.html#format3
+* http://www.ensembl.org/info/website/upload/gff.html
+
+**Installation**
+
+1) Make sure you have proper GFF/GTF files (corresponding to your reference genome used for the alignment) uploaded to your history.
+
+2) Make sure that your gene_sets.loc is configured properly as data table. This is generally done by copying the right information into: tool_data_table_conf.xml.
+https://wiki.galaxyproject.org/Admin/Tools/Data%20Tables
+
+**Examples**
+
+**License**
+
+* featureCounts / subread: GNU General Public License version 3.0 (GPLv3)
+
+**Contact**
+
+The tool wrapper has been written by Youri Hoogstrate from the Erasmus Medical Center (Rotterdam, Netherlands) on behalf of the Translational Research IT (TraIT) project:
+http://www.ctmm.nl/en/programmas/infrastructuren/traitprojecttranslationeleresearch
+
+I want to thank Marius van den Beek for his contribution as well.
+
+More tools by the Translational Research IT (TraIT) project can be found in the following repository:
+http://toolshed.dtls.nl/
+
+
+
\ No newline at end of file
diff -r 000000000000 -r cada2d2f0a7c featurecounts2bed.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/featurecounts2bed.sh Wed Mar 26 09:12:30 2014 -0400
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+# This tool has been written by Youri Hoogstrate from the Erasmus
+# Medical Center (Rotterdam, Netherlands) on behalf of the Translational
+# Research IT (TraIT) project:
+# http://www.ctmm.nl/en/programmas/infrastructuren/traitprojecttranslationeleresearch
+#
+# More tools by the Translational Research IT (TraIT) project can be
+# found in the following repository:
+# http://toolshed.dtls.nl/
+#
+# Converts a featureCounts count table (-f FILE) into BED-like lines on
+# stdout: chrom, start, stop, "Geneid (exon_len/gene_lennt)", count, strand.
+# With -e true (the default) every exon gets a line; any other value
+# writes only the first exon of each gene.
+
+exon_level="true"
+filename=""
+
+# Parse parameters
+while getopts e:f: option; do
+  case "${option}" in
+    e) exon_level=${OPTARG} ;;
+    f) filename=${OPTARG} ;;
+  esac
+done
+
+# Without an input file there is nothing to convert: show usage and fail.
+if [[ -z "$filename" ]]; then
+  {
+    echo "Usage:"
+    echo " -e [true, false] true = entry for every exon; false = line for genes first exon"
+    echo " -f FILENAME from featureCounts"
+  } >&2
+  exit 1
+fi
+
+# Convert the file
+while read -r line || [[ -n "$line" ]]; do
+  # Skip blank lines and '#' comment lines.
+  [[ -z "$line" || "$line" == "#"* ]] && continue
+  # Split on whitespace without glob expansion; skip the "Geneid" header row.
+  read -r -a columns <<< "$line"
+  uid=${columns[0]}
+  [[ "$uid" == "Geneid" ]] && continue
+  # Per-exon values are ';'-joined inside a single column.
+  IFS=';' read -r -a chr_splitted <<< "${columns[1]}"
+  IFS=';' read -r -a start_splitted <<< "${columns[2]}"
+  IFS=';' read -r -a stop_splitted <<< "${columns[3]}"
+  IFS=';' read -r -a strand_splitted <<< "${columns[4]}"
+  length=${columns[5]}
+  count=${columns[6]}
+  n=1
+  [[ "$exon_level" == "true" ]] && n=${#chr_splitted[@]}
+
+  # Fields are tab-separated because the name field itself contains a space.
+  for (( i = 0; i < n; i++ )); do
+    printf '%s\t%s\t%s\t%s (%s/%snt)\t%s\t%s\n' \
+      "${chr_splitted[i]}" "${start_splitted[i]}" "${stop_splitted[i]}" "$uid" \
+      "$(( stop_splitted[i] - start_splitted[i] ))" "$length" "$count" "${strand_splitted[i]}"
+  done
+done < "$filename"
diff -r 000000000000 -r cada2d2f0a7c tool-data/gene_sets.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gene_sets.loc.sample Wed Mar 26 09:12:30 2014 -0400
@@ -0,0 +1,39 @@
+# This is a sample file distributed with Galaxy that enables tools
+# to use gene/exon annotations in the GFF/GTF format. You will need
+# to add all the gene set annotations in this configuration file.
+# Because of this file, galaxy tools are able to access gene annotations
+# provided as GFF / GTF files by selecting on:
+# - Priority (defined by the order in the file)
+# - Provider
+# - dbkey (reference genome id)
+#
+# The gene_sets.loc file syntax is:
+# name <TAB> dbkey <TAB> value (path to the GFF/GTF file) <TAB> provider <TAB> reference
+# *optional
+#
+# Please ensure maximally one TAB (\t) between two columns!
+# ---------------------------------------------------------
+#
+# In case you have TWO or MORE providers PER dbkey, the one mentioned
+# first in the file, should have the "default" priority.
+#
+
+
+
+# [UCSC - UCSC Genes: knownGene]
+
+#hg19.UCSC knownGene (mm/'yy) hg19 /depot/data2/galaxy/hg19/gene_sets/Homo_sapiens.GRCh37.74.gtf ucsc http://genome.ucsc.edu/
+#hg18.UCSC knownGene (mm/'yy) hg18 /depot/data2/galaxy/hg18/gene_sets/Homo_sapiens.NCBI36.54.gtf ucsc http://genome.ucsc.edu/
+
+
+
+# [RefSeq Genes: RefGene]
+# link: ftp://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/RefSeqGene/
+
+
+
+# [ Ensembl]
+# link: http://www.ensembl.org/info/data/ftp/index.html
+
+#Homo_sapiens.GRCh37.74 hg19 /depot/data2/galaxy/hg19/gene_sets/Homo_sapiens.GRCh37.74.gtf ensembl ftp://ftp.ensembl.org/pub/release-74/gtf/homo_sapiens/Homo_sapiens.GRCh37.74.gtf.gz
+#Homo_sapiens.NCBI36.54 hg18 /depot/data2/galaxy/hg18/gene_sets/Homo_sapiens.NCBI36.54.gtf ensembl ftp://ftp.ensembl.org/pub/release-54/gtf/homo_sapiens/Homo_sapiens.NCBI36.54.gtf.gz
diff -r 000000000000 -r cada2d2f0a7c tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed Mar 26 09:12:30 2014 -0400
@@ -0,0 +1,8 @@
+
+
+
+
+ name, dbkey, value, provider, reference
+
+
+
\ No newline at end of file
diff -r 000000000000 -r cada2d2f0a7c tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Wed Mar 26 09:12:30 2014 -0400
@@ -0,0 +1,37 @@
+
+
+
+
+
+ wget http://sourceforge.net/projects/subread/files/subread-1.4.4/subread-1.4.4-source.tar.gz && tar -zxvf subread-1.4.4-source.tar.gz && cd subread-1.4.4-source/src/ && make -f Makefile.Linux && cd ../../
+
+ ../subread-1.4.4-source/bin/featureCounts
+ $INSTALL_DIR/bin
+
+
+ $INSTALL_DIR/bin
+ $REPOSITORY_INSTALL_DIR
+
+
+
+
+ Downloads and installs featureCounts; requires WGET, GNU AUTOTOOLS and TAR to be installed!
+
+
+
+
+
+
+ wget http://yhoogstrate@toolshed.dtls.nl/repos/yhoogstrate/featurecounts/raw-file/tip/featurecounts2bed.sh && chmod 755 *.sh
+
+ ../featurecounts2bed.sh
+ $INSTALL_DIR/bin
+
+
+ $INSTALL_DIR/bin
+ $REPOSITORY_INSTALL_DIR
+
+
+
+
+