# HG changeset patch
# User nikos
# Date 1410438075 14400
# Node ID 3ecde8c1bd83ca8b8fa9aad0dda1a3abbebaf511
Uploaded. Tool tests and/or test files are missing.

diff -r 000000000000 -r 3ecde8c1bd83 hrf-seq/estimate_unique_counts.sh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hrf-seq/estimate_unique_counts.sh	Thu Sep 11 08:21:15 2014 -0400
@@ -0,0 +1,86 @@
+#!/bin/bash
+
+############################################################
+#Create a table of fragments containing coordinates [RNA molecule, start, end], number of mapped reads, number of unique barcodes and the 3'-most nucleotide of the cDNA (the one that was ligated to)
+############################################################
+#Remove untemplated nucleotides and create the positions_temp file
+
+#defaults
+output_dir="output_dir"
+priming_pos=-1
+
+#parse input
+while getopts hf:b:p:o: myarg
+do case "$myarg" in
+	h) echo "Usage: estimate_unique_counts.sh -f <bamfile> -b <barcodes> [-p <priming_pos>] [-o <output_dir>]"
+		exit ;;
+	f) bamfile="$OPTARG" ;;	#required
+	b) barcodes="$OPTARG" ;;	#required
+	p) priming_pos="$OPTARG" ;;	#optional
+	o) output_dir="$OPTARG" ;;	#optional
+	[?]) echo "Usage: estimate_unique_counts.sh -f <bamfile> -b <barcodes>"
+		exit 1 ;;
+	esac
+done
+
+mkdir $output_dir
+
+# Use the MD:Z tag of properly paired forward reads (flag 99) to detect 0-3 mismatches at the 5' end;
+# these are treated as untemplated nucleotide additions and the fragment start is shifted accordingly.
+samtools view $bamfile | awk 'BEGIN{OFS="\t"}{if(substr($0,1,1)!="@"){print}}' - | awk -v out="${output_dir}/trimming_stats.txt" 'BEGIN{OFS="\t";counter[0]=0;counter[1]=0;counter[2]=0;counter[3]=0}
+function abs(value){return(value<0?-value:value)}
+($2==99 && /[\s\t]MD:Z:/ && !/MD:Z:([012][ACGT])/) {print($1, $3, $4+0, $4+abs($9)-1);counter[0]++;next};
+($2==99 && /[\s\t]MD:Z:0[ACGT]/ && !/MD:Z:0[ACGT][01][ACGT]/) {print($1, $3, $4+1, $4+abs($9)-1);counter[1]++;next};
+($2==99 && (/[\s\t]MD:Z:1[ACGT]/ && !/MD:Z:1[ACGT]0[ACGT]/ || (/MD:Z:0[ACGT]0[ACGT]/ && !/MD:Z:0[ACGT]0[ACGT]0[ACGT]/))) {print($1, $3, $4+2, $4+abs($9)-1);counter[2]++;next};
+($2==99 && ((/[\s\t]MD:Z:1[ACGT]0[ACGT]/)||(/MD:Z:0[ACGT]1[ACGT]/)||(/MD:Z:0[ACGT]0[ACGT]0[ACGT]/)||(/MD:Z:2[ACGT]/))) {print($1, $3, $4+3, $4+abs($9)-1);counter[3]++;next}
+END{print("No trimming:",counter[0],"1 nt trimmed:", counter[1],"2 nt trimmed:", counter[2],"3 nt trimmed:",counter[3]) > out}' | sort -k1,1 | gzip > positions_temp_sorted.gz &
+
+# Compute the barcode length (use the first line and take the string length of the second column)
+
+TMP=`head -1 $barcodes | awk '{print $2}'`
+BAR_LEN=`echo ${#TMP}`
+
+# Remove "@" from barcodes and sort them
+sed 's/^.//' $barcodes | sort -k1,1 | gzip > barcodes_temp_sorted.gz &
+
+wait
+
+# Merge positions and barcodes
+join -1 1 <(zcat positions_temp_sorted.gz) <(zcat barcodes_temp_sorted.gz) | cut -f 2,3,4,5 -d " " | awk '{if($4 !~ /N/){print}}' | awk -v bar_len="${BAR_LEN}" '{if(length($4)==bar_len){print}}' | gzip > merged_temp.gz
+
+#### If the priming flag is set....
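+#### When -p is set, the awk below replaces the observed fragment end in every
+#### merged record with the fixed priming position, e.g. -p 1542 would turn a
+#### record "16S 120 1483 ACGTACGTAC" into "16S 120 1542 ACGTACGTAC" (RNA name,
+#### coordinates and barcode here are purely illustrative values).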
+if [ $priming_pos != -1 ]; then
+	zcat merged_temp.gz | awk -v pos="${priming_pos}" '{print $1, $2, pos, $4}' - > merged_temp2
+	cat merged_temp2 | gzip > merged_temp.gz
+	rm merged_temp2
+fi
+
+#File summary.txt columns: RNA_ID, Start, End, barcode sequence, sequenced_count [=number of sequenced fragments fulfilling the previous requirements]
+
+zcat merged_temp.gz | awk '{barcode[$1][$2][$3][$4]++}END{
+for(RNA in barcode){
+for(start_position in barcode[RNA]){
+for(end_position in barcode[RNA][start_position]){
+for(barseq in barcode[RNA][start_position][end_position]){print RNA,start_position,end_position,barseq,barcode[RNA][start_position][end_position][barseq]}}}}}' > $output_dir/summary.txt
+
+#File unique_barcodes.txt columns: RNA_ID, Start, End, number of unique barcodes observed for this fragment [PROBLEM: how to treat the different 3' cDNA ends for the same fragment? If the template was homogeneous it should always be the same]
+
+awk '{barcode[$1][$2][$3]++}END{
+for(RNA in barcode){
+for(start_position in barcode[RNA]){
+for(end_position in barcode[RNA][start_position]){print RNA,start_position,end_position,barcode[RNA][start_position][end_position]}}}}' $output_dir/summary.txt > $output_dir/unique_barcodes.txt &
+
+#File read_counts.txt columns: RNA_ID, Start, End, sequenced_count
+
+zcat merged_temp.gz | awk '{barcode[$1][$2][$3]++}END{
+for(RNA in barcode){
+for(start_position in barcode[RNA]){
+for(end_position in barcode[RNA][start_position]){print RNA,start_position,end_position,barcode[RNA][start_position][end_position]}}}}' > $output_dir/read_counts.txt &
+
+wait
+
+##Remove temporary files - merged_temp.gz is left in place in case debugging is needed.
+
+rm positions_temp_sorted.gz
+rm barcodes_temp_sorted.gz
+#rm merged_temp.gz
+
+##End of remove temp files
diff -r 000000000000 -r 3ecde8c1bd83 hrf-seq/estimate_unique_counts.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hrf-seq/estimate_unique_counts.xml	Thu Sep 11 08:21:15 2014 -0400
@@ -0,0 +1,52 @@
+
+
+
+ samtools
+
+
+ estimate_unique_counts.sh
+
+ ## Inputs
+ -f $input1 -b $input2
+
+ ##
+ #if str( $priming.flag ) == 'False':
+ -p -1
+ #else
+ -p $priming.position
+ #end if
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff -r 000000000000 -r 3ecde8c1bd83 hrf-seq/preprocessing.sh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hrf-seq/preprocessing.sh	Thu Sep 11 08:21:15 2014 -0400
@@ -0,0 +1,116 @@
+#!/bin/bash
+
+#Preprocessing workflow - trimming adapters, barcodes etc.
+
+# Barcode in the oligo used: NWTRYSNNNN
+
+# Which means that each proper read must begin with: NNNN(C|G)(A|G)(C|T)A(A|T)N
+
+# As regex:
+
+# ^[ACGT][ACGT][ACGT][ACGT][CG][AG][CT][A][AT][ACGT]
+
+
+READ1=${1}
+READ2=${2}
+BARCODE=$3
+ADAPTER1=$4
+ADAPTER2=$5
+CUTOFF=$6
+CUTADAPT_LOG=${7}
+TRIM_LENGTH=$8
+OVERLAP_LENGTH=$9
+OUT_R1=${10}
+OUT_R2=${11}
+OUT_BARCODES=${12}
+
+BAR_LENGTH=`eval echo ${#BARCODE}`
+
+#########################################################################################
+
+# Reverse complement the barcode sequence and create a regular expression.
+
+REV_COMPLEMENT=$(perl -0777ne's/\n //g; tr/ATGCatgcNnYyRrKkMmBbVvDdHh/TACGtacgNnRrYyMmKkVvBbHhDd/; print scalar reverse $_;' <(echo $BARCODE))
+
+REG_EXP=$(sed 's/A/[A]/g;s/C/[C]/g;s/G/[G]/g;s/T/[T]/g;s/R/[AG]/g;s/Y/[CT]/g;s/S/[GC]/g;s/W/[AT]/g;s/K/[GT]/g;s/M/[AC]/g;s/B/[CGT]/g;s/D/[AGT]/g;s/H/[ACT]/g;s/V/[ACG]/g;s/N/[AGCT]/g;' <<< $REV_COMPLEMENT)
+
+########################################################################
+#1. Remove all the reads that do not start with the signature
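+# (Worked example, assuming the NWTRYSNNNN barcode from the comment above: its
+# reverse complement is NNNNSRYAWN, so REG_EXP expands to
+# [AGCT][AGCT][AGCT][AGCT][GC][AG][CT][A][AT][AGCT] and only reads whose first
+# 10 nt match this pattern keep their sequence.)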
+
+# (the first awk removes them, the second awk removes the corresponding quality strings), followed by cutadapt.
+
+# After cutadapt, remove the last 15 nt - these may be derived from the random primer [should be optional, as the user may: 1) use a different random primer length, 2) have short reads and would lose too much info] and remove all the reads that are shorter than 30 nt
+
+# (10 nt barcode + 20 nt for mapping)
+
+echo -e '------------------------Read1------------------------\n' > $CUTADAPT_LOG
+
+awk '{if(NR%4==2){if(/^'"$REG_EXP"'/){print}else{print ""}}else{print}}' $READ1 |
+
+awk 'BEGIN{trim_flag=0; trimming_stats=0; all_processed=0}
+{
+	if(NR%4==1){print; all_processed++}
+	if(NR%4==2){if(length($1)==0){trim_flag=1;trimming_stats++}else{trim_flag=0};print}
+	if(NR%4==3){print}
+	if(NR%4==0){if(trim_flag==1){print ""}else{print $0}}
+}END{print(trimming_stats, all_processed) > "trimming_stats.error"}' |
+
+cutadapt -a $ADAPTER1 -q $CUTOFF --format=fastq -O $OVERLAP_LENGTH - 2>>$CUTADAPT_LOG |
+
+awk -v len="${TRIM_LENGTH}" '{if(NR%2==0){print(substr($1,0,length($1)-len))}else{print}}' |
+
+awk -v len="${BAR_LENGTH}" '{if(NR%2==0 && length($1)<20+len){printf("\n")}else{print}}' | gzip > R1.fastq.gz &
+
+wait
+
+########################################################################
+#2. Trim the adapter, primer and possible random barcode from the second read
+
+echo -e '------------------------Read2------------------------\n' >> $CUTADAPT_LOG
+
+cutadapt -a $ADAPTER2 -q $CUTOFF --format=fastq -O $OVERLAP_LENGTH $READ2 2>>$CUTADAPT_LOG |
+
+awk -v len1="${TRIM_LENGTH}" -v len2="${BAR_LENGTH}" '{if(NR%2==0){print(substr($0,len1+1,(length($0)-len1-len2)))}else{print($0)}}' - |
+
+awk '{if(NR%2==0 && length($1)<20){printf("\n")}else{print}}' | gzip > R2.fastq.gz &
+
+wait
+
+########################################################################
+#3. Remove empty reads - remove each pair for which at least one read of the pair got removed (they are problematic for tophat mapping)
+
+#first define which lines to keep from both fastq files (k for keep, d for discard in the lines_to_keep file)
+
+paste <(zcat R1.fastq.gz) <(zcat R2.fastq.gz) | awk 'BEGIN{OFS="\n"}{if(NR%4==2 && NF==2){print("k","k","k","k")}else{if(NR%4==2 && NF<2){print("d","d","d","d")}}}' > lines_to_keep
+
+paste lines_to_keep <(zcat R1.fastq.gz) | awk '{if($1=="k")print($2,$3)}' | gzip > R1_readsANDbarcodes.fastq.gz &
+
+paste lines_to_keep <(zcat R2.fastq.gz) | awk '{if($1=="k")print($2,$3)}' > $OUT_R2 &
+
+wait
+
+########################################################################
+#4. Extract the barcode sequence from the first read:
+zcat R1_readsANDbarcodes.fastq.gz | awk -v len="${BAR_LENGTH}" '{if(NR%2==0 && length($1)<20+len){printf("\n")}else{if(NR%2==0){print(substr($0,len+1,length($0)))}else{print($0)}}}' > $OUT_R1 &
+
+zcat R1_readsANDbarcodes.fastq.gz | awk -v len="${BAR_LENGTH}" '{if(NR%4==1){print($1)}else{if(NR%4==2){print(substr($0,0,len))}}}' | paste - - > ${OUT_BARCODES} &
+
+wait
+
+########################################################################
+#6. Remove temporary fastq files
+
+rm R1.fastq.gz R2.fastq.gz R1_readsANDbarcodes.fastq.gz
+rm lines_to_keep
+
+########################################################################
+#7. Problem! Spaces were added at the end of the strings in the fastq files (awk induced). I will remove them:
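+# (The trailing spaces come from print($2,$3) in step 3: for sequence, "+" and
+# quality lines $3 is empty, so an OFS space is left at the end of each record.)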
+
+mv $OUT_R1 R1.temp.fastq
+mv $OUT_R2 R2.temp.fastq
+
+awk '{print($1)}' R1.temp.fastq > $OUT_R1 &
+awk '{print($1)}' R2.temp.fastq > $OUT_R2 &
+
+wait
+rm R?.temp.fastq
diff -r 000000000000 -r 3ecde8c1bd83 hrf-seq/preprocessing.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hrf-seq/preprocessing.xml	Thu Sep 11 08:21:15 2014 -0400
@@ -0,0 +1,61 @@
+
+ HRF-Seq raw reads
+
+ cutadapt
+ samtools
+ bowtie2
+
+
+
+ preprocessing.sh
+
+ ## Inputs
+ $input1 $input2
+
+ ## Barcode sequence
+ $barcode_seq
+
+ ## Cutadapt options
+ $adapter1 $adapter2 $quality_cutoff $log
+
+ ## Trimming length
+ $trim
+
+ ## Minimum overlap length
+ $overlap_length
+
+ ## Outputs
+ $output1 $output2 $barcodes
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff -r 000000000000 -r 3ecde8c1bd83 hrf-seq/rna_probing_coverage.R
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hrf-seq/rna_probing_coverage.R	Thu Sep 11 08:21:15 2014 -0400
@@ -0,0 +1,138 @@
+## Setup R error handling to go to stderr
+options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+#Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+suppressMessages(library('getopt'))
+
+options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)
+args <- commandArgs(trailingOnly = TRUE)
+
+#get options, using the spec as defined by the enclosed list.
+#we read the options from the default: commandArgs(TRUE).
+spec = matrix(c(
+	'verbose', 'v', 2, "integer",
+	'help' , 'h', 0, "logical",
+	'sample', 's', 1, "character",
+	'control', 'c', 2, "character",
+	'window', 'w', 1, "integer",
+	'ball', 'b', 1, "double",
+	'peak', 'p', 1, "integer",
+	'output', 'o', 1, "character",
+	'counts', 'x', 1, "character",
+	'ratio', 'y', 1, "character",
+	'priming', 'z', 1, "character",
+	'plots', 'l', 2, "character"
+	), byrow=TRUE, ncol=4);
+
+opt = getopt(spec);
+
+# if help was asked for, print a friendly message
+# and exit with a non-zero error code
+if ( !is.null(opt$help) ) {
+	cat(getopt(spec, usage=TRUE));
+	q(status=1);
+}
+
+
+#This is a streamlined protocol that works only with reads mapped to one RNA molecule (16S rRNA)
+#For the plot used in the NAR preparation (without the two bottom plots) see under the many #'s
+
+suppressMessages(library('GenomicRanges'));
+#setwd("/home/nikos/rna_probing_galaxy")
+
+#Inputs:
+#Options:
+window_size <- opt$window
+ball_size <- opt$ball
+peak <- opt$peak
+
+#Initialize variables:
+counts <- list()
+gene_coverage <- c()
+gene_counts <- c()
+gene_ratio <- c()
+gene_priming <- c()
+#End of initializing
+
+####Read in datasets:
+counts[[1]] <- read.table( opt$sample )
+
+#if ( !is.null( opt$control ) ) {
+#	counts[[2]] <- read.table( opt$control )
+#}
+#####
+
+#Filter out the short inserts so that we do not have to correct for size selection.
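+# (i.e. keep only fragments with end - start >= peak; with the default peak of
+# 100 passed by the Galaxy wrapper, a fragment spanning e.g. positions 200-260
+# would be dropped - the positions here are only illustrative.)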
+for(i in seq(1,length(counts))){
+	counts[[i]] <- subset(counts[[i]], (counts[[i]][,3]-counts[[i]][,2])>=peak)
+}
+
+############Initialize matrices for storing data:
+counter <-1
+for(i in seq(1,length(counts))){
+	temp_storage <- counts[[i]]
+	gene_coverage <- matrix(nrow=max(c(temp_storage[,3], nrow(gene_coverage))), ncol=counter)
+	gene_counts <- matrix(nrow=max(c(temp_storage[,3], nrow(gene_counts))), ncol=counter)
+	gene_priming <- matrix(nrow=max(c(temp_storage[,3], nrow(gene_priming))), ncol=counter)
+	gene_ratio <- matrix(nrow=max(c(temp_storage[,3], nrow(gene_ratio))), ncol=counter)
+	counter <- counter+1
+}
+###########End of Initialize matrices for storing data
+
+##################
+##Fill in the matrices with data
+counter <-1
+for(i in seq(1,length(counts))){
+	temp_storage <- counts[[i]]
+	cover <- coverage(IRanges(start=temp_storage[,2], end=(temp_storage[,3]-peak)), weight=temp_storage[,4])
+	gene_coverage[1:length((rep(runValue(cover), runLength(cover)))),counter] <- c((rep(runValue(cover), runLength(cover))))
+	stopping_reads <- aggregate(temp_storage[,4]~temp_storage[,2],temp_storage, sum)
+	gene_counts[stopping_reads[,1],counter] <- stopping_reads[,2]
+	gene_counts[is.na(gene_counts)] <- 0
+	priming_reads <- aggregate(temp_storage[,4]~temp_storage[,3],temp_storage, sum)
+	gene_priming[priming_reads[,1],counter] <- priming_reads[,2]
+	gene_priming[is.na(gene_priming)] <- 0
+	counter <- counter+1
+}
+
+#End of filling in
+gene_ratio <- gene_counts/gene_coverage
+
+###Export to Galaxy
+data <- data.frame(gene_counts, gene_coverage, gene_priming, gene_ratio )
+colnames(data) <- c("Read counts", "Effective Coverage EUC", "Termination EUC", "TCR")
+
+write.table( data, opt$output, sep = "\t", quote = F, row.names = F)
+
+
+#Return plots
+if ( !is.null(opt$plots) ) {
+	pdf(opt$plots)
+
+	# Termination signal
+	plot(gene_counts, type= 'l', main = "Termination signal", ylab = "", xlab = "Position")
+
+	# Priming signal
+	plot(gene_priming, type= 'l', main = "Priming signal", ylab = "", xlab = "Position")
+	# Effective Coverage
+
+	y <- gene_coverage
+	y[is.na(y)] <- 0
+	# duplicate the coordinates to build step-function outlines for the filled coverage area
+	x <- seq_along(y)
+	y2 <- rep(y, each=2)
+	y2 <- y2[-length(y2)]
+	x2 <- rep(x, each=2)[-1]
+	x3 <- c(min(x2), x2, max(x2))
+	y3 <- c(0, y2, 0)
+
+	# because polygon() is dumb and wants a pre-existing plot
+	plot(x, y, ylim=c(0, max(y)), type="n", main = "Effective Coverage", ylab = "", xlab = "Position")
+
+	polygon(x3, y3, border=NA, col="black")
+	lines(x2, y2)
+
+	# Termination Coverage Ratio
+	plot(gene_ratio, type= 'l', main = "Termination Coverage Ratio", ylab = "", xlab = "Position")
+	dump <- dev.off()
+}
diff -r 000000000000 -r 3ecde8c1bd83 hrf-seq/rna_probing_coverage.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hrf-seq/rna_probing_coverage.xml	Thu Sep 11 08:21:15 2014 -0400
@@ -0,0 +1,61 @@
+
+ (Termination Coverage Ratio) on HRF-Seq data
+
+ rna_probing_coverage.R
+
+ ## Inputs
+ --sample '$input1'
+
+ ##--control '$input2'
+
+ ## Plotting
+ #if $pdf:
+ --plots '$plots'
+ #end if
+
+ ## Options
+ #if str( $params.full) == "yes":
+
+ -p $params.peak
+ -b $params.ball
+ -w $params.window
+ #else
+ -p 100
+ -b 4
+ -w 1
+ #end if
+
+ ## Output
+ -o '$output'
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ pdf is True
+ + + + +
diff -r 000000000000 -r 3ecde8c1bd83 hrf-seq/tool_dependencies.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hrf-seq/tool_dependencies.xml	Thu Sep 11 08:21:15 2014 -0400
@@ -0,0 +1,26 @@
+ + + + +
+ http://pypi.python.org/packages/source/c/cutadapt/cutadapt-1.4.2.tar.gz
+ python setup.py install --home $INSTALL_DIR --install-scripts $INSTALL_DIR/bin
+
+ $INSTALL_DIR/lib/python
+ $INSTALL_DIR/bin
+ + + + + + + + + + + + + + + +