diff vst_transformation.pl @ 0:a48dde9abecf draft

Uploaded initial version
author joachim-jacob
date Wed, 17 Jul 2013 04:06:12 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vst_transformation.pl	Wed Jul 17 04:06:12 2013 -0400
@@ -0,0 +1,240 @@
+#!/usr/bin/perl
+# vst_transformation.pl
+# Joachim Jacob - joachim.jacob@vib.be - 2013
+
+use strict;
+use File::Basename;
+use Log::Log4perl qw(:easy);
+
+# ---------------------- Prepping Logging -----------------------------#
+########################################################################
+# Log levels: 			$DEBUG	$INFO	$WARN	$ERROR	$FATAL
+# ConversionPattern:	%d %-5p %F{1} [%M] (line %L): %m%n%n
+my $log_conf = q/ 
+    log4perl.category = ERROR, Screen 
+    log4perl.appender.Screen.stderr=1
+    log4perl.appender.Screen.layout=Log::Log4perl::Layout::PatternLayout
+    log4perl.appender.Screen.layout.ConversionPattern = %d %-5p %m%n
+    log4perl.appender.Screen = Log::Log4perl::Appender::Screen 
+/;
+
+Log::Log4perl::init( \$log_conf );
+my $logger = get_logger();
+
+# ----------------- Getting parameters file ---------------------------#
+########################################################################
+my ($parameters) = @ARGV;
+my (%para, @repeat_blocks);
+open(PARAMETERS,"<$parameters");
+while (<PARAMETERS>) {
+	if (/(\S+)==(.+)$/){ 
+		$para{$1} = $2;
+	}
+}
+close(PARAMETERS);
+
+=Excerpt Config parameters
+		counttable=$counttable
+        outtab==$outttab
+        html_file==$html_file
+
+        ##REPEAT BLOCK
+        #for $i, $s in enumerate( $replicates )
+        replicates_$i==$s.repl_group_name
+        replicates_$i==$s.rep_columns
+        #end for
+        
+        replicates_$s.repl_group_name==$s.rep_columns
+
+=cut
+
+for my $para (keys %para){
+	INFO "$para==$para{$para}";
+}
+
+# --------------------- KNITR SCRIPT ----------------------------------#
+########################################################################
+# This script compile the HTML output from the R Markdown script below.
+my $htmloutput=$para{'html_file'};
+open(KNITRSCRIPT , ">" , "knitrscript.r");
+print KNITRSCRIPT <<EOF;
+require(knitr)
+require(markdown)
+knit("rmarkdownscript.rmd", "temp.md")
+markdownToHTML("temp.md", "$htmloutput")
+EOF
+
+# -------------------- R MARKDOWN SCRIPT ------------------------------#
+########################################################################
+open(RMARKDOWN , ">>" , "rmarkdownscript.rmd");
+
+## Initiating R environment
+## ---------------------------------------------------------------------
+print  RMARKDOWN <<EOF;
+VST transformation
+================================
+
+Loading the R environment
+-------------------------
+
+```{r initiating environment, message=FALSE, highlight=TRUE, results="hide"}
+library('DESeq')
+library(reshape)
+library(ggplot2)
+```
+----
+EOF
+
+## Reading input
+## Reshaping parameters
+## ---------------------------------------------------------------------
+my ($col_names,$row_names, $columns, $repnames, $user_col_names)=("","","","","");
+
+# Getting the column numbers and replicate names
+for my $parameter (keys %para){
+	if ($parameter =~ /^replicates_(\S+)$/ ) {
+		my $rep_name=$1;
+		my @cols = split(",",$para{$parameter});
+		# we need to subtract one from the column number if
+		# an column with identifiers is present.
+		# This column will be removed from the data when reading in.
+		if ( $para{'header_columns'} eq 'TRUE' ){
+			foreach (@cols){
+				my $colnum = $_-1;		
+				$columns.= $colnum.",";
+			}
+		} else {
+			$columns.=$para{$parameter}.","; 
+		}
+		# We fill a vector of replicate names as many as there are columns
+		$repnames.= "\"$rep_name\"," x @cols;
+	}
+}
+chop $columns;
+chop $repnames;
+
+# If header column is present, this column contains the names of the rows.
+if ( $para{'header_columns'} eq 'TRUE' ) {
+	$row_names = ' row.names = 1, '
+} 
+
+# If the first row contains the column names? 
+if ( $para{'header_row'} eq 'TRUE' ) {
+	$col_names = ' header=T,  ';
+} else { 
+	$user_col_names = " col.names=c(";
+	if ( $para{'header_columns'} eq 'TRUE' ) { $user_col_names .= '"ID",'; }
+	$user_col_names .= " $repnames), ";
+}
+
+print RMARKDOWN <<EOF;
+Reading input
+-------------
+```{r reading input, message=FALSE, highlight=TRUE}
+filename="$para{'counttable'}"
+raw_counts = read.csv(filename, sep="\t", $col_names $row_names stringsAsFactors=F)
+raw_counts_deseqct = newCountDataSet(raw_counts[,c($columns)], factor(c($repnames)))
+```
+----
+EOF
+
+## Applying variance stabilizing transformation
+## ---------------------------------------------------------------------
+print RMARKDOWN <<EOF;
+Performing VST transformation
+-----------------------------
+```{r vst on raw counts, message=FALSE, highlight=TRUE}
+raw_counts_deseqct = newCountDataSet(raw_counts[,c($columns)], factor(c($repnames)))
+raw_counts_deseqct = estimateSizeFactors(raw_counts_deseqct)
+raw_counts_deseqctBlind = estimateDispersions(raw_counts_deseqct,method="blind")
+vsd = varianceStabilizingTransformation(raw_counts_deseqctBlind)
+vst_raw_counts = exprs(vsd)
+```
+----
+EOF
+
+## Plotting and writing results
+## ---------------------------------------------------------------------
+my ($colnames_out, $gg_legend, $hist_title)=("","","");
+if($para{'header_row'} eq 'TRUE' ) {
+	$colnames_out="col.names=NA , ";
+	$hist_title='main=paste("Sample",sample),';
+} else {
+	$colnames_out="col.names=FALSE , "; 
+	$gg_legend= '+ theme(legend.position="none") ';
+	$hist_title='main="Histogram VST counts", ';
+}
+
+print  RMARKDOWN <<EOF;
+Histograms
+----------
+```{r plotting and writing, message=FALSE, echo=FALSE}
+write.table( 	vst_raw_counts, file = "$para{'outtab'}", 
+	sep = "\\t", quote = F, $colnames_out 	row.names=$para{'header_columns'}
+)
+vst_raw_counts_melted = melt(vst_raw_counts)
+colnames(vst_raw_counts_melted)=c("ID","samples","value")
+ggplot( data=vst_raw_counts_melted, aes(value,colour=samples)) + geom_density(alpha=0.2) + labs(x="VST of counts") $gg_legend
+for(sample in colnames(vst_raw_counts)){ 
+	hist(vst_raw_counts[,colnames(vst_raw_counts)==sample], 
+	$hist_title breaks=80, xlab="VST counts" )
+}
+```
+----
+
+EOF
+
+## Plotting and writing results out
+## ---------------------------------------------------------------------
+print  RMARKDOWN <<EOF;
+Logging R environment
+---------------------
+```{r for the log, highlight=FALSE}
+sessionInfo()
+```
+----
+EOF
+
+# -------------------- Logging scripts --------------------------------#
+########################################################################
+open(my $knitrscript,"<","knitrscript.r");
+open(my $rmarkdownscript,"<","rmarkdownscript.rmd");
+while(<$knitrscript>){ 
+	INFO $_;
+}
+close($knitrscript);
+close($rmarkdownscript);
+
+
+# -------------------- Assembling command  ----------------------------#
+########################################################################
+my $command = "R CMD BATCH knitrscript.r";   # to execute
+
+# --------------------- Executing command  ----------------------------#
+########################################################################
+run_process($command, "Knitting RMarkdown script");
+
+# ------------------- Cleaning up and exiting  ------------------------#
+########################################################################
+$command = "rm -rf temp.md knitrscript.r knitrscript.r.Rout rmarkdownscript.rmd";
+run_process($command, "Cleaning up");
+exit 0;
+
+### 					      Subroutines 						     ###
+########################################################################
+sub run_process {
+	my ($command, $name)= @_;
+	my $logger = get_logger();
+	INFO "Launching process\n    $command\n";
+	system("$command 2>/dev/null") == 0 or die "$name failed\nExit status $?\nCommand: $command";
+	if ($? == -1) {
+		print "failed to execute: $!\n";
+	} elsif ($? & 127) {
+		printf "child died with signal %d, %s coredump\n", ($? & 127), ($? & 128) ? 'with' : 'without';
+	} else {
+		printf "$name executed successfully\n", $? >> 8;
+	}	
+}
+
+
+