view vst_transformation.pl @ 1:7d7d6c743df0 draft default tip

Uploaded corrected tool dependencies.xml
author joachim-jacob
date Wed, 17 Jul 2013 04:07:52 -0400
parents a48dde9abecf
children
line wrap: on
line source

#!/usr/bin/perl
# vst_transformation.pl
# Joachim Jacob - joachim.jacob@vib.be - 2013

use strict;
use File::Basename;
use Log::Log4perl qw(:easy);

# ---------------------- Prepping Logging -----------------------------#
########################################################################
# Log levels: 			$DEBUG	$INFO	$WARN	$ERROR	$FATAL
# ConversionPattern:	%d %-5p %F{1} [%M] (line %L): %m%n%n
my $log_conf = q/ 
    log4perl.category = ERROR, Screen 
    log4perl.appender.Screen.stderr=1
    log4perl.appender.Screen.layout=Log::Log4perl::Layout::PatternLayout
    log4perl.appender.Screen.layout.ConversionPattern = %d %-5p %m%n
    log4perl.appender.Screen = Log::Log4perl::Appender::Screen 
/;

Log::Log4perl::init( \$log_conf );
my $logger = get_logger();

# ----------------- Getting parameters file ---------------------------#
########################################################################
my ($parameters) = @ARGV;
my (%para, @repeat_blocks);
open(PARAMETERS,"<$parameters");
while (<PARAMETERS>) {
	if (/(\S+)==(.+)$/){ 
		$para{$1} = $2;
	}
}
close(PARAMETERS);

=Excerpt Config parameters
		counttable=$counttable
        outtab==$outttab
        html_file==$html_file

        ##REPEAT BLOCK
        #for $i, $s in enumerate( $replicates )
        replicates_$i==$s.repl_group_name
        replicates_$i==$s.rep_columns
        #end for
        
        replicates_$s.repl_group_name==$s.rep_columns

=cut

for my $para (keys %para){
	INFO "$para==$para{$para}";
}

# --------------------- KNITR SCRIPT ----------------------------------#
########################################################################
# This script compile the HTML output from the R Markdown script below.
my $htmloutput=$para{'html_file'};
open(KNITRSCRIPT , ">" , "knitrscript.r");
print KNITRSCRIPT <<EOF;
require(knitr)
require(markdown)
knit("rmarkdownscript.rmd", "temp.md")
markdownToHTML("temp.md", "$htmloutput")
EOF

# -------------------- R MARKDOWN SCRIPT ------------------------------#
########################################################################
open(RMARKDOWN , ">>" , "rmarkdownscript.rmd");

## Initiating R environment
## ---------------------------------------------------------------------
print  RMARKDOWN <<EOF;
VST transformation
================================

Loading the R environment
-------------------------

```{r initiating environment, message=FALSE, highlight=TRUE, results="hide"}
library('DESeq')
library(reshape)
library(ggplot2)
```
----
EOF

## Reading input
## Reshaping parameters
## ---------------------------------------------------------------------
my ($col_names,$row_names, $columns, $repnames, $user_col_names)=("","","","","");

# Getting the column numbers and replicate names
for my $parameter (keys %para){
	if ($parameter =~ /^replicates_(\S+)$/ ) {
		my $rep_name=$1;
		my @cols = split(",",$para{$parameter});
		# we need to subtract one from the column number if
		# an column with identifiers is present.
		# This column will be removed from the data when reading in.
		if ( $para{'header_columns'} eq 'TRUE' ){
			foreach (@cols){
				my $colnum = $_-1;		
				$columns.= $colnum.",";
			}
		} else {
			$columns.=$para{$parameter}.","; 
		}
		# We fill a vector of replicate names as many as there are columns
		$repnames.= "\"$rep_name\"," x @cols;
	}
}
chop $columns;
chop $repnames;

# If header column is present, this column contains the names of the rows.
if ( $para{'header_columns'} eq 'TRUE' ) {
	$row_names = ' row.names = 1, '
} 

# If the first row contains the column names? 
if ( $para{'header_row'} eq 'TRUE' ) {
	$col_names = ' header=T,  ';
} else { 
	$user_col_names = " col.names=c(";
	if ( $para{'header_columns'} eq 'TRUE' ) { $user_col_names .= '"ID",'; }
	$user_col_names .= " $repnames), ";
}

print RMARKDOWN <<EOF;
Reading input
-------------
```{r reading input, message=FALSE, highlight=TRUE}
filename="$para{'counttable'}"
raw_counts = read.csv(filename, sep="\t", $col_names $row_names stringsAsFactors=F)
raw_counts_deseqct = newCountDataSet(raw_counts[,c($columns)], factor(c($repnames)))
```
----
EOF

## Applying variance stabilizing transformation
## ---------------------------------------------------------------------
print RMARKDOWN <<EOF;
Performing VST transformation
-----------------------------
```{r vst on raw counts, message=FALSE, highlight=TRUE}
raw_counts_deseqct = newCountDataSet(raw_counts[,c($columns)], factor(c($repnames)))
raw_counts_deseqct = estimateSizeFactors(raw_counts_deseqct)
raw_counts_deseqctBlind = estimateDispersions(raw_counts_deseqct,method="blind")
vsd = varianceStabilizingTransformation(raw_counts_deseqctBlind)
vst_raw_counts = exprs(vsd)
```
----
EOF

## Plotting and writing results
## ---------------------------------------------------------------------
my ($colnames_out, $gg_legend, $hist_title)=("","","");
if($para{'header_row'} eq 'TRUE' ) {
	$colnames_out="col.names=NA , ";
	$hist_title='main=paste("Sample",sample),';
} else {
	$colnames_out="col.names=FALSE , "; 
	$gg_legend= '+ theme(legend.position="none") ';
	$hist_title='main="Histogram VST counts", ';
}

print  RMARKDOWN <<EOF;
Histograms
----------
```{r plotting and writing, message=FALSE, echo=FALSE}
write.table( 	vst_raw_counts, file = "$para{'outtab'}", 
	sep = "\\t", quote = F, $colnames_out 	row.names=$para{'header_columns'}
)
vst_raw_counts_melted = melt(vst_raw_counts)
colnames(vst_raw_counts_melted)=c("ID","samples","value")
ggplot( data=vst_raw_counts_melted, aes(value,colour=samples)) + geom_density(alpha=0.2) + labs(x="VST of counts") $gg_legend
for(sample in colnames(vst_raw_counts)){ 
	hist(vst_raw_counts[,colnames(vst_raw_counts)==sample], 
	$hist_title breaks=80, xlab="VST counts" )
}
```
----

EOF

## Plotting and writing results out
## ---------------------------------------------------------------------
print  RMARKDOWN <<EOF;
Logging R environment
---------------------
```{r for the log, highlight=FALSE}
sessionInfo()
```
----
EOF

# -------------------- Logging scripts --------------------------------#
########################################################################
open(my $knitrscript,"<","knitrscript.r");
open(my $rmarkdownscript,"<","rmarkdownscript.rmd");
while(<$knitrscript>){ 
	INFO $_;
}
close($knitrscript);
close($rmarkdownscript);


# -------------------- Assembling command  ----------------------------#
########################################################################
my $command = "R CMD BATCH knitrscript.r";   # to execute

# --------------------- Executing command  ----------------------------#
########################################################################
run_process($command, "Knitting RMarkdown script");

# ------------------- Cleaning up and exiting  ------------------------#
########################################################################
$command = "rm -rf temp.md knitrscript.r knitrscript.r.Rout rmarkdownscript.rmd";
run_process($command, "Cleaning up");
exit 0;

### 					      Subroutines 						     ###
########################################################################
sub run_process {
	my ($command, $name)= @_;
	my $logger = get_logger();
	INFO "Launching process\n    $command\n";
	system("$command 2>/dev/null") == 0 or die "$name failed\nExit status $?\nCommand: $command";
	if ($? == -1) {
		print "failed to execute: $!\n";
	} elsif ($? & 127) {
		printf "child died with signal %d, %s coredump\n", ($? & 127), ($? & 128) ? 'with' : 'without';
	} else {
		printf "$name executed successfully\n", $? >> 8;
	}	
}