Mercurial > repos > joachim-jacob > vstranformation
diff vst_transformation.pl @ 0:a48dde9abecf draft
Uploaded initial version
author | joachim-jacob |
---|---|
date | Wed, 17 Jul 2013 04:06:12 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/vst_transformation.pl Wed Jul 17 04:06:12 2013 -0400 @@ -0,0 +1,240 @@ +#!/usr/bin/perl +# vst_transformation.pl +# Joachim Jacob - joachim.jacob@vib.be - 2013 + +use strict; +use File::Basename; +use Log::Log4perl qw(:easy); + +# ---------------------- Prepping Logging -----------------------------# +######################################################################## +# Log levels: $DEBUG $INFO $WARN $ERROR $FATAL +# ConversionPattern: %d %-5p %F{1} [%M] (line %L): %m%n%n +my $log_conf = q/ + log4perl.category = ERROR, Screen + log4perl.appender.Screen.stderr=1 + log4perl.appender.Screen.layout=Log::Log4perl::Layout::PatternLayout + log4perl.appender.Screen.layout.ConversionPattern = %d %-5p %m%n + log4perl.appender.Screen = Log::Log4perl::Appender::Screen +/; + +Log::Log4perl::init( \$log_conf ); +my $logger = get_logger(); + +# ----------------- Getting parameters file ---------------------------# +######################################################################## +my ($parameters) = @ARGV; +my (%para, @repeat_blocks); +open(PARAMETERS,"<$parameters"); +while (<PARAMETERS>) { + if (/(\S+)==(.+)$/){ + $para{$1} = $2; + } +} +close(PARAMETERS); + +=Excerpt Config parameters + counttable=$counttable + outtab==$outttab + html_file==$html_file + + ##REPEAT BLOCK + #for $i, $s in enumerate( $replicates ) + replicates_$i==$s.repl_group_name + replicates_$i==$s.rep_columns + #end for + + replicates_$s.repl_group_name==$s.rep_columns + +=cut + +for my $para (keys %para){ + INFO "$para==$para{$para}"; +} + +# --------------------- KNITR SCRIPT ----------------------------------# +######################################################################## +# This script compile the HTML output from the R Markdown script below. +my $htmloutput=$para{'html_file'}; +open(KNITRSCRIPT , ">" , "knitrscript.r"); +print KNITRSCRIPT <<EOF; +require(knitr) +require(markdown) +knit("rmarkdownscript.rmd", "temp.md") +markdownToHTML("temp.md", "$htmloutput") +EOF + +# -------------------- R MARKDOWN SCRIPT ------------------------------# +######################################################################## +open(RMARKDOWN , ">>" , "rmarkdownscript.rmd"); + +## Initiating R environment +## --------------------------------------------------------------------- +print RMARKDOWN <<EOF; +VST transformation +================================ + +Loading the R environment +------------------------- + +```{r initiating environment, message=FALSE, highlight=TRUE, results="hide"} +library('DESeq') +library(reshape) +library(ggplot2) +``` +---- +EOF + +## Reading input +## Reshaping parameters +## --------------------------------------------------------------------- +my ($col_names,$row_names, $columns, $repnames, $user_col_names)=("","","","",""); + +# Getting the column numbers and replicate names +for my $parameter (keys %para){ + if ($parameter =~ /^replicates_(\S+)$/ ) { + my $rep_name=$1; + my @cols = split(",",$para{$parameter}); + # we need to subtract one from the column number if + # an column with identifiers is present. + # This column will be removed from the data when reading in. + if ( $para{'header_columns'} eq 'TRUE' ){ + foreach (@cols){ + my $colnum = $_-1; + $columns.= $colnum.","; + } + } else { + $columns.=$para{$parameter}.","; + } + # We fill a vector of replicate names as many as there are columns + $repnames.= "\"$rep_name\"," x @cols; + } +} +chop $columns; +chop $repnames; + +# If header column is present, this column contains the names of the rows. +if ( $para{'header_columns'} eq 'TRUE' ) { + $row_names = ' row.names = 1, ' +} + +# If the first row contains the column names? +if ( $para{'header_row'} eq 'TRUE' ) { + $col_names = ' header=T, '; +} else { + $user_col_names = " col.names=c("; + if ( $para{'header_columns'} eq 'TRUE' ) { $user_col_names .= '"ID",'; } + $user_col_names .= " $repnames), "; +} + +print RMARKDOWN <<EOF; +Reading input +------------- +```{r reading input, message=FALSE, highlight=TRUE} +filename="$para{'counttable'}" +raw_counts = read.csv(filename, sep="\t", $col_names $row_names stringsAsFactors=F) +raw_counts_deseqct = newCountDataSet(raw_counts[,c($columns)], factor(c($repnames))) +``` +---- +EOF + +## Applying variance stabilizing transformation +## --------------------------------------------------------------------- +print RMARKDOWN <<EOF; +Performing VST transformation +----------------------------- +```{r vst on raw counts, message=FALSE, highlight=TRUE} +raw_counts_deseqct = newCountDataSet(raw_counts[,c($columns)], factor(c($repnames))) +raw_counts_deseqct = estimateSizeFactors(raw_counts_deseqct) +raw_counts_deseqctBlind = estimateDispersions(raw_counts_deseqct,method="blind") +vsd = varianceStabilizingTransformation(raw_counts_deseqctBlind) +vst_raw_counts = exprs(vsd) +``` +---- +EOF + +## Plotting and writing results +## --------------------------------------------------------------------- +my ($colnames_out, $gg_legend, $hist_title)=("","",""); +if($para{'header_row'} eq 'TRUE' ) { + $colnames_out="col.names=NA , "; + $hist_title='main=paste("Sample",sample),'; +} else { + $colnames_out="col.names=FALSE , "; + $gg_legend= '+ theme(legend.position="none") '; + $hist_title='main="Histogram VST counts", '; +} + +print RMARKDOWN <<EOF; +Histograms +---------- +```{r plotting and writing, message=FALSE, echo=FALSE} +write.table( vst_raw_counts, file = "$para{'outtab'}", + sep = "\\t", quote = F, $colnames_out row.names=$para{'header_columns'} +) +vst_raw_counts_melted = melt(vst_raw_counts) +colnames(vst_raw_counts_melted)=c("ID","samples","value") +ggplot( data=vst_raw_counts_melted, aes(value,colour=samples)) + geom_density(alpha=0.2) + labs(x="VST of counts") $gg_legend +for(sample in colnames(vst_raw_counts)){ + hist(vst_raw_counts[,colnames(vst_raw_counts)==sample], + $hist_title breaks=80, xlab="VST counts" ) +} +``` +---- + +EOF + +## Plotting and writing results out +## --------------------------------------------------------------------- +print RMARKDOWN <<EOF; +Logging R environment +--------------------- +```{r for the log, highlight=FALSE} +sessionInfo() +``` +---- +EOF + +# -------------------- Logging scripts --------------------------------# +######################################################################## +open(my $knitrscript,"<","knitrscript.r"); +open(my $rmarkdownscript,"<","rmarkdownscript.rmd"); +while(<$knitrscript>){ + INFO $_; +} +close($knitrscript); +close($rmarkdownscript); + + +# -------------------- Assembling command ----------------------------# +######################################################################## +my $command = "R CMD BATCH knitrscript.r"; # to execute + +# --------------------- Executing command ----------------------------# +######################################################################## +run_process($command, "Knitting RMarkdown script"); + +# ------------------- Cleaning up and exiting ------------------------# +######################################################################## +$command = "rm -rf temp.md knitrscript.r knitrscript.r.Rout rmarkdownscript.rmd"; +run_process($command, "Cleaning up"); +exit 0; + +### Subroutines ### +######################################################################## +sub run_process { + my ($command, $name)= @_; + my $logger = get_logger(); + INFO "Launching process\n $command\n"; + system("$command 2>/dev/null") == 0 or die "$name failed\nExit status $?\nCommand: $command"; + if ($? == -1) { + print "failed to execute: $!\n"; + } elsif ($? & 127) { + printf "child died with signal %d, %s coredump\n", ($? & 127), ($? & 128) ? 'with' : 'without'; + } else { + printf "$name executed successfully\n", $? >> 8; + } +} + + +