Mercurial > repos > joachim-jacob > vstranformation
view vst_transformation.pl @ 1:7d7d6c743df0 draft default tip
Uploaded corrected tool dependencies.xml
author | joachim-jacob |
---|---|
date | Wed, 17 Jul 2013 04:07:52 -0400 |
parents | a48dde9abecf |
children |
line wrap: on
line source
#!/usr/bin/perl # vst_transformation.pl # Joachim Jacob - joachim.jacob@vib.be - 2013 use strict; use File::Basename; use Log::Log4perl qw(:easy); # ---------------------- Prepping Logging -----------------------------# ######################################################################## # Log levels: $DEBUG $INFO $WARN $ERROR $FATAL # ConversionPattern: %d %-5p %F{1} [%M] (line %L): %m%n%n my $log_conf = q/ log4perl.category = ERROR, Screen log4perl.appender.Screen.stderr=1 log4perl.appender.Screen.layout=Log::Log4perl::Layout::PatternLayout log4perl.appender.Screen.layout.ConversionPattern = %d %-5p %m%n log4perl.appender.Screen = Log::Log4perl::Appender::Screen /; Log::Log4perl::init( \$log_conf ); my $logger = get_logger(); # ----------------- Getting parameters file ---------------------------# ######################################################################## my ($parameters) = @ARGV; my (%para, @repeat_blocks); open(PARAMETERS,"<$parameters"); while (<PARAMETERS>) { if (/(\S+)==(.+)$/){ $para{$1} = $2; } } close(PARAMETERS); =Excerpt Config parameters counttable=$counttable outtab==$outttab html_file==$html_file ##REPEAT BLOCK #for $i, $s in enumerate( $replicates ) replicates_$i==$s.repl_group_name replicates_$i==$s.rep_columns #end for replicates_$s.repl_group_name==$s.rep_columns =cut for my $para (keys %para){ INFO "$para==$para{$para}"; } # --------------------- KNITR SCRIPT ----------------------------------# ######################################################################## # This script compile the HTML output from the R Markdown script below. my $htmloutput=$para{'html_file'}; open(KNITRSCRIPT , ">" , "knitrscript.r"); print KNITRSCRIPT <<EOF; require(knitr) require(markdown) knit("rmarkdownscript.rmd", "temp.md") markdownToHTML("temp.md", "$htmloutput") EOF # -------------------- R MARKDOWN SCRIPT ------------------------------# ######################################################################## open(RMARKDOWN , ">>" , "rmarkdownscript.rmd"); ## Initiating R environment ## --------------------------------------------------------------------- print RMARKDOWN <<EOF; VST transformation ================================ Loading the R environment ------------------------- ```{r initiating environment, message=FALSE, highlight=TRUE, results="hide"} library('DESeq') library(reshape) library(ggplot2) ``` ---- EOF ## Reading input ## Reshaping parameters ## --------------------------------------------------------------------- my ($col_names,$row_names, $columns, $repnames, $user_col_names)=("","","","",""); # Getting the column numbers and replicate names for my $parameter (keys %para){ if ($parameter =~ /^replicates_(\S+)$/ ) { my $rep_name=$1; my @cols = split(",",$para{$parameter}); # we need to subtract one from the column number if # an column with identifiers is present. # This column will be removed from the data when reading in. if ( $para{'header_columns'} eq 'TRUE' ){ foreach (@cols){ my $colnum = $_-1; $columns.= $colnum.","; } } else { $columns.=$para{$parameter}.","; } # We fill a vector of replicate names as many as there are columns $repnames.= "\"$rep_name\"," x @cols; } } chop $columns; chop $repnames; # If header column is present, this column contains the names of the rows. if ( $para{'header_columns'} eq 'TRUE' ) { $row_names = ' row.names = 1, ' } # If the first row contains the column names? if ( $para{'header_row'} eq 'TRUE' ) { $col_names = ' header=T, '; } else { $user_col_names = " col.names=c("; if ( $para{'header_columns'} eq 'TRUE' ) { $user_col_names .= '"ID",'; } $user_col_names .= " $repnames), "; } print RMARKDOWN <<EOF; Reading input ------------- ```{r reading input, message=FALSE, highlight=TRUE} filename="$para{'counttable'}" raw_counts = read.csv(filename, sep="\t", $col_names $row_names stringsAsFactors=F) raw_counts_deseqct = newCountDataSet(raw_counts[,c($columns)], factor(c($repnames))) ``` ---- EOF ## Applying variance stabilizing transformation ## --------------------------------------------------------------------- print RMARKDOWN <<EOF; Performing VST transformation ----------------------------- ```{r vst on raw counts, message=FALSE, highlight=TRUE} raw_counts_deseqct = newCountDataSet(raw_counts[,c($columns)], factor(c($repnames))) raw_counts_deseqct = estimateSizeFactors(raw_counts_deseqct) raw_counts_deseqctBlind = estimateDispersions(raw_counts_deseqct,method="blind") vsd = varianceStabilizingTransformation(raw_counts_deseqctBlind) vst_raw_counts = exprs(vsd) ``` ---- EOF ## Plotting and writing results ## --------------------------------------------------------------------- my ($colnames_out, $gg_legend, $hist_title)=("","",""); if($para{'header_row'} eq 'TRUE' ) { $colnames_out="col.names=NA , "; $hist_title='main=paste("Sample",sample),'; } else { $colnames_out="col.names=FALSE , "; $gg_legend= '+ theme(legend.position="none") '; $hist_title='main="Histogram VST counts", '; } print RMARKDOWN <<EOF; Histograms ---------- ```{r plotting and writing, message=FALSE, echo=FALSE} write.table( vst_raw_counts, file = "$para{'outtab'}", sep = "\\t", quote = F, $colnames_out row.names=$para{'header_columns'} ) vst_raw_counts_melted = melt(vst_raw_counts) colnames(vst_raw_counts_melted)=c("ID","samples","value") ggplot( data=vst_raw_counts_melted, aes(value,colour=samples)) + geom_density(alpha=0.2) + labs(x="VST of counts") $gg_legend for(sample in colnames(vst_raw_counts)){ hist(vst_raw_counts[,colnames(vst_raw_counts)==sample], $hist_title breaks=80, xlab="VST counts" ) } ``` ---- EOF ## Plotting and writing results out ## --------------------------------------------------------------------- print RMARKDOWN <<EOF; Logging R environment --------------------- ```{r for the log, highlight=FALSE} sessionInfo() ``` ---- EOF # -------------------- Logging scripts --------------------------------# ######################################################################## open(my $knitrscript,"<","knitrscript.r"); open(my $rmarkdownscript,"<","rmarkdownscript.rmd"); while(<$knitrscript>){ INFO $_; } close($knitrscript); close($rmarkdownscript); # -------------------- Assembling command ----------------------------# ######################################################################## my $command = "R CMD BATCH knitrscript.r"; # to execute # --------------------- Executing command ----------------------------# ######################################################################## run_process($command, "Knitting RMarkdown script"); # ------------------- Cleaning up and exiting ------------------------# ######################################################################## $command = "rm -rf temp.md knitrscript.r knitrscript.r.Rout rmarkdownscript.rmd"; run_process($command, "Cleaning up"); exit 0; ### Subroutines ### ######################################################################## sub run_process { my ($command, $name)= @_; my $logger = get_logger(); INFO "Launching process\n $command\n"; system("$command 2>/dev/null") == 0 or die "$name failed\nExit status $?\nCommand: $command"; if ($? == -1) { print "failed to execute: $!\n"; } elsif ($? & 127) { printf "child died with signal %d, %s coredump\n", ($? & 127), ($? & 128) ? 'with' : 'without'; } else { printf "$name executed successfully\n", $? >> 8; } }