comparison vst_transformation.pl @ 0:a48dde9abecf draft

Uploaded initial version
author joachim-jacob
date Wed, 17 Jul 2013 04:06:12 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:a48dde9abecf
1 #!/usr/bin/perl
2 # vst_transformation.pl
3 # Joachim Jacob - joachim.jacob@vib.be - 2013
4
5 use strict;
6 use File::Basename;
7 use Log::Log4perl qw(:easy);
8
9 # ---------------------- Prepping Logging -----------------------------#
10 ########################################################################
11 # Log levels: $DEBUG $INFO $WARN $ERROR $FATAL
12 # ConversionPattern: %d %-5p %F{1} [%M] (line %L): %m%n%n
13 my $log_conf = q/
14 log4perl.category = ERROR, Screen
15 log4perl.appender.Screen.stderr=1
16 log4perl.appender.Screen.layout=Log::Log4perl::Layout::PatternLayout
17 log4perl.appender.Screen.layout.ConversionPattern = %d %-5p %m%n
18 log4perl.appender.Screen = Log::Log4perl::Appender::Screen
19 /;
20
21 Log::Log4perl::init( \$log_conf );
22 my $logger = get_logger();
23
24 # ----------------- Getting parameters file ---------------------------#
25 ########################################################################
26 my ($parameters) = @ARGV;
27 my (%para, @repeat_blocks);
28 open(PARAMETERS,"<$parameters");
29 while (<PARAMETERS>) {
30 if (/(\S+)==(.+)$/){
31 $para{$1} = $2;
32 }
33 }
34 close(PARAMETERS);
35
36 =Excerpt Config parameters
37 counttable=$counttable
38 outtab==$outttab
39 html_file==$html_file
40
41 ##REPEAT BLOCK
42 #for $i, $s in enumerate( $replicates )
43 replicates_$i==$s.repl_group_name
44 replicates_$i==$s.rep_columns
45 #end for
46
47 replicates_$s.repl_group_name==$s.rep_columns
48
49 =cut
50
51 for my $para (keys %para){
52 INFO "$para==$para{$para}";
53 }
54
55 # --------------------- KNITR SCRIPT ----------------------------------#
56 ########################################################################
57 # This script compile the HTML output from the R Markdown script below.
58 my $htmloutput=$para{'html_file'};
59 open(KNITRSCRIPT , ">" , "knitrscript.r");
60 print KNITRSCRIPT <<EOF;
61 require(knitr)
62 require(markdown)
63 knit("rmarkdownscript.rmd", "temp.md")
64 markdownToHTML("temp.md", "$htmloutput")
65 EOF
66
67 # -------------------- R MARKDOWN SCRIPT ------------------------------#
68 ########################################################################
69 open(RMARKDOWN , ">>" , "rmarkdownscript.rmd");
70
71 ## Initiating R environment
72 ## ---------------------------------------------------------------------
73 print RMARKDOWN <<EOF;
74 VST transformation
75 ================================
76
77 Loading the R environment
78 -------------------------
79
80 ```{r initiating environment, message=FALSE, highlight=TRUE, results="hide"}
81 library('DESeq')
82 library(reshape)
83 library(ggplot2)
84 ```
85 ----
86 EOF
87
88 ## Reading input
89 ## Reshaping parameters
90 ## ---------------------------------------------------------------------
91 my ($col_names,$row_names, $columns, $repnames, $user_col_names)=("","","","","");
92
93 # Getting the column numbers and replicate names
94 for my $parameter (keys %para){
95 if ($parameter =~ /^replicates_(\S+)$/ ) {
96 my $rep_name=$1;
97 my @cols = split(",",$para{$parameter});
98 # we need to subtract one from the column number if
99 # an column with identifiers is present.
100 # This column will be removed from the data when reading in.
101 if ( $para{'header_columns'} eq 'TRUE' ){
102 foreach (@cols){
103 my $colnum = $_-1;
104 $columns.= $colnum.",";
105 }
106 } else {
107 $columns.=$para{$parameter}.",";
108 }
109 # We fill a vector of replicate names as many as there are columns
110 $repnames.= "\"$rep_name\"," x @cols;
111 }
112 }
113 chop $columns;
114 chop $repnames;
115
116 # If header column is present, this column contains the names of the rows.
117 if ( $para{'header_columns'} eq 'TRUE' ) {
118 $row_names = ' row.names = 1, '
119 }
120
121 # If the first row contains the column names?
122 if ( $para{'header_row'} eq 'TRUE' ) {
123 $col_names = ' header=T, ';
124 } else {
125 $user_col_names = " col.names=c(";
126 if ( $para{'header_columns'} eq 'TRUE' ) { $user_col_names .= '"ID",'; }
127 $user_col_names .= " $repnames), ";
128 }
129
130 print RMARKDOWN <<EOF;
131 Reading input
132 -------------
133 ```{r reading input, message=FALSE, highlight=TRUE}
134 filename="$para{'counttable'}"
135 raw_counts = read.csv(filename, sep="\t", $col_names $row_names stringsAsFactors=F)
136 raw_counts_deseqct = newCountDataSet(raw_counts[,c($columns)], factor(c($repnames)))
137 ```
138 ----
139 EOF
140
141 ## Applying variance stabilizing transformation
142 ## ---------------------------------------------------------------------
143 print RMARKDOWN <<EOF;
144 Performing VST transformation
145 -----------------------------
146 ```{r vst on raw counts, message=FALSE, highlight=TRUE}
147 raw_counts_deseqct = newCountDataSet(raw_counts[,c($columns)], factor(c($repnames)))
148 raw_counts_deseqct = estimateSizeFactors(raw_counts_deseqct)
149 raw_counts_deseqctBlind = estimateDispersions(raw_counts_deseqct,method="blind")
150 vsd = varianceStabilizingTransformation(raw_counts_deseqctBlind)
151 vst_raw_counts = exprs(vsd)
152 ```
153 ----
154 EOF
155
156 ## Plotting and writing results
157 ## ---------------------------------------------------------------------
158 my ($colnames_out, $gg_legend, $hist_title)=("","","");
159 if($para{'header_row'} eq 'TRUE' ) {
160 $colnames_out="col.names=NA , ";
161 $hist_title='main=paste("Sample",sample),';
162 } else {
163 $colnames_out="col.names=FALSE , ";
164 $gg_legend= '+ theme(legend.position="none") ';
165 $hist_title='main="Histogram VST counts", ';
166 }
167
168 print RMARKDOWN <<EOF;
169 Histograms
170 ----------
171 ```{r plotting and writing, message=FALSE, echo=FALSE}
172 write.table( vst_raw_counts, file = "$para{'outtab'}",
173 sep = "\\t", quote = F, $colnames_out row.names=$para{'header_columns'}
174 )
175 vst_raw_counts_melted = melt(vst_raw_counts)
176 colnames(vst_raw_counts_melted)=c("ID","samples","value")
177 ggplot( data=vst_raw_counts_melted, aes(value,colour=samples)) + geom_density(alpha=0.2) + labs(x="VST of counts") $gg_legend
178 for(sample in colnames(vst_raw_counts)){
179 hist(vst_raw_counts[,colnames(vst_raw_counts)==sample],
180 $hist_title breaks=80, xlab="VST counts" )
181 }
182 ```
183 ----
184
185 EOF
186
187 ## Plotting and writing results out
188 ## ---------------------------------------------------------------------
189 print RMARKDOWN <<EOF;
190 Logging R environment
191 ---------------------
192 ```{r for the log, highlight=FALSE}
193 sessionInfo()
194 ```
195 ----
196 EOF
197
198 # -------------------- Logging scripts --------------------------------#
199 ########################################################################
200 open(my $knitrscript,"<","knitrscript.r");
201 open(my $rmarkdownscript,"<","rmarkdownscript.rmd");
202 while(<$knitrscript>){
203 INFO $_;
204 }
205 close($knitrscript);
206 close($rmarkdownscript);
207
208
209 # -------------------- Assembling command ----------------------------#
210 ########################################################################
211 my $command = "R CMD BATCH knitrscript.r"; # to execute
212
213 # --------------------- Executing command ----------------------------#
214 ########################################################################
215 run_process($command, "Knitting RMarkdown script");
216
217 # ------------------- Cleaning up and exiting ------------------------#
218 ########################################################################
219 $command = "rm -rf temp.md knitrscript.r knitrscript.r.Rout rmarkdownscript.rmd";
220 run_process($command, "Cleaning up");
221 exit 0;
222
223 ### Subroutines ###
224 ########################################################################
225 sub run_process {
226 my ($command, $name)= @_;
227 my $logger = get_logger();
228 INFO "Launching process\n $command\n";
229 system("$command 2>/dev/null") == 0 or die "$name failed\nExit status $?\nCommand: $command";
230 if ($? == -1) {
231 print "failed to execute: $!\n";
232 } elsif ($? & 127) {
233 printf "child died with signal %d, %s coredump\n", ($? & 127), ($? & 128) ? 'with' : 'without';
234 } else {
235 printf "$name executed successfully\n", $? >> 8;
236 }
237 }
238
239
240