0
|
1 #!/usr/bin/perl
|
|
2 # vst_transformation.pl
|
|
3 # Joachim Jacob - joachim.jacob@vib.be - 2013
|
|
4
|
|
5 use strict;
|
|
6 use File::Basename;
|
|
7 use Log::Log4perl qw(:easy);
|
|
8
|
|
9 # ---------------------- Prepping Logging -----------------------------#
|
|
10 ########################################################################
|
|
11 # Log levels: $DEBUG $INFO $WARN $ERROR $FATAL
|
|
12 # ConversionPattern: %d %-5p %F{1} [%M] (line %L): %m%n%n
|
|
13 my $log_conf = q/
|
|
14 log4perl.category = ERROR, Screen
|
|
15 log4perl.appender.Screen.stderr=1
|
|
16 log4perl.appender.Screen.layout=Log::Log4perl::Layout::PatternLayout
|
|
17 log4perl.appender.Screen.layout.ConversionPattern = %d %-5p %m%n
|
|
18 log4perl.appender.Screen = Log::Log4perl::Appender::Screen
|
|
19 /;
|
|
20
|
|
21 Log::Log4perl::init( \$log_conf );
|
|
22 my $logger = get_logger();
|
|
23
|
|
24 # ----------------- Getting parameters file ---------------------------#
|
|
25 ########################################################################
|
|
26 my ($parameters) = @ARGV;
|
|
27 my (%para, @repeat_blocks);
|
|
28 open(PARAMETERS,"<$parameters");
|
|
29 while (<PARAMETERS>) {
|
|
30 if (/(\S+)==(.+)$/){
|
|
31 $para{$1} = $2;
|
|
32 }
|
|
33 }
|
|
34 close(PARAMETERS);
|
|
35
|
|
36 =Excerpt Config parameters
|
|
37 counttable=$counttable
|
|
38 outtab==$outttab
|
|
39 html_file==$html_file
|
|
40
|
|
41 ##REPEAT BLOCK
|
|
42 #for $i, $s in enumerate( $replicates )
|
|
43 replicates_$i==$s.repl_group_name
|
|
44 replicates_$i==$s.rep_columns
|
|
45 #end for
|
|
46
|
|
47 replicates_$s.repl_group_name==$s.rep_columns
|
|
48
|
|
49 =cut
|
|
50
|
|
51 for my $para (keys %para){
|
|
52 INFO "$para==$para{$para}";
|
|
53 }
|
|
54
|
|
55 # --------------------- KNITR SCRIPT ----------------------------------#
|
|
56 ########################################################################
|
|
57 # This script compile the HTML output from the R Markdown script below.
|
|
58 my $htmloutput=$para{'html_file'};
|
|
59 open(KNITRSCRIPT , ">" , "knitrscript.r");
|
|
60 print KNITRSCRIPT <<EOF;
|
|
61 require(knitr)
|
|
62 require(markdown)
|
|
63 knit("rmarkdownscript.rmd", "temp.md")
|
|
64 markdownToHTML("temp.md", "$htmloutput")
|
|
65 EOF
|
|
66
|
|
67 # -------------------- R MARKDOWN SCRIPT ------------------------------#
|
|
68 ########################################################################
|
|
69 open(RMARKDOWN , ">>" , "rmarkdownscript.rmd");
|
|
70
|
|
71 ## Initiating R environment
|
|
72 ## ---------------------------------------------------------------------
|
|
73 print RMARKDOWN <<EOF;
|
|
74 VST transformation
|
|
75 ================================
|
|
76
|
|
77 Loading the R environment
|
|
78 -------------------------
|
|
79
|
|
80 ```{r initiating environment, message=FALSE, highlight=TRUE, results="hide"}
|
|
81 library('DESeq')
|
|
82 library(reshape)
|
|
83 library(ggplot2)
|
|
84 ```
|
|
85 ----
|
|
86 EOF
|
|
87
|
|
88 ## Reading input
|
|
89 ## Reshaping parameters
|
|
90 ## ---------------------------------------------------------------------
|
|
91 my ($col_names,$row_names, $columns, $repnames, $user_col_names)=("","","","","");
|
|
92
|
|
93 # Getting the column numbers and replicate names
|
|
94 for my $parameter (keys %para){
|
|
95 if ($parameter =~ /^replicates_(\S+)$/ ) {
|
|
96 my $rep_name=$1;
|
|
97 my @cols = split(",",$para{$parameter});
|
|
98 # we need to subtract one from the column number if
|
|
99 # an column with identifiers is present.
|
|
100 # This column will be removed from the data when reading in.
|
|
101 if ( $para{'header_columns'} eq 'TRUE' ){
|
|
102 foreach (@cols){
|
|
103 my $colnum = $_-1;
|
|
104 $columns.= $colnum.",";
|
|
105 }
|
|
106 } else {
|
|
107 $columns.=$para{$parameter}.",";
|
|
108 }
|
|
109 # We fill a vector of replicate names as many as there are columns
|
|
110 $repnames.= "\"$rep_name\"," x @cols;
|
|
111 }
|
|
112 }
|
|
113 chop $columns;
|
|
114 chop $repnames;
|
|
115
|
|
116 # If header column is present, this column contains the names of the rows.
|
|
117 if ( $para{'header_columns'} eq 'TRUE' ) {
|
|
118 $row_names = ' row.names = 1, '
|
|
119 }
|
|
120
|
|
121 # If the first row contains the column names?
|
|
122 if ( $para{'header_row'} eq 'TRUE' ) {
|
|
123 $col_names = ' header=T, ';
|
|
124 } else {
|
|
125 $user_col_names = " col.names=c(";
|
|
126 if ( $para{'header_columns'} eq 'TRUE' ) { $user_col_names .= '"ID",'; }
|
|
127 $user_col_names .= " $repnames), ";
|
|
128 }
|
|
129
|
|
130 print RMARKDOWN <<EOF;
|
|
131 Reading input
|
|
132 -------------
|
|
133 ```{r reading input, message=FALSE, highlight=TRUE}
|
|
134 filename="$para{'counttable'}"
|
|
135 raw_counts = read.csv(filename, sep="\t", $col_names $row_names stringsAsFactors=F)
|
|
136 raw_counts_deseqct = newCountDataSet(raw_counts[,c($columns)], factor(c($repnames)))
|
|
137 ```
|
|
138 ----
|
|
139 EOF
|
|
140
|
|
141 ## Applying variance stabilizing transformation
|
|
142 ## ---------------------------------------------------------------------
|
|
143 print RMARKDOWN <<EOF;
|
|
144 Performing VST transformation
|
|
145 -----------------------------
|
|
146 ```{r vst on raw counts, message=FALSE, highlight=TRUE}
|
|
147 raw_counts_deseqct = newCountDataSet(raw_counts[,c($columns)], factor(c($repnames)))
|
|
148 raw_counts_deseqct = estimateSizeFactors(raw_counts_deseqct)
|
|
149 raw_counts_deseqctBlind = estimateDispersions(raw_counts_deseqct,method="blind")
|
|
150 vsd = varianceStabilizingTransformation(raw_counts_deseqctBlind)
|
|
151 vst_raw_counts = exprs(vsd)
|
|
152 ```
|
|
153 ----
|
|
154 EOF
|
|
155
|
|
156 ## Plotting and writing results
|
|
157 ## ---------------------------------------------------------------------
|
|
158 my ($colnames_out, $gg_legend, $hist_title)=("","","");
|
|
159 if($para{'header_row'} eq 'TRUE' ) {
|
|
160 $colnames_out="col.names=NA , ";
|
|
161 $hist_title='main=paste("Sample",sample),';
|
|
162 } else {
|
|
163 $colnames_out="col.names=FALSE , ";
|
|
164 $gg_legend= '+ theme(legend.position="none") ';
|
|
165 $hist_title='main="Histogram VST counts", ';
|
|
166 }
|
|
167
|
|
168 print RMARKDOWN <<EOF;
|
|
169 Histograms
|
|
170 ----------
|
|
171 ```{r plotting and writing, message=FALSE, echo=FALSE}
|
|
172 write.table( vst_raw_counts, file = "$para{'outtab'}",
|
|
173 sep = "\\t", quote = F, $colnames_out row.names=$para{'header_columns'}
|
|
174 )
|
|
175 vst_raw_counts_melted = melt(vst_raw_counts)
|
|
176 colnames(vst_raw_counts_melted)=c("ID","samples","value")
|
|
177 ggplot( data=vst_raw_counts_melted, aes(value,colour=samples)) + geom_density(alpha=0.2) + labs(x="VST of counts") $gg_legend
|
|
178 for(sample in colnames(vst_raw_counts)){
|
|
179 hist(vst_raw_counts[,colnames(vst_raw_counts)==sample],
|
|
180 $hist_title breaks=80, xlab="VST counts" )
|
|
181 }
|
|
182 ```
|
|
183 ----
|
|
184
|
|
185 EOF
|
|
186
|
|
187 ## Plotting and writing results out
|
|
188 ## ---------------------------------------------------------------------
|
|
189 print RMARKDOWN <<EOF;
|
|
190 Logging R environment
|
|
191 ---------------------
|
|
192 ```{r for the log, highlight=FALSE}
|
|
193 sessionInfo()
|
|
194 ```
|
|
195 ----
|
|
196 EOF
|
|
197
|
|
198 # -------------------- Logging scripts --------------------------------#
|
|
199 ########################################################################
|
|
200 open(my $knitrscript,"<","knitrscript.r");
|
|
201 open(my $rmarkdownscript,"<","rmarkdownscript.rmd");
|
|
202 while(<$knitrscript>){
|
|
203 INFO $_;
|
|
204 }
|
|
205 close($knitrscript);
|
|
206 close($rmarkdownscript);
|
|
207
|
|
208
|
|
209 # -------------------- Assembling command ----------------------------#
|
|
210 ########################################################################
|
|
211 my $command = "R CMD BATCH knitrscript.r"; # to execute
|
|
212
|
|
213 # --------------------- Executing command ----------------------------#
|
|
214 ########################################################################
|
|
215 run_process($command, "Knitting RMarkdown script");
|
|
216
|
|
217 # ------------------- Cleaning up and exiting ------------------------#
|
|
218 ########################################################################
|
|
219 $command = "rm -rf temp.md knitrscript.r knitrscript.r.Rout rmarkdownscript.rmd";
|
|
220 run_process($command, "Cleaning up");
|
|
221 exit 0;
|
|
222
|
|
223 ### Subroutines ###
|
|
224 ########################################################################
|
|
225 sub run_process {
|
|
226 my ($command, $name)= @_;
|
|
227 my $logger = get_logger();
|
|
228 INFO "Launching process\n $command\n";
|
|
229 system("$command 2>/dev/null") == 0 or die "$name failed\nExit status $?\nCommand: $command";
|
|
230 if ($? == -1) {
|
|
231 print "failed to execute: $!\n";
|
|
232 } elsif ($? & 127) {
|
|
233 printf "child died with signal %d, %s coredump\n", ($? & 127), ($? & 128) ? 'with' : 'without';
|
|
234 } else {
|
|
235 printf "$name executed successfully\n", $? >> 8;
|
|
236 }
|
|
237 }
|
|
238
|
|
239
|
|
240
|