Mercurial > repos > joachim-jacob > vstranformation
comparison vst_transformation.pl @ 0:a48dde9abecf draft
Uploaded initial version
author | joachim-jacob |
---|---|
date | Wed, 17 Jul 2013 04:06:12 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a48dde9abecf |
---|---|
1 #!/usr/bin/perl | |
2 # vst_transformation.pl | |
3 # Joachim Jacob - joachim.jacob@vib.be - 2013 | |
4 | |
5 use strict; | |
6 use File::Basename; | |
7 use Log::Log4perl qw(:easy); | |
8 | |
9 # ---------------------- Prepping Logging -----------------------------# | |
10 ######################################################################## | |
11 # Log levels: $DEBUG $INFO $WARN $ERROR $FATAL | |
12 # ConversionPattern: %d %-5p %F{1} [%M] (line %L): %m%n%n | |
13 my $log_conf = q/ | |
14 log4perl.category = ERROR, Screen | |
15 log4perl.appender.Screen.stderr=1 | |
16 log4perl.appender.Screen.layout=Log::Log4perl::Layout::PatternLayout | |
17 log4perl.appender.Screen.layout.ConversionPattern = %d %-5p %m%n | |
18 log4perl.appender.Screen = Log::Log4perl::Appender::Screen | |
19 /; | |
20 | |
21 Log::Log4perl::init( \$log_conf ); | |
22 my $logger = get_logger(); | |
23 | |
24 # ----------------- Getting parameters file ---------------------------# | |
25 ######################################################################## | |
26 my ($parameters) = @ARGV; | |
27 my (%para, @repeat_blocks); | |
28 open(PARAMETERS,"<$parameters"); | |
29 while (<PARAMETERS>) { | |
30 if (/(\S+)==(.+)$/){ | |
31 $para{$1} = $2; | |
32 } | |
33 } | |
34 close(PARAMETERS); | |
35 | |
36 =Excerpt Config parameters | |
37 counttable=$counttable | |
38 outtab==$outttab | |
39 html_file==$html_file | |
40 | |
41 ##REPEAT BLOCK | |
42 #for $i, $s in enumerate( $replicates ) | |
43 replicates_$i==$s.repl_group_name | |
44 replicates_$i==$s.rep_columns | |
45 #end for | |
46 | |
47 replicates_$s.repl_group_name==$s.rep_columns | |
48 | |
49 =cut | |
50 | |
51 for my $para (keys %para){ | |
52 INFO "$para==$para{$para}"; | |
53 } | |
54 | |
55 # --------------------- KNITR SCRIPT ----------------------------------# | |
56 ######################################################################## | |
57 # This script compile the HTML output from the R Markdown script below. | |
58 my $htmloutput=$para{'html_file'}; | |
59 open(KNITRSCRIPT , ">" , "knitrscript.r"); | |
60 print KNITRSCRIPT <<EOF; | |
61 require(knitr) | |
62 require(markdown) | |
63 knit("rmarkdownscript.rmd", "temp.md") | |
64 markdownToHTML("temp.md", "$htmloutput") | |
65 EOF | |
66 | |
67 # -------------------- R MARKDOWN SCRIPT ------------------------------# | |
68 ######################################################################## | |
69 open(RMARKDOWN , ">>" , "rmarkdownscript.rmd"); | |
70 | |
71 ## Initiating R environment | |
72 ## --------------------------------------------------------------------- | |
73 print RMARKDOWN <<EOF; | |
74 VST transformation | |
75 ================================ | |
76 | |
77 Loading the R environment | |
78 ------------------------- | |
79 | |
80 ```{r initiating environment, message=FALSE, highlight=TRUE, results="hide"} | |
81 library('DESeq') | |
82 library(reshape) | |
83 library(ggplot2) | |
84 ``` | |
85 ---- | |
86 EOF | |
87 | |
88 ## Reading input | |
89 ## Reshaping parameters | |
90 ## --------------------------------------------------------------------- | |
91 my ($col_names,$row_names, $columns, $repnames, $user_col_names)=("","","","",""); | |
92 | |
93 # Getting the column numbers and replicate names | |
94 for my $parameter (keys %para){ | |
95 if ($parameter =~ /^replicates_(\S+)$/ ) { | |
96 my $rep_name=$1; | |
97 my @cols = split(",",$para{$parameter}); | |
98 # we need to subtract one from the column number if | |
99 # an column with identifiers is present. | |
100 # This column will be removed from the data when reading in. | |
101 if ( $para{'header_columns'} eq 'TRUE' ){ | |
102 foreach (@cols){ | |
103 my $colnum = $_-1; | |
104 $columns.= $colnum.","; | |
105 } | |
106 } else { | |
107 $columns.=$para{$parameter}.","; | |
108 } | |
109 # We fill a vector of replicate names as many as there are columns | |
110 $repnames.= "\"$rep_name\"," x @cols; | |
111 } | |
112 } | |
113 chop $columns; | |
114 chop $repnames; | |
115 | |
116 # If header column is present, this column contains the names of the rows. | |
117 if ( $para{'header_columns'} eq 'TRUE' ) { | |
118 $row_names = ' row.names = 1, ' | |
119 } | |
120 | |
121 # If the first row contains the column names? | |
122 if ( $para{'header_row'} eq 'TRUE' ) { | |
123 $col_names = ' header=T, '; | |
124 } else { | |
125 $user_col_names = " col.names=c("; | |
126 if ( $para{'header_columns'} eq 'TRUE' ) { $user_col_names .= '"ID",'; } | |
127 $user_col_names .= " $repnames), "; | |
128 } | |
129 | |
130 print RMARKDOWN <<EOF; | |
131 Reading input | |
132 ------------- | |
133 ```{r reading input, message=FALSE, highlight=TRUE} | |
134 filename="$para{'counttable'}" | |
135 raw_counts = read.csv(filename, sep="\t", $col_names $row_names stringsAsFactors=F) | |
136 raw_counts_deseqct = newCountDataSet(raw_counts[,c($columns)], factor(c($repnames))) | |
137 ``` | |
138 ---- | |
139 EOF | |
140 | |
141 ## Applying variance stabilizing transformation | |
142 ## --------------------------------------------------------------------- | |
143 print RMARKDOWN <<EOF; | |
144 Performing VST transformation | |
145 ----------------------------- | |
146 ```{r vst on raw counts, message=FALSE, highlight=TRUE} | |
147 raw_counts_deseqct = newCountDataSet(raw_counts[,c($columns)], factor(c($repnames))) | |
148 raw_counts_deseqct = estimateSizeFactors(raw_counts_deseqct) | |
149 raw_counts_deseqctBlind = estimateDispersions(raw_counts_deseqct,method="blind") | |
150 vsd = varianceStabilizingTransformation(raw_counts_deseqctBlind) | |
151 vst_raw_counts = exprs(vsd) | |
152 ``` | |
153 ---- | |
154 EOF | |
155 | |
156 ## Plotting and writing results | |
157 ## --------------------------------------------------------------------- | |
158 my ($colnames_out, $gg_legend, $hist_title)=("","",""); | |
159 if($para{'header_row'} eq 'TRUE' ) { | |
160 $colnames_out="col.names=NA , "; | |
161 $hist_title='main=paste("Sample",sample),'; | |
162 } else { | |
163 $colnames_out="col.names=FALSE , "; | |
164 $gg_legend= '+ theme(legend.position="none") '; | |
165 $hist_title='main="Histogram VST counts", '; | |
166 } | |
167 | |
168 print RMARKDOWN <<EOF; | |
169 Histograms | |
170 ---------- | |
171 ```{r plotting and writing, message=FALSE, echo=FALSE} | |
172 write.table( vst_raw_counts, file = "$para{'outtab'}", | |
173 sep = "\\t", quote = F, $colnames_out row.names=$para{'header_columns'} | |
174 ) | |
175 vst_raw_counts_melted = melt(vst_raw_counts) | |
176 colnames(vst_raw_counts_melted)=c("ID","samples","value") | |
177 ggplot( data=vst_raw_counts_melted, aes(value,colour=samples)) + geom_density(alpha=0.2) + labs(x="VST of counts") $gg_legend | |
178 for(sample in colnames(vst_raw_counts)){ | |
179 hist(vst_raw_counts[,colnames(vst_raw_counts)==sample], | |
180 $hist_title breaks=80, xlab="VST counts" ) | |
181 } | |
182 ``` | |
183 ---- | |
184 | |
185 EOF | |
186 | |
187 ## Plotting and writing results out | |
188 ## --------------------------------------------------------------------- | |
189 print RMARKDOWN <<EOF; | |
190 Logging R environment | |
191 --------------------- | |
192 ```{r for the log, highlight=FALSE} | |
193 sessionInfo() | |
194 ``` | |
195 ---- | |
196 EOF | |
197 | |
198 # -------------------- Logging scripts --------------------------------# | |
199 ######################################################################## | |
200 open(my $knitrscript,"<","knitrscript.r"); | |
201 open(my $rmarkdownscript,"<","rmarkdownscript.rmd"); | |
202 while(<$knitrscript>){ | |
203 INFO $_; | |
204 } | |
205 close($knitrscript); | |
206 close($rmarkdownscript); | |
207 | |
208 | |
209 # -------------------- Assembling command ----------------------------# | |
210 ######################################################################## | |
211 my $command = "R CMD BATCH knitrscript.r"; # to execute | |
212 | |
213 # --------------------- Executing command ----------------------------# | |
214 ######################################################################## | |
215 run_process($command, "Knitting RMarkdown script"); | |
216 | |
217 # ------------------- Cleaning up and exiting ------------------------# | |
218 ######################################################################## | |
219 $command = "rm -rf temp.md knitrscript.r knitrscript.r.Rout rmarkdownscript.rmd"; | |
220 run_process($command, "Cleaning up"); | |
221 exit 0; | |
222 | |
223 ### Subroutines ### | |
224 ######################################################################## | |
225 sub run_process { | |
226 my ($command, $name)= @_; | |
227 my $logger = get_logger(); | |
228 INFO "Launching process\n $command\n"; | |
229 system("$command 2>/dev/null") == 0 or die "$name failed\nExit status $?\nCommand: $command"; | |
230 if ($? == -1) { | |
231 print "failed to execute: $!\n"; | |
232 } elsif ($? & 127) { | |
233 printf "child died with signal %d, %s coredump\n", ($? & 127), ($? & 128) ? 'with' : 'without'; | |
234 } else { | |
235 printf "$name executed successfully\n", $? >> 8; | |
236 } | |
237 } | |
238 | |
239 | |
240 |