annotate cuffdiff_mds_plot.pl @ 8:b0d11fcbc3ac

cummerbund add MDS and PCA plots, handle errors from R
author Jim Johnson <jj@umn.edu>
date Thu, 11 Oct 2012 15:14:51 -0500
parents 728b0aa0df6e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
1 #!/usr/bin/perl -w
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
2
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
3 ###############################################################
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
4 # cuffdiff_mds_plot.pl
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
5 # John Garbe
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
6 # Septmeber 2012
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
7 #
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
8 # Given a sample tracking file from cuffdiff2, convert it to
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
9 # the proper format for loading into R and generating an MDS plot
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
10 #
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
11 ################################################################
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
12
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
13 # check to make sure having correct files
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
14 my $usage = "usage: cuffdiff_mds_plot.pl [TABULAR.in] [TABULAR.out] [PLOT.png]\n";
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
15 die $usage unless @ARGV == 3;
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
16
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
17 #get the input arguments
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
18 my $inputFile = $ARGV[0];
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
19 my $outputFile = $ARGV[1];
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
20 my $plotFile = $ARGV[2];
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
21
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
22 #Open files
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
23 open (INPUT, "<", $inputFile) || die("Could not open file $inputFile \n");
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
24 open (OUTPUT, ">", $outputFile) || die("Could not open file $outputFile \n");
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
25 open (PLOT, ">", $plotFile) || die("Could not open file $plotFile \n");
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
26
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
27 # header looks like this:
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
28 # tracking_id condition replicate raw_frags internal_scaled_frags external_scaled_frags FPKM effective_length status
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
29 my $header = <INPUT>;
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
30
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
31 # read in the sample tracking file
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
32 while (<INPUT>) {
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
33 chomp;
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
34 @line = split /\t/;
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
35 $tracking_id{$line[0]} = 1;
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
36 $sample = $line[1] . "-" . $line[2];
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
37 $fpkm{$sample}{$line[0]} = $line[6];
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
38 }
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
39 close(INPUT);
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
40
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
41 @sorted_tracking_id = sort( keys(%tracking_id));
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
42
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
43 # print out header
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
44 foreach $tracking_id (@sorted_tracking_id) {
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
45 print OUTPUT "\t$tracking_id";
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
46 }
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
47 print OUTPUT "\n";
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
48
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
49 # print out data
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
50 foreach $sample (keys(%fpkm)) {
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
51 print OUTPUT "$sample";
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
52
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
53 foreach $tracking_id (@sorted_tracking_id) {
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
54 print OUTPUT "\t$fpkm{$sample}{$tracking_id}";
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
55 }
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
56
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
57 print OUTPUT "\n";
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
58 }
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
59 close(OUTPUT);
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
60
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
61 #variables to store the name of the R script file
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
62 my $r_script = "cuffinks2mdf.r";
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
63
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
64 open(Rcmd,">", $r_script) or die "Cannot open $r_script \n\n";
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
65 print Rcmd "
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
66 datat <- read.table(\"$outputFile\");
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
67 cmd <- cmdscale(dist(datat));
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
68 png(filename=\"$plotFile\");
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
69 plot(cmd[,1], cmd[,2], type=\"n\", ann=FALSE);
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
70 text(cmd[,1], cmd[,2], labels = rownames(datat));
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
71 title(main=\"Multidimensional Scaling Plot\");
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
72 title(xlab= \"Dimension 1\");
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
73 title(ylab= \"Dimension 2\");
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
74 dev.off();
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
75 #eof" . "\n";
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
76
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
77 close Rcmd;
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
78
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
79
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
80 system("R --no-restore --no-save --no-readline < $r_script > $r_script.out");
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
81 #close the input and output files
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
82 close(PLOT);
728b0aa0df6e Add cuffdiff_mds_plot for MultiDimensionalScaling plot for read_group_tracking
Jim Johnson <jj@umn.edu>
parents:
diff changeset
83