Mercurial > repos > eduardo > blobplot
annotate makeblobplot.R @ 6:b45fc20c7acd
resize legend and removed facet
| author | Eduardo <eduardoalves@abdn.ac.uk> |
|---|---|
| date | Sat, 28 Nov 2015 11:26:37 +0000 |
| parents | 0101d7d1211f |
| children |
| rev | line source |
|---|---|
| 0 | 1 #!/usr/bin/env Rscript |
| 2 | |
| 3 # 2013-09-16 | |
| 4 # works with R 2.15.2 and ggplot 0.9.3.1 | |
| 5 # Check ggplot2 help forums or contact sujai.kumar@gmail.com if something doesn't run | |
| 6 # because of updated programs/packages | |
| 7 | |
| 8 #Function to ignore low frequency annotations: | |
| 9 | |
| 10 clean.blobs<-function(d,threshold,taxlevel) { | |
| 11 annotated<-d[d[,taxlevel]!="Not annotated",] | |
| 12 total<-dim(annotated)[1] | |
| 13 levels(d[,taxlevel])[which(table(d[,taxlevel])<threshold*total)]<-"Not annotated" | |
| 14 return(d) | |
| 15 } | |
| 16 | |
| 17 ######################################################################### | |
| 18 | |
| 19 #Load data from file and generate plot: | |
| 20 | |
| 21 library(ggplot2) | |
| 22 library(reshape2) | |
| 23 | |
| 24 subplotwidth=1000; | |
| 25 subplotheight=1000; | |
| 26 | |
| 27 args <- commandArgs(trailingOnly = TRUE) | |
| 28 arg_input_file <- args[1] | |
| 29 arg_ignore_below_prop=as.numeric(args[2]) | |
| 30 arg_taxlevel=args[3] | |
| 31 | |
| 32 orig <- read.delim(arg_input_file,header=TRUE,sep="\t") | |
| 33 orig <- orig[orig$len>=200,] | |
| 34 | |
| 35 # if cov_colnames >1, then create a new column cov_total = sum of all cov columns | |
| 36 if (length(grep("^cov_",colnames(orig))) >1) orig$cov_total = rowSums(orig[,grep("^cov_",colnames(orig))]) | |
| 37 | |
| 38 cov_colnames=colnames(orig)[grep("^cov_",colnames(orig))] | |
| 39 tax_colnames=colnames(orig)[grep("^taxlevel_",colnames(orig))] | |
| 40 | |
| 41 numcols=length(cov_colnames) | |
| 42 | |
| 43 taxlevel=arg_taxlevel; | |
| 44 | |
| 45 m<-melt(orig,id.vars=c("seqid","len","gc",taxlevel),measure.vars=cov_colnames, variable.name="read_set", value.name="cov") | |
| 46 | |
| 47 # there aren't many colours available, so to restrict the plot to only 13 colours: | |
| 48 # (thanks to https://github.com/hobrien for the fix) | |
| 49 if (length(levels(m[,taxlevel])) > 14) { | |
| 50 levels(m[,taxlevel])[which(table(m[,taxlevel])<=sort(as.numeric(table(m[,taxlevel])), decreasing=T)[13])]<-"other" | |
| 51 } | |
| 52 | |
| 53 mfilt<-clean.blobs(m,arg_ignore_below_prop,taxlevel) | |
| 54 taxnames=names(sort(table(mfilt[,taxlevel]),decreasing=TRUE)) | |
| 55 taxnames=c("Not annotated", taxnames[taxnames != "Not annotated"]) | |
| 56 mfilt[,taxlevel] <- factor(mfilt[,taxlevel], levels = taxnames) | |
| 57 | |
| 58 png(paste(arg_input_file,taxlevel,"png",sep="."), (numcols * subplotwidth), (1 * subplotheight) + 300, units="px",res=100) | |
| 59 | |
| 60 theme_set(theme_bw()) | |
| 61 # Paul Tol scheme is well documented at http://www.sron.nl/~pault/colourschemes.pdf - Thank you Paul! Added DDDDDD and 777777 to it | |
| 62 paultol=list(c("#DDDDDD") ,c("#DDDDDD","#4477AA"), c("#DDDDDD","#4477AA","#CC6677"), c("#DDDDDD","#4477AA","#DDCC77","#CC6677"), c("#DDDDDD","#4477AA","#117733","#DDCC77","#CC6677"), c("#DDDDDD","#332288","#88CCEE","#117733","#DDCC77","#CC6677"), c("#DDDDDD","#332288","#88CCEE","#117733","#DDCC77","#CC6677","#AA4499"), c("#DDDDDD","#332288","#88CCEE","#44AA99","#117733","#DDCC77","#CC6677","#AA4499"), c("#DDDDDD","#332288","#88CCEE","#44AA99","#117733","#999933","#DDCC77","#CC6677","#AA4499"), c("#DDDDDD","#332288","#88CCEE","#44AA99","#117733","#999933","#DDCC77","#CC6677","#882255","#AA4499"), c("#DDDDDD","#332288","#88CCEE","#44AA99","#117733","#999933","#DDCC77","#661100","#CC6677","#882255","#AA4499"), c("#DDDDDD","#332288","#6699CC","#88CCEE","#44AA99","#117733","#999933","#DDCC77","#661100","#CC6677","#882255","#AA4499"), c("#DDDDDD","#332288","#6699CC","#88CCEE","#44AA99","#117733","#999933","#DDCC77","#661100","#CC6677","#AA4466","#882255","#AA4499"), c("#DDDDDD","#332288","#6699CC","#88CCEE","#44AA99","#117733","#999933","#DDCC77","#661100","#CC6677","#AA4466","#882255","#AA4499","#777777")) | |
| 63 | |
| 64 g<-ggplot() + scale_colour_manual(values=paultol[[length(levels(mfilt[,taxlevel]))]], name="Taxonomic\nClassification", limits=levels(mfilt[,taxlevel]) ) | |
| 65 | |
| 66 for (t in levels(mfilt[,taxlevel])) { | |
| 67 g <- g + geom_point(data=mfilt[mfilt[,taxlevel]==t,],aes_string(x="gc", y="cov", colour=taxlevel), size=2, alpha=I(1/3)) | |
| 68 } | |
| 69 #y_axis_breaks = c(1,2,5,10,20,50,100,200,500,1000); | |
| 70 g<-g + | |
|
6
b45fc20c7acd
resize legend and removed facet
Eduardo <eduardoalves@abdn.ac.uk>
parents:
0
diff
changeset
|
71 #facet_wrap(~read_set, ncol=numcols) + |
|
b45fc20c7acd
resize legend and removed facet
Eduardo <eduardoalves@abdn.ac.uk>
parents:
0
diff
changeset
|
72 scale_y_log10() + |
| 0 | 73 scale_x_continuous(limits=c(1e-307, 1),breaks = seq(0,1,.1)) + |
| 74 labs(x="GC content", y="Read coverage") + | |
| 75 guides(colour = guide_legend(nrow=3, override.aes = list(alpha = 1,size=10))) + | |
| 76 theme ( | |
| 77 strip.text.x = element_text(colour = "black", size = 25, vjust = 0.5), | |
| 78 axis.text.x = element_text(colour = "black", size = 25, vjust = 1), | |
| 79 axis.text.y = element_text(colour = "black", size = 25, vjust = 0.5), | |
| 80 axis.title.x = element_text(colour = "black", size = 25, vjust = 0), | |
| 81 axis.title.y = element_text(colour = "black", size = 25, hjust = 0.5, vjust = 0.5, angle=90), | |
|
6
b45fc20c7acd
resize legend and removed facet
Eduardo <eduardoalves@abdn.ac.uk>
parents:
0
diff
changeset
|
82 legend.text = element_text(colour = "black", size = 15, vjust = 0), |
|
b45fc20c7acd
resize legend and removed facet
Eduardo <eduardoalves@abdn.ac.uk>
parents:
0
diff
changeset
|
83 legend.title = element_text(colour = "black", size = 15, vjust = 0, hjust = 0, lineheight=1), |
| 0 | 84 legend.justification=c(1,1), legend.position="bottom", legend.direction="horizontal" |
| 85 ) | |
| 86 g | |
| 87 dev.off() |
