library(reshape2)
library(ggplot2)

# Command-line interface; all nine positional arguments are required:
#   1    input         path of the tab-separated input table
#   2-6  omt1..omt5    comma-separated values matched against OMT.1..OMT.5
#   7    inheritance   comma-separated values matched against `inheritance`
#   8    orderby.list  comma-separated GROUP.CODE values, or the literal "None"
#   9    outdir        directory the script switches to before writing output
args <- commandArgs(trailingOnly = TRUE)

# Fail early with a readable message; otherwise missing arguments become NA
# and only surface later as obscure read.table()/setwd() errors.
if (length(args) < 9) {
  stop(
    "expected 9 arguments ",
    "(input omt1 omt2 omt3 omt4 omt5 inheritance orderby outdir), got ",
    length(args),
    call. = FALSE
  )
}

input <- args[1]
omt1 <- strsplit(args[2], ",")[[1]]
omt2 <- strsplit(args[3], ",")[[1]]
omt3 <- strsplit(args[4], ",")[[1]]
omt4 <- strsplit(args[5], ",")[[1]]
omt5 <- strsplit(args[6], ",")[[1]]
inheritance <- strsplit(args[7], ",")[[1]]
orderby.list <- as.character(args[8])
# "None" means no explicit order-by group was given; fall back to group "1".
# Plain if() instead of ifelse(): the value is a single string.
if (orderby.list == "None") {
  orderby.list <- "1"
}
outdir <- args[9]

# Read the tab-separated input table. Lines starting with '#' are comments
# and quote characters are taken literally.
dat <- read.table(
  file = input,
  header = TRUE,
  sep = "\t",
  quote = "",
  comment.char = "#"
)

# From here on every output file is written relative to the output directory.
setwd(outdir)

# Combined key "<diseaseId> <gene.symbol>" used throughout the script.
dat$disease.gene <- paste(dat$diseaseId, dat$gene.symbol)

# A row is kept when at least one of its five OMT columns matches the
# corresponding list of requested values.
omt.matches <- list(
  dat$OMT.1 %in% omt1,
  dat$OMT.2 %in% omt2,
  dat$OMT.3 %in% omt3,
  dat$OMT.4 %in% omt4,
  dat$OMT.5 %in% omt5
)
OMTs <- Reduce(`|`, omt.matches)

# Number of matching rows (diagnostic output).
print(sum(OMTs))

dat.sub <- dat[OMTs, ]

# Cross-tabulate the selected rows: one row per OMT.1..OMT.5 combination, one
# column per disease.gene, each cell holding the number of matching rows.
id.cols <- c("OMT.1", "OMT.2", "OMT.3", "OMT.4", "OMT.5")
selectA <- dcast(
  dat.sub,
  OMT.1 + OMT.2 + OMT.3 + OMT.4 + OMT.5 ~ disease.gene,
  length,
  value.var = "disease.gene"
)

# Only the disease.gene columns carry counts. The id columns are left out of
# the 0/1 conversion and of the column sums below, so non-numeric or NA ids
# can neither break colSums() nor be mistaken for a disease.gene.
gene.cols <- setdiff(names(selectA), id.cols)
for (gene.col in gene.cols) {
  # Turn counts into presence flags (1 = present in this OMT combination).
  selectA[[gene.col]] <- as.numeric(selectA[[gene.col]] > 0)
}

print(selectA)

# A disease.gene qualifies when it is present in every OMT combination.
in.every.omt <- colSums(selectA[, gene.cols, drop = FALSE]) == nrow(selectA)
print(sum(in.every.omt))

# drop = FALSE keeps a data frame (and therefore its column names) even when
# exactly one disease.gene qualifies.
selectA <- selectA[, gene.cols[in.every.omt], drop = FALSE]

# Row masks over the full table: disease.gene qualified above, and the
# inheritance value is one of the requested ones.
disease.in.omt <- dat$disease.gene %in% names(selectA)
inheritance.filter <- dat$inheritance %in% inheritance

# Mean ratio per group over all rows passing the three filters, drawn as a
# single-column heatmap and saved as plot1_avg.png.
avg.rows <- OMTs & disease.in.omt & inheritance.filter
avg.input <- dat[avg.rows, c("disease.gene", "GROUP.CODE", "GROUP.NAME", "ratio")]

dat.avg <- aggregate(
  avg.input$ratio,
  by = list(avg.input$GROUP.CODE, avg.input$GROUP.NAME),
  FUN = mean
)
names(dat.avg) <- c("GROUP.CODE", "GROUP.NAME", "mean")
# Constant x value so the plot has exactly one column, labelled "mean".
dat.avg$disease.gene <- "mean"

p <- ggplot(dat.avg, aes(disease.gene, reorder(GROUP.NAME, as.numeric(GROUP.CODE)))) +
  geom_tile(aes(fill = mean), colour = "white") +
  scale_fill_gradient(low = "white", high = "red") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )

png("plot1_avg.png", width = 200)
print(p)
dev.off()

# Diagnostic counts for the two row masks.
print(sum(disease.in.omt))

print(sum(inheritance.filter))

# Distinct (disease.gene, group, ratio) rows that pass all three filters.
dat.sub2 <- unique(
  dat[
    OMTs & disease.in.omt & inheritance.filter,
    c("disease.gene", "GROUP.CODE", "GROUP.NAME", "ratio")
  ]
)

# Ordering score: start from the ratio, then lift every non-zero row that
# belongs to one of the order-by groups above the maximum ratio.
dat.sub2$disease.gene.order <- dat.sub2$ratio
orderby.codes <- strsplit(orderby.list, ",")[[1]]
for (code in orderby.codes) {
  lifted <- dat.sub2$GROUP.CODE == as.numeric(code) & dat.sub2$ratio != 0
  n.lifted <- sum(lifted)
  print(paste("orderby:", code, "+1:", n.lifted))
  if (n.lifted > 0) {
    dat.sub2[lifted, "disease.gene.order"] <-
      dat.sub2[lifted, "disease.gene.order"] + max(dat.sub2$ratio)
  }
}

# Rows that were not lifted (score still within the ratio range) get score 0,
# so only the order-by groups influence the ordering.
not.lifted <- dat.sub2$disease.gene.order <= max(dat.sub2$ratio)
dat.sub2[not.lifted, "disease.gene.order"] <- 0

print(paste("min ratio:", min(dat.sub2$ratio)))

# Dump the scored rows for inspection.
write.table(
  dat.sub2[, c("disease.gene", "GROUP.CODE", "GROUP.NAME", "ratio", "disease.gene.order")],
  file = "debug.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)

#dat.sub2.order = dat.sub2[dat.sub2$GROUP.CODE == orderby,c("disease.gene", "ratio")]

#names(dat.sub2.order) = c("disease.gene", "disease.gene.order")

#dat.sub2 = merge(dat.sub2, dat.sub2.order, by="disease.gene", all=T)

# Heatmap of ratio per disease.gene (x, highest ordering score first) and
# group (y, ordered by numeric group code); saved as plot1.png with the
# underlying rows written to plot1.txt.
p <- ggplot(dat.sub2, aes(reorder(disease.gene, -disease.gene.order), reorder(GROUP.NAME, as.numeric(GROUP.CODE)))) +
  geom_tile(aes(fill = ratio), colour = "white") +
  scale_fill_gradient(low = "white", high = "red") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Image width grows with the number of distinct disease.gene columns.
n.disease.gene <- length(unique(dat.sub2$disease.gene))
png("plot1.png", width = n.disease.gene * 30 + 300)
print(p)
dev.off()

write.table(
  dat.sub2,
  file = "plot1.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)

# Presence map of HPO term names per diseaseId for the filtered rows; saved
# as plot2.png with the underlying rows written to plot2.txt.
plot2.rows <- OMTs & disease.in.omt & inheritance.filter
dat.sub3 <- dat[plot2.rows, c("diseaseId", "HPO.term.name")]
# Constant fill value: every listed (diseaseId, term) pair is drawn the same.
dat.sub3$value <- 1

p <- ggplot(dat.sub3, aes(diseaseId, HPO.term.name)) +
  geom_tile(aes(fill = value), colour = "white") +
  scale_fill_gradient(low = "red", high = "red") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )

# Image width grows with the number of distinct diseaseIds.
n.disease.plot2 <- length(unique(dat.sub3$diseaseId))
png("plot2.png", width = n.disease.plot2 * 20 + 300)
print(p)
dev.off()

write.table(
  dat.sub3,
  file = "plot2.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)

# Presence map of OMT.5.name per diseaseId, restricted to the diseaseIds shown
# in plot2; saved as plot3.png with the underlying rows in plot3.txt.
plot3.rows <- disease.in.omt & dat$diseaseId %in% dat.sub3$diseaseId
dat.sub4 <- unique(dat[plot3.rows, c("diseaseId", "OMT.5.name")])
dat.sub4$value <- 1

# Number of distinct OMT.5.name entries per diseaseId; used as the x-axis
# ordering key (fewest first).
dat.sub4.freq <- data.frame(table(as.character(dat.sub4$diseaseId)))
names(dat.sub4.freq) <- c("diseaseId", "diseaseId.order")
dat.sub4 <- merge(dat.sub4, dat.sub4.freq, by = "diseaseId")

p <- ggplot(dat.sub4, aes(reorder(diseaseId, diseaseId.order), OMT.5.name)) +
  geom_tile(aes(fill = value), colour = "white") +
  scale_fill_gradient(low = "red", high = "red") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )

# Same width rule as plot2: based on the distinct diseaseIds of dat.sub3.
n.disease.plot3 <- length(unique(dat.sub3$diseaseId))
png("plot3.png", width = n.disease.plot3 * 20 + 300)
print(p)
dev.off()

write.table(
  dat.sub4,
  file = "plot3.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)

# Disease/gene/URL table accompanying plot1.png, with rows in the same
# left-to-right order as the plot's x axis.
disease.gene.url <- dat[
  OMTs & disease.in.omt & inheritance.filter,
  c("disease.gene", "diseaseId", "gene.symbol", "url")
]
disease.gene.url <- disease.gene.url[!duplicated(disease.gene.url), ]

# plot1 places each disease.gene with reorder(disease.gene, -disease.gene.order),
# which ranks genes by the mean of disease.gene.order over all of their rows.
# Use that same per-gene mean here: taking only the first row of each gene
# disagrees with the plot whenever that row is not in an order-by group.
gene.order <- tapply(dat.sub2$disease.gene.order, dat.sub2$disease.gene, mean)
gene.order <- data.frame(
  disease.gene = as.character(names(gene.order)),
  disease.gene.order = as.numeric(gene.order),
  stringsAsFactors = FALSE
)

# merge() sorts by disease.gene and order() is stable, so ties on the score
# stay in disease.gene order.
disease.gene.url <- merge(disease.gene.url, gene.order, by = "disease.gene", all.x = TRUE)
disease.gene.url <- disease.gene.url[order(-disease.gene.url$disease.gene.order), ]
disease.gene.url <- disease.gene.url[, c("diseaseId", "gene.symbol", "url")]

names(disease.gene.url) <- c("Disease ID", "Gene Symbol", "URL")

write.table(
  disease.gene.url,
  file = "disease_gene_url_1.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)


# Disease/gene/URL table ordered by the per-disease count computed for plot3
# (ascending diseaseId.order); written to disease_gene_url_2.txt.
url.rows <- OMTs & disease.in.omt & inheritance.filter
disease.gene.url <- unique(dat[url.rows, c("diseaseId", "gene.symbol", "url")])

# Attach the plot3 ordering key and sort by it.
disease.gene.url <- merge(disease.gene.url, dat.sub4.freq, by = "diseaseId")
row.rank <- order(disease.gene.url$diseaseId.order)
disease.gene.url <- disease.gene.url[row.rank, c("diseaseId", "gene.symbol", "url")]

names(disease.gene.url) <- c("Disease ID", "Gene Symbol", "URL")

write.table(
  disease.gene.url,
  file = "disease_gene_url_2.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)






















