changeset 6:752528a5934d draft

Uploaded
author davidvanzessen
date Thu, 06 Aug 2015 03:34:49 -0400
parents 84c6f3bcfd4a
children a5233fbfc603
files phenotype_gene_relations.r
diffstat 1 files changed, 26 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/phenotype_gene_relations.r	Wed Aug 05 10:22:10 2015 -0400
+++ b/phenotype_gene_relations.r	Thu Aug 06 03:34:49 2015 -0400
@@ -51,10 +51,33 @@
 hpo.to.omt = read.table(hpo.to.omt.file, header=T, sep="\t", comment.char="#", quote = "", na.strings="N/C")
 hpo.to.omt[is.na(hpo.to.omt)] = 0
 
+omt.coding = read.table(omt.coding.file, header=T, sep="\t", comment.char="#", quote = "", stringsAsFactors=F)
+
+omt.coding.1 = omt.coding[grepl("^.$", omt.coding$ID),]
+names(omt.coding.1) = c("ID", "OMT.1.name")
+omt.coding.2 = omt.coding[grepl("^..$", omt.coding$ID) | omt.coding$ID == "0",]
+names(omt.coding.2) = c("ID", "OMT.2.name")
+omt.coding.3 = omt.coding[grepl("^...$", omt.coding$ID) | omt.coding$ID == "0",]
+names(omt.coding.3) = c("ID", "OMT.3.name")
+omt.coding.4 = omt.coding[grepl("^....$", omt.coding$ID) | omt.coding$ID == "0",]
+names(omt.coding.4) = c("ID", "OMT.4.name")
+omt.coding.5 = omt.coding[grepl("^.....$", omt.coding$ID) | omt.coding$ID == "0",]
+names(omt.coding.5) = c("ID", "OMT.5.name")
+
+hpo.to.omt = merge(hpo.to.omt, omt.coding.1, by.x="OMT.1", by.y="ID")
+hpo.to.omt = merge(hpo.to.omt, omt.coding.2, by.x="OMT.2", by.y="ID")
+hpo.to.omt = merge(hpo.to.omt, omt.coding.3, by.x="OMT.3", by.y="ID")
+hpo.to.omt = merge(hpo.to.omt, omt.coding.4, by.x="OMT.4", by.y="ID")
+hpo.to.omt = merge(hpo.to.omt, omt.coding.5, by.x="OMT.5", by.y="ID")
+
+
+
+
 #create FILTER SUBSET HAND ANNOTATIONS + OMT KEY
-filter.subset.hand.omt.1 = merge(subset.hand.filter.1, hpo.to.omt[,c("UNIQUE.HPO.IDENTIFIER", "OMT.1", "OMT.2", "OMT.3", "OMT.4", "OMT.5")], by.x="HPO.ID", by.y="UNIQUE.HPO.IDENTIFIER", all.x=T)
+filter.subset.hand.omt.1 = merge(subset.hand.filter.1, hpo.to.omt[,c("UNIQUE.HPO.IDENTIFIER", "OMT.1", "OMT.1.name", "OMT.2", "OMT.2.name", "OMT.3", "OMT.3.name", "OMT.4", "OMT.4.name", "OMT.5", "OMT.5.name")], by.x="HPO.ID", by.y="UNIQUE.HPO.IDENTIFIER", all.x=T)
 filter.subset.hand.omt.1[is.na(filter.subset.hand.omt.1)] = 0
 
+
 #create COUNT NUMBER OF ANNOTATIONS IN PHENOTYPIC GROUPS FOR EACH GENE-DISEASE RELATION
 count.phenotype.in.disease = data.frame(data.table(subset.hand)[, list(count=.N), by=c("gene.symbol", "diseaseId","GROUP.CODE")])
 count.phenotype.in.disease = dcast(count.phenotype.in.disease, gene.symbol + diseaseId ~ GROUP.CODE, value.var = "count")
@@ -69,12 +92,8 @@
 final$HPO.term.name.y = as.character(final$HPO.term.name.y)
 final$HPO.term.name.y[is.na(final$HPO.term.name.y)] = "No inheritance pattern available"
 
-omt.coding = read.table(omt.coding.file, header=T, sep="\t", comment.char="#", quote = "", stringsAsFactors=F)
-omt.coding = rbind(omt.coding, c("0", "N/C"))
-final = merge(final, omt.coding, by.x="OMT.5", by.y="ID", all.x=T)
-
-final = final[,c("diseaseId", "gene.symbol", "gene.id.entrez.", "HPO.term.name.x", "variable", "value", "OMT.1", "OMT.2", "OMT.3", "OMT.4", "OMT.5", "HPO.term.name.y", "OMT.name")]
-names(final) = c("diseaseId", "gene.symbol", "gene.id.entrez.", "HPO.term.name", "GROUP.CODE", "ratio", "OMT.1", "OMT.2", "OMT.3", "OMT.4", "OMT.5", "inheritance", "OMT.name")
+final = final[,c("diseaseId", "gene.symbol", "gene.id.entrez.", "HPO.term.name.x", "variable", "value", "OMT.1", "OMT.1.name", "OMT.2", "OMT.2.name", "OMT.3", "OMT.3.name", "OMT.4", "OMT.4.name", "OMT.5", "OMT.5.name", "HPO.term.name.y")]
+names(final) = c("diseaseId", "gene.symbol", "gene.id.entrez.", "HPO.term.name", "GROUP.CODE", "ratio", "OMT.1", "OMT.1.name", "OMT.2", "OMT.2.name", "OMT.3", "OMT.3.name", "OMT.4", "OMT.4.name", "OMT.5", "OMT.5.name", "inheritance")
 write.table(final, file=final.file, quote=F, sep="\t", row.names=F, col.names=T)