Mercurial > repos > davidvanzessen > phenotype_gene_relations
changeset 6:752528a5934d draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 06 Aug 2015 03:34:49 -0400 |
parents | 84c6f3bcfd4a |
children | a5233fbfc603 |
files | phenotype_gene_relations.r |
diffstat | 1 files changed, 26 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/phenotype_gene_relations.r Wed Aug 05 10:22:10 2015 -0400
+++ b/phenotype_gene_relations.r Thu Aug 06 03:34:49 2015 -0400
@@ -51,10 +51,33 @@
 hpo.to.omt = read.table(hpo.to.omt.file, header=T, sep="\t", comment.char="#", quote = "", na.strings="N/C")
 hpo.to.omt[is.na(hpo.to.omt)] = 0
 
+omt.coding = read.table(omt.coding.file, header=T, sep="\t", comment.char="#", quote = "", stringsAsFactors=F)
+
+omt.coding.1 = omt.coding[grepl("^.$", omt.coding$ID),]
+names(omt.coding.1) = c("ID", "OMT.1.name")
+omt.coding.2 = omt.coding[grepl("^..$", omt.coding$ID) | omt.coding$ID == "0",]
+names(omt.coding.2) = c("ID", "OMT.2.name")
+omt.coding.3 = omt.coding[grepl("^...$", omt.coding$ID) | omt.coding$ID == "0",]
+names(omt.coding.3) = c("ID", "OMT.3.name")
+omt.coding.4 = omt.coding[grepl("^....$", omt.coding$ID) | omt.coding$ID == "0",]
+names(omt.coding.4) = c("ID", "OMT.4.name")
+omt.coding.5 = omt.coding[grepl("^.....$", omt.coding$ID) | omt.coding$ID == "0",]
+names(omt.coding.5) = c("ID", "OMT.5.name")
+
+hpo.to.omt = merge(hpo.to.omt, omt.coding.1, by.x="OMT.1", by.y="ID")
+hpo.to.omt = merge(hpo.to.omt, omt.coding.2, by.x="OMT.2", by.y="ID")
+hpo.to.omt = merge(hpo.to.omt, omt.coding.3, by.x="OMT.3", by.y="ID")
+hpo.to.omt = merge(hpo.to.omt, omt.coding.4, by.x="OMT.4", by.y="ID")
+hpo.to.omt = merge(hpo.to.omt, omt.coding.5, by.x="OMT.5", by.y="ID")
+
+
+
+
 #create FILTER SUBSET HAND ANNOTATIONS + OMT KEY
-filter.subset.hand.omt.1 = merge(subset.hand.filter.1, hpo.to.omt[,c("UNIQUE.HPO.IDENTIFIER", "OMT.1", "OMT.2", "OMT.3", "OMT.4", "OMT.5")], by.x="HPO.ID", by.y="UNIQUE.HPO.IDENTIFIER", all.x=T)
+filter.subset.hand.omt.1 = merge(subset.hand.filter.1, hpo.to.omt[,c("UNIQUE.HPO.IDENTIFIER", "OMT.1", "OMT.1.name", "OMT.2", "OMT.2.name", "OMT.3", "OMT.3.name", "OMT.4", "OMT.4.name", "OMT.5", "OMT.5.name")], by.x="HPO.ID", by.y="UNIQUE.HPO.IDENTIFIER", all.x=T)
 filter.subset.hand.omt.1[is.na(filter.subset.hand.omt.1)] = 0
 
+
 #create COUNT NUMBER OF ANNOTATIONS IN PHENOTYPIC GROUPS FOR EACH GENE-DISEASE RELATION
 count.phenotype.in.disease = data.frame(data.table(subset.hand)[, list(count=.N), by=c("gene.symbol", "diseaseId","GROUP.CODE")])
 count.phenotype.in.disease = dcast(count.phenotype.in.disease, gene.symbol + diseaseId ~ GROUP.CODE, value.var = "count")
@@ -69,12 +92,8 @@
 final$HPO.term.name.y = as.character(final$HPO.term.name.y)
 final$HPO.term.name.y[is.na(final$HPO.term.name.y)] = "No inheritance pattern available"
 
-omt.coding = read.table(omt.coding.file, header=T, sep="\t", comment.char="#", quote = "", stringsAsFactors=F)
-omt.coding = rbind(omt.coding, c("0", "N/C"))
-final = merge(final, omt.coding, by.x="OMT.5", by.y="ID", all.x=T)
-
-final = final[,c("diseaseId", "gene.symbol", "gene.id.entrez.", "HPO.term.name.x", "variable", "value", "OMT.1", "OMT.2", "OMT.3", "OMT.4", "OMT.5", "HPO.term.name.y", "OMT.name")]
-names(final) = c("diseaseId", "gene.symbol", "gene.id.entrez.", "HPO.term.name", "GROUP.CODE", "ratio", "OMT.1", "OMT.2", "OMT.3", "OMT.4", "OMT.5", "inheritance", "OMT.name")
+final = final[,c("diseaseId", "gene.symbol", "gene.id.entrez.", "HPO.term.name.x", "variable", "value", "OMT.1", "OMT.1.name", "OMT.2", "OMT.2.name", "OMT.3", "OMT.3.name", "OMT.4", "OMT.4.name", "OMT.5", "OMT.5.name", "HPO.term.name.y")]
+names(final) = c("diseaseId", "gene.symbol", "gene.id.entrez.", "HPO.term.name", "GROUP.CODE", "ratio", "OMT.1", "OMT.1.name", "OMT.2", "OMT.2.name", "OMT.3", "OMT.3.name", "OMT.4", "OMT.4.name", "OMT.5", "OMT.5.name", "inheritance")
 
 write.table(final, file=final.file, quote=F, sep="\t", row.names=F, col.names=T)
 