Mercurial > repos > davidvanzessen > baseline_bayesian_estimation
diff filter.r @ 5:d8de51314d3f draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 14 Aug 2014 07:17:26 -0400 |
parents | 38c038c17d0c |
children | 82016fd934c8 |
line wrap: on
line diff
```diff
--- a/filter.r Wed Aug 13 09:09:56 2014 -0400
+++ b/filter.r Thu Aug 14 07:17:26 2014 -0400
@@ -1,17 +1,21 @@
-arg = commandArgs(TRUE)
+arg = commandArgs(TRUE)
 summaryfile = arg[1]
 gappedfile = arg[2]
 selection = arg[3]
 output = arg[4]
-print(paste("-----", selection, "------"))
+print(paste("selection = ", selection))
 
-summarydat = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F)[,c("Sequence.ID", "AA.JUNCTION")]
+
+summarydat = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F)
 gappeddat = read.table(gappedfile, header=T, sep="\t", fill=T, stringsAsFactors=F)
-head(summarydat)
-head(gappeddat)
+
+
 
-dat = merge(gappeddat, summarydat, by="Sequence.ID")
-head(dat)
+#dat = data.frame(merge(gappeddat, summarydat, by="Sequence.ID", all.x=T))
+
+dat = cbind(gappeddat, summarydat$AA.JUNCTION)
+
+colnames(dat)[length(dat)] = "AA.JUNCTION"
 
 dat$VGene = gsub("^Homsap ", "", dat$V.GENE.and.allele)
 dat$VGene = gsub("[*].*", "", dat$VGene)
@@ -22,6 +26,8 @@
 dat$JGene = gsub("^Homsap ", "", dat$J.GENE.and.allele)
 dat$JGene = gsub("[*].*", "", dat$JGene)
 
+print(str(dat))
+
 dat$past = do.call(paste, c(dat[unlist(strsplit(selection, ","))], sep = ":"))
 
 dat = dat[!duplicated(dat$past), ]
```