Mercurial > repos > davidvanzessen > baseline_bayesian_estimation
diff filter.r @ 3:38c038c17d0c draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 13 Aug 2014 09:07:54 -0400 |
parents | |
children | d8de51314d3f |
line wrap: on
line diff
# filter.r
#
# Merge a tab-separated "summary" table with a tab-separated "gapped" table on
# their shared Sequence.ID column, derive VGene/DGene/JGene columns from the
# *.GENE.and.allele columns, and keep only the first row for every distinct
# combination of the user-selected columns.
#
# Usage:
#   Rscript filter.r <summaryfile> <gappedfile> <selection> <output>
#
#   summaryfile  tab-separated table; must contain Sequence.ID and AA.JUNCTION
#   gappedfile   tab-separated table; must contain Sequence.ID and the
#                V/D/J.GENE.and.allele columns
#   selection    comma-separated names of the columns (of the merged table,
#                including the derived VGene/DGene/JGene) that together form
#                the de-duplication key
#   output       path of the tab-separated result file
#
# The result contains every column of the merged table plus VGene, DGene,
# JGene and "past" (the ':'-joined key that was used for de-duplication).

arg <- commandArgs(TRUE)
if (length(arg) < 4) {
  stop(
    "usage: Rscript filter.r <summaryfile> <gappedfile> <selection> <output>",
    call. = FALSE
  )
}

summaryfile <- arg[1]
gappedfile <- arg[2]
selection <- arg[3]
output <- arg[4]
print(paste("-----", selection, "------"))

# Read a tab-separated table exactly as written. Quote and comment processing
# are switched off: with read.table's defaults a stray ', " or # inside a
# field would merge or truncate rows, and fill = TRUE would hide the damage by
# padding the short rows instead of raising an error.
read_tsv <- function(path) {
  read.table(
    path,
    header = TRUE,
    sep = "\t",
    fill = TRUE,
    stringsAsFactors = FALSE,
    quote = "",
    comment.char = ""
  )
}

# Stop with a readable message when `df` lacks any of the columns in `cols`.
require_columns <- function(df, cols, label) {
  absent <- setdiff(cols, names(df))
  if (length(absent) > 0) {
    stop(
      label, " is missing required column(s): ",
      paste(absent, collapse = ", "),
      call. = FALSE
    )
  }
  invisible(df)
}

# Drop a leading "Homsap " prefix and everything from the first '*' onwards.
# NOTE(review): presumably the '*' separates the gene name from the allele
# (as the *.GENE.and.allele column names suggest) - confirm against the input.
strip_allele <- function(x) {
  x <- gsub("^Homsap ", "", x)
  gsub("[*].*", "", x)
}

summarydat <- read_tsv(summaryfile)
require_columns(summarydat, c("Sequence.ID", "AA.JUNCTION"), "summary file")
# Only the join key and the junction column are carried over from the summary.
summarydat <- summarydat[, c("Sequence.ID", "AA.JUNCTION")]

gappeddat <- read_tsv(gappedfile)
require_columns(
  gappeddat,
  c("Sequence.ID", "V.GENE.and.allele", "D.GENE.and.allele",
    "J.GENE.and.allele"),
  "gapped file"
)

# Diagnostic previews of the inputs (go to stdout).
print(head(summarydat))
print(head(gappeddat))

# Inner join: rows whose Sequence.ID occurs in only one of the tables are
# dropped by merge().
dat <- merge(gappeddat, summarydat, by = "Sequence.ID")
print(head(dat))

dat$VGene <- strip_allele(dat$V.GENE.and.allele)
dat$DGene <- strip_allele(dat$D.GENE.and.allele)
dat$JGene <- strip_allele(dat$J.GENE.and.allele)

# Parse the comma-separated selection; surrounding blanks and empty entries
# are ignored so that "VGene, JGene" works the same as "VGene,JGene".
key_columns <- unlist(strsplit(selection, ","))
key_columns <- gsub("^\\s+|\\s+$", "", key_columns)
key_columns <- key_columns[nzchar(key_columns)]
if (length(key_columns) == 0) {
  stop("selection does not name any column", call. = FALSE)
}
require_columns(dat, key_columns, "merged table")

# Build the de-duplication key by joining the selected columns with ':'.
dat$past <- do.call(paste, c(dat[key_columns], sep = ":"))

# Keep the first row for every distinct key.
dat <- dat[!duplicated(dat$past), ]

write.table(
  x = dat,
  file = output,
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)