comparison filter.r @ 5:d8de51314d3f draft

Uploaded
author davidvanzessen
date Thu, 14 Aug 2014 07:17:26 -0400
parents 38c038c17d0c
children 82016fd934c8
comparison
equal deleted inserted replaced
4:1726dc8f3e0a 5:d8de51314d3f
1 arg = commandArgs(TRUE) 1 arg = commandArgs(TRUE)
2 summaryfile = arg[1] 2 summaryfile = arg[1]
3 gappedfile = arg[2] 3 gappedfile = arg[2]
4 selection = arg[3] 4 selection = arg[3]
5 output = arg[4] 5 output = arg[4]
6 print(paste("-----", selection, "------")) 6 print(paste("selection = ", selection))
7 7
8 summarydat = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F)[,c("Sequence.ID", "AA.JUNCTION")] 8
9 summarydat = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F)
9 gappeddat = read.table(gappedfile, header=T, sep="\t", fill=T, stringsAsFactors=F) 10 gappeddat = read.table(gappedfile, header=T, sep="\t", fill=T, stringsAsFactors=F)
10 head(summarydat)
11 head(gappeddat)
12 11
13 dat = merge(gappeddat, summarydat, by="Sequence.ID") 12
14 head(dat) 13
14 #dat = data.frame(merge(gappeddat, summarydat, by="Sequence.ID", all.x=T))
15
16 dat = cbind(gappeddat, summarydat$AA.JUNCTION)
17
18 colnames(dat)[length(dat)] = "AA.JUNCTION"
15 19
16 dat$VGene = gsub("^Homsap ", "", dat$V.GENE.and.allele) 20 dat$VGene = gsub("^Homsap ", "", dat$V.GENE.and.allele)
17 dat$VGene = gsub("[*].*", "", dat$VGene) 21 dat$VGene = gsub("[*].*", "", dat$VGene)
18 22
19 dat$DGene = gsub("^Homsap ", "", dat$D.GENE.and.allele) 23 dat$DGene = gsub("^Homsap ", "", dat$D.GENE.and.allele)
20 dat$DGene = gsub("[*].*", "", dat$DGene) 24 dat$DGene = gsub("[*].*", "", dat$DGene)
21 25
22 dat$JGene = gsub("^Homsap ", "", dat$J.GENE.and.allele) 26 dat$JGene = gsub("^Homsap ", "", dat$J.GENE.and.allele)
23 dat$JGene = gsub("[*].*", "", dat$JGene) 27 dat$JGene = gsub("[*].*", "", dat$JGene)
24 28
29 print(str(dat))
30
25 dat$past = do.call(paste, c(dat[unlist(strsplit(selection, ","))], sep = ":")) 31 dat$past = do.call(paste, c(dat[unlist(strsplit(selection, ","))], sep = ":"))
26 32
27 dat = dat[!duplicated(dat$past), ] 33 dat = dat[!duplicated(dat$past), ]
28 34
29 write.table(x=dat, file=output, sep="\t",quote=F,row.names=F,col.names=T) 35 write.table(x=dat, file=output, sep="\t",quote=F,row.names=F,col.names=T)