annotate filter.r @ 3:38c038c17d0c draft

Uploaded
author davidvanzessen
date Wed, 13 Aug 2014 09:07:54 -0400
parents
children d8de51314d3f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
38c038c17d0c Uploaded
davidvanzessen
parents:
diff changeset
# Merge a summary table with a gapped table and keep one row per unique
# combination of the selected columns.
#
# Usage: Rscript filter.r <summaryfile> <gappedfile> <selection> <output>
#   summaryfile  Tab-separated file with a header row; must provide the
#                columns Sequence.ID and AA.JUNCTION (names as they appear
#                after read.table has read the header).
#   gappedfile   Tab-separated file with a header row; must provide
#                Sequence.ID plus V.GENE.and.allele, D.GENE.and.allele and
#                J.GENE.and.allele.
#   selection    Comma-separated column names of the merged table. Rows whose
#                selected values (joined with ":") were already seen are
#                dropped. The derived columns VGene, DGene and JGene may be
#                used here.
#   output       Path of the tab-separated file to write. It contains every
#                merged column plus VGene, DGene, JGene and the helper
#                column `past` holding the joined key.

# Stop with a readable message when `df` lacks any of the columns in `cols`.
# `label` names the table in the error message.
require_columns <- function(df, cols, label) {
  missing_cols <- setdiff(cols, names(df))
  if (length(missing_cols) > 0) {
    stop(
      label, " is missing column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }
  invisible(df)
}

# Remove a leading "Homsap " prefix, then everything from the first "*" on.
strip_gene_name <- function(x) {
  gsub("[*].*", "", gsub("^Homsap ", "", x))
}

arg <- commandArgs(TRUE)
# Fail early: without this check, missing arguments are NA and the script
# dies later with an unrelated read/write error.
if (length(arg) < 4) {
  stop(
    "usage: Rscript filter.r <summaryfile> <gappedfile> <selection> <output>",
    call. = FALSE
  )
}
summaryfile <- arg[1]
gappedfile <- arg[2]
selection <- arg[3]
output <- arg[4]
print(paste("-----", selection, "------"))

# NOTE(review): read.table's default `quote` and `comment.char` are kept as
# in the original. If fields can contain ' or #, consider quote = "" and
# comment.char = "" -- confirm against real input files before changing.
summarydat <- read.table(
  summaryfile,
  header = TRUE, sep = "\t", fill = TRUE, stringsAsFactors = FALSE
)
require_columns(summarydat, c("Sequence.ID", "AA.JUNCTION"), "summary file")
# Only the join key and the junction column are carried into the merge.
summarydat <- summarydat[, c("Sequence.ID", "AA.JUNCTION")]

gappeddat <- read.table(
  gappedfile,
  header = TRUE, sep = "\t", fill = TRUE, stringsAsFactors = FALSE
)
require_columns(
  gappeddat,
  c("Sequence.ID", "V.GENE.and.allele", "D.GENE.and.allele",
    "J.GENE.and.allele"),
  "gapped file"
)

# Diagnostic previews of the inputs (printed explicitly rather than relying
# on top-level auto-printing).
print(head(summarydat))
print(head(gappeddat))

# merge() defaults to keeping only Sequence.IDs present in both tables.
dat <- merge(gappeddat, summarydat, by = "Sequence.ID")
print(head(dat))

dat$VGene <- strip_gene_name(dat[["V.GENE.and.allele"]])
dat$DGene <- strip_gene_name(dat[["D.GENE.and.allele"]])
dat$JGene <- strip_gene_name(dat[["J.GENE.and.allele"]])

# Surrounding whitespace is trimmed so "VGene, JGene" works like
# "VGene,JGene".
selected <- gsub("^\\s+|\\s+$", "", unlist(strsplit(selection, ",")))
if (length(selected) == 0) {
  stop("selection must name at least one column", call. = FALSE)
}
require_columns(dat, selected, "merged table")

# unname() keeps a selected column called e.g. "sep" or "collapse" from being
# interpreted as a paste() argument.
dat$past <- do.call(paste, c(unname(as.list(dat[selected])), sep = ":"))

# Keep the first row seen for each key.
dat <- dat[!duplicated(dat$past), ]

write.table(
  x = dat, file = output,
  sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE
)