Mercurial > repos > rhpvorderman > shm_csr
annotate new_imgt.r @ 6:ea9d5fc4c001 draft default tip
"planemo upload commit 9ada186a78831ca2618ec817a23a77de6adf1a5d"
| author | rhpvorderman |
|---|---|
| date | Wed, 22 Dec 2021 11:29:16 +0000 |
| parents | 64d74ba01a7c |
| children |
| rev | line source |
|---|---|
|
0
64d74ba01a7c
"planemo upload commit 78d1fae87dbcf490e49a9f99e7a06de7328e16d4"
rhpvorderman
parents:
diff
changeset
|
# new_imgt.r
#
# Restrict every IMGT result table in a directory to the sequences that are
# present in a merged table, overwriting each table in place.
#
# Positional command-line arguments:
#   1. imgt.dir    - directory holding the tab-separated IMGT ".txt" tables
#   2. merged.file - tab-separated table with a header row
#   3. gene        - pattern anchored at the start of the best_match column,
#                    or "-" to skip the gene filter
args <- commandArgs(trailingOnly = TRUE)

# Fail early with a readable message instead of an obscure error further down
# (a missing third argument would otherwise make `gene != "-"` evaluate to NA).
if (length(args) < 3) {
  stop("usage: new_imgt.r <imgt.dir> <merged.file> <gene>", call. = FALSE)
}

imgt.dir <- args[1]
merged.file <- args[2]
gene <- args[3]

merged <- read.table(
  merged.file,
  header = TRUE,
  sep = "\t",
  fill = TRUE,
  stringsAsFactors = FALSE,
  comment.char = "",
  quote = ""
)

# A table without a "Sequence.ID" column is treated as a Change-O DB, whose
# identifier column is called "SEQUENCE_ID"; rename it so the rest of the
# script can use a single column name.
if (!("Sequence.ID" %in% names(merged))) { # change-o db
  print("Change-O DB changing 'SEQUENCE_ID' to 'Sequence.ID'")
  # names() has to be *called* here; the previous `names[merged]` tried to
  # subset the function object itself and aborted the script.
  names(merged)[names(merged) == "SEQUENCE_ID"] <- "Sequence.ID"
}

# Keep only the rows whose best_match starts with the requested gene.
# `gene` is pasted into the pattern, so it is interpreted as a regular
# expression. If the table has no best_match column, merged$best_match is NULL,
# grepl() returns a zero-length mask and no rows are kept.
if (gene != "-") {
  merged <- merged[grepl(paste0("^", gene), merged$best_match), ]
}

# Drop rows whose best_match contains "unmatched".
if ("best_match" %in% names(merged)) {
  merged <- merged[!grepl("unmatched", merged$best_match), ]
}

# Row count of the most recently processed table (0 when there are no tables).
nrow_dat <- 0

# The pattern is a regular expression, not a glob: match a literal ".txt"
# extension at the end of the file name.
for (f in list.files(imgt.dir, pattern = "\\.txt$")) {
  path <- file.path(imgt.dir, f)

  # check.names = FALSE keeps the original headers (e.g. "Sequence ID" with a
  # space) so they are written back unchanged.
  dat <- read.table(
    path,
    header = TRUE,
    sep = "\t",
    fill = TRUE,
    quote = "",
    stringsAsFactors = FALSE,
    check.names = FALSE,
    comment.char = ""
  )

  # Keep only the sequences that survived the filtering of the merged table.
  dat <- dat[dat[, "Sequence ID"] %in% merged$Sequence.ID, ]

  # Overwritten on every iteration: the final message reports the last table.
  nrow_dat <- nrow(dat)

  # Change the FR1 columns to 0 in the "8_..." file. `&&` is the scalar,
  # short-circuiting operator that belongs in an if() condition.
  if (nrow(dat) > 0 && grepl("^8_", f)) {
    dat[, grepl("^FR1", names(dat))] <- 0
  }

  # Write the filtered table back over the original file.
  write.table(
    dat,
    path,
    quote = FALSE,
    sep = "\t",
    row.names = FALSE,
    col.names = TRUE,
    na = ""
  )
}

print(paste("Creating new zip for ", gene, "with", nrow_dat, "sequences"))
