annotate IDchoice/IDchoice_script.R @ 0:931f326198ba draft

Uploaded
author melpetera
date Mon, 14 Jan 2019 08:47:08 -0500
parents
children dfd23f54f61f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
931f326198ba Uploaded
melpetera
parents:
diff changeset
1 ################################################################################################
931f326198ba Uploaded
melpetera
parents:
diff changeset
2 # ID CHOICE #
931f326198ba Uploaded
melpetera
parents:
diff changeset
3 # #
931f326198ba Uploaded
melpetera
parents:
diff changeset
4 # User: Galaxy #
931f326198ba Uploaded
melpetera
parents:
diff changeset
5 # Starting date: 01-06-2017 #
931f326198ba Uploaded
melpetera
parents:
diff changeset
6 # V-0.1: First version of code #
931f326198ba Uploaded
melpetera
parents:
diff changeset
7 # #
931f326198ba Uploaded
melpetera
parents:
diff changeset
8 # #
931f326198ba Uploaded
melpetera
parents:
diff changeset
9 # Input files: dataMatrix ; Metadata file #
931f326198ba Uploaded
melpetera
parents:
diff changeset
10 # Output files: dataMatrix ; Metadata file #
931f326198ba Uploaded
melpetera
parents:
diff changeset
11 # #
931f326198ba Uploaded
melpetera
parents:
diff changeset
12 # Dependencies: RcheckLibrary.R ; miniTools.R (easyRlibrary) #
931f326198ba Uploaded
melpetera
parents:
diff changeset
13 # #
931f326198ba Uploaded
melpetera
parents:
diff changeset
14 ################################################################################################
931f326198ba Uploaded
melpetera
parents:
diff changeset
15
931f326198ba Uploaded
melpetera
parents:
diff changeset
# Parameters (for dev)
# This block is wrapped in `if (FALSE)`, so it is never evaluated when the
# script is sourced; its assignments are meant to be run by hand to set up an
# interactive session before calling id.choice().
if (FALSE) {
  DM.name   <- "CaracSpe_dataMatrix.txt"
  meta.name <- "CaracSpe_variableMetadata.txt"
  metype    <- "variable"
  # coloname <- "namecustom"
  coloname  <- "B"
  makeun    <- "yes"
  DMout     <- "ID_DM.txt"
  metaout   <- paste0("ID_", metype, "meta.txt")
}
931f326198ba Uploaded
melpetera
parents:
diff changeset
27
931f326198ba Uploaded
melpetera
parents:
diff changeset
28
931f326198ba Uploaded
melpetera
parents:
diff changeset
id.choice <- function(DM.name,meta.name,metype,coloname,makeun,DMout,metaout){
  # Replaces the original identifiers of a dataMatrix, and of one of its
  # metadata tables, with the content of a chosen metadata column.
  #
  # Parameters:
  # - DM.name, meta.name: paths to the tab-separated dataMatrix and metadata files
  # - metype: "sample" or "variable" depending on metadata content; with "sample"
  #           the dataMatrix is transposed before the merge and transposed back after
  # - coloname: name of the metadata column to be used as new ID (case-sensitive)
  # - makeun: "no" stops with an error when the new IDs contain duplicates; any
  #           other value ("yes") makes them unique with make.unique(sep = "_")
  # - DMout, metaout: paths of the tab-separated output files
  #
  # Output layout:
  # - dataMatrix: first column holds the new IDs, data columns are unchanged
  # - metadata, makeun == "no": the chosen column is moved to first position
  # - metadata, otherwise: a 'newID' column is prepended, original columns are kept
  #
  # Depends on match2() and check.err(), which are not defined in this file
  # (the file header lists RcheckLibrary.R and miniTools.R as dependencies).


  # Local helper: uses the first column as row names, transposes the remaining
  # columns and puts the former column names back as a first 'sample' column.
  # drop = FALSE keeps a data frame even when a single data column remains,
  # otherwise row names are lost and the transposition is meaningless.
  flip.table <- function(tab) {
    rownames(tab) <- tab[, 1]
    tab <- t(tab[, -1, drop = FALSE])
    tab <- data.frame(sample = row.names(tab), tab, check.names = FALSE)
    rownames(tab) <- NULL
    tab
  }


  # Input --------------------------------------------------------------

  DM <- read.table(DM.name, header = TRUE, sep = "\t", check.names = FALSE)
  # Metadata are read as character so that candidate IDs are never reformatted
  meta <- read.table(meta.name, header = TRUE, sep = "\t", check.names = FALSE,
                     colClasses = "character")

  # Table match check
  table.check <- match2(DM, meta, metype)
  check.err(table.check)


  # Checking unicity of new IDs ----------------------------------------

  numcol <- which(colnames(meta) == coloname)
  if (length(numcol) == 0) {
    stop(paste0("\n-------\nWarning: no '",coloname,"' column detected in ",metype," metadata!",
                "\nPlease check your metadata file (column names are case-sensitive).\n-------\n"))
  }
  if (length(numcol) > 1) {
    # check.names = FALSE lets duplicated headers through; every index
    # computation below assumes exactly one matching column.
    stop(paste0("\n-------\nSeveral '",coloname,"' columns detected in ",metype," metadata!",
                "\nPlease make sure column names are unique in your metadata file.\n-------\n"))
  }

  new.ids <- meta[, numcol]
  is.dup <- duplicated(new.ids)

  if (any(is.dup)) {
    if (makeun == "no") {
      # Sending back an explicit error; +1 turns the number of extra
      # occurrences into the total number of occurrences of each value
      duptable <- t(t(table(new.ids[is.dup]) + 1))
      stop(paste0("\n-------\nYour '",coloname,"' column contains duplicates:\n"),
           paste(rownames(duptable),duptable,sep=": ",collapse="\n"),paste0("\nSince identifiers are meant to be unique, ",
           "please check your data or use the 'Force unicity' option to force unicity.\n-------\n"))
    }
    # Making unique names
    new.ids <- make.unique(new.ids, sep = "_")
  }

  # 'newID' carries the future identifiers, 'ori' the original row order
  # (merge() sorts on the key, so the order has to be restored afterwards)
  meta <- cbind(meta, newID = new.ids, ori = seq_len(nrow(meta)))


  # Merging tables -----------------------------------------------------

  # Transposing the dataMatrix if necessary
  if (metype == "sample") {
    DM <- flip.table(DM)
  }

  comb.data <- merge(x = meta, y = DM, by.x = 1, by.y = 1)
  comb.data <- comb.data[order(comb.data$ori), ]


  # Changing IDs -------------------------------------------------------

  # In comb.data the first n.meta columns are the metadata columns, in order:
  # the n.ori original ones, then 'newID' (n.meta - 1) and 'ori' (n.meta).
  n.meta <- ncol(meta)
  n.ori <- n.meta - 2

  # dataMatrix: 'newID' followed by the data columns
  DM <- comb.data[, -c(seq_len(n.ori), n.meta), drop = FALSE]

  if (makeun == "no") {
    # Chosen column first, then the other original metadata columns
    meta <- comb.data[, c(numcol, setdiff(seq_len(n.ori), numcol)), drop = FALSE]
  } else {
    # 'newID' first, then all the original metadata columns
    meta <- comb.data[, c(n.meta - 1, seq_len(n.ori)), drop = FALSE]
  }

  # Transposing back the dataMatrix if necessary
  if (metype == "sample") {
    DM <- flip.table(DM)
  }


  # Output -------------------------------------------------------------

  # Writing the tables
  write.table(DM, DMout, sep = "\t", quote = FALSE, row.names = FALSE)
  write.table(meta, metaout, sep = "\t", quote = FALSE, row.names = FALSE)


} # End of id.choice
931f326198ba Uploaded
melpetera
parents:
diff changeset
121
931f326198ba Uploaded
melpetera
parents:
diff changeset
122
931f326198ba Uploaded
melpetera
parents:
diff changeset
123 # Typical function call
931f326198ba Uploaded
melpetera
parents:
diff changeset
124 # id.choice(DM.name,meta.name,metype,coloname,makeun,DMout,metaout)