Mercurial > repos > mvdbeek > r_goseq_1_22_0
annotate getgo.r @ 9:04b9c519d3e1 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
| author | mvdbeek |
|---|---|
| date | Thu, 31 Mar 2016 12:23:45 -0400 |
| parents | 0e9424413ab0 |
| children |
| rev | line source |
|---|---|
|
6
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
5
diff
changeset
|
# Script preamble: install a fatal-error handler, pin the message locale,
# load dependencies, and parse the command line into the variables used by
# the rest of the script (`package`, `gene_id`, `output`, `cats`).

# On any uncaught error, write the error message to stderr and quit with
# exit status 1, without saving the workspace and without running .Last().
options(
  show.error.messages = FALSE,
  error = function() {
    cat(geterrmessage(), file = stderr())
    q("no", 1, FALSE)
  }
)

# we need that to not crash galaxy with an UTF8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

suppressPackageStartupMessages({
  library("goseq")
  library("optparse")
  library("reshape2")
})

# Send the message stream to stdout from here on.
sink(stdout(), type = "message")

option_list <- list(
  make_option(c("-p", "--package"), type = "character", help = "Genome [used for looking up GO categories]"),
  make_option(c("-i", "--gene_id"), type = "character", help = "Gene ID format"),
  make_option(c("-c", "--cats"), type = "character", help = "Comma-seperated list of categories to fetch"),
  make_option(c("-o", "--output"), type = "character", help = "Path to output file")
)

parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
args <- parse_args(parser)

# None of the options has a default, so an option that was not supplied is
# simply absent from `args`. Report that here with a clear message instead of
# failing later inside strsplit() / library() / write.table().
required_opts <- c("package", "gene_id", "cats", "output")
missing_opts <- required_opts[
  vapply(required_opts, function(opt) is.null(args[[opt]]), logical(1))
]
if (length(missing_opts) > 0) {
  stop(
    "Missing required option(s): ",
    paste0("--", missing_opts, collapse = ", "),
    call. = FALSE
  )
}

# vars

package <- args$package
gene_id <- args$gene_id
output <- args$output
# `--cats` arrives as one comma-separated string; split it into a vector.
cats <- unlist(strsplit(args$cats, ","))
|
6
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
5
diff
changeset
|
30 |
|
9
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
#' Build a two-column gene-to-category table from an annotation package.
#'
#' @param package_str Name of the annotation package (should be org.Xx.eg.db).
#'   The package is attached and the object bearing the same name is queried.
#' @param gen Identifier type used for the first output column; should be
#'   ENSEMBL, UNIGENE, REFSEQ, SYMBOL or GENENAME. The value "ENTREZ" returns
#'   the package's own keys without any mapping step.
#' @param cat Category column to fetch; should be PMID, GO2ALLEGS, ENZYME or
#'   PATH. GO2ALLEGS, GO2ALLTAIRS and GO2ALLORFS are all fetched as "GOALL".
#' @return A data.frame with two columns named `gen` and "category"; rows
#'   containing NA are removed.
get_categories <- function(package_str, gen, cat) {
  library(package_str, character.only = TRUE)
  # Look the database object up by its name rather than evaluating the
  # command-line string as R code.
  package <- get(package_str)

  if (cat %in% c("GO2ALLEGS", "GO2ALLTAIRS", "GO2ALLORFS")) {
    cat <- "GOALL"
  }

  # Two packages use their own key type; everything else is keyed by ENTREZID.
  keytype <- switch(package_str,
    "org.Pf.plasmo.db" = "ORF",
    "org.At.tair.db" = "TAIR",
    "ENTREZID"
  )

  entrez_cat <- select(package, keys(package), cat, keytype)
  entrez_cat <- entrez_cat[complete.cases(entrez_cat), ]

  if (cat == "GOALL") {
    # we are discarding ontology (MF, CC, BP) and evidence class here
    entrez_cat <- entrez_cat[, c(1, 2)]
  } else if (nrow(entrez_cat) > 0) {
    # add the origin of the term, so that there are no duplicate values
    # e.g. between ENZYME and PATH. Skipped for an empty table because
    # paste() would still yield one element for zero-length input.
    entrez_cat[[2]] <- paste(cat, entrez_cat[[2]], sep = ":")
  }
  colnames(entrez_cat) <- c(gen, "category")

  if (gen != "ENTREZ" && nrow(entrez_cat) > 0) {
    # We map the package's native key to `gen`, but are potentially losing
    # gene identifiers where multiple identifiers match a single native key
    # (only the first match is kept).
    entrez_cat[, 1] <- mapIds(
      package,
      keys = as.character(entrez_cat[, 1]),
      keytype = keytype,
      column = gen,
      multiVals = "first"
    )
    entrez_cat <- entrez_cat[complete.cases(entrez_cat), ]
  }

  entrez_cat
}
|
5
b79c65c90744
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 81aedf1b50849160f6c048c0da4bb1038bb813a5
mvdbeek
parents:
diff
changeset
|
65 |
|
9
04b9c519d3e1
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit 6574ffcb63770ff8de2d496894292cb7ce0492a8
mvdbeek
parents:
6
diff
changeset
|
# Driver: fetch one gene-to-category table per requested category, stack them
# row-wise, and write the combined table as headerless, unquoted TSV.
fetch_category <- function(category) {
  get_categories(package, gene_id, category)
}
result <- do.call(rbind, lapply(cats, fetch_category))

write.table(
  result,
  file = output,
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

# Print the R session details for the job log.
sessionInfo()
