annotate GetRPKMvalues/GetRPKMvalues.xml @ 0:888cb13321fa draft default tip

Uploaded
author hspitia
date Sun, 07 Sep 2014 02:02:23 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
888cb13321fa Uploaded
hspitia
parents:
diff changeset
1 <tool id="GetRPKMvalues" name="GetRPKMvalues" version="0.01">
888cb13321fa Uploaded
hspitia
parents:
diff changeset
2 <description> from BAM file</description>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
3
888cb13321fa Uploaded
hspitia
parents:
diff changeset
4 <command interpreter="python">
888cb13321fa Uploaded
hspitia
parents:
diff changeset
5
888cb13321fa Uploaded
hspitia
parents:
diff changeset
6 GetRPKMvalues.py --script_path "$runMe" --interpreter "Rscript"
888cb13321fa Uploaded
hspitia
parents:
diff changeset
7 --tool_name "GetRPKMvalues" --input_tab "$input1" --output_dir "./" --output_tab "$tab_file"
888cb13321fa Uploaded
hspitia
parents:
diff changeset
8 </command>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
9 <inputs>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
10 <param name="input1" type="data" format="bam" label="Select a suitable input file from your history"/>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
11 <param name="job_name" type="text" label="Supply a name for the outputs to remind you what they contain" value="GetRPKMvalues"/>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
12
888cb13321fa Uploaded
hspitia
parents:
diff changeset
13 </inputs>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
14 <outputs>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
15 <data format="tabular" name="tab_file" label="${job_name}"/>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
16
888cb13321fa Uploaded
hspitia
parents:
diff changeset
17 </outputs>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
18 <configfiles>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
19 <configfile name="runMe">
888cb13321fa Uploaded
hspitia
parents:
diff changeset
20 #!/usr/bin/Rscript
888cb13321fa Uploaded
hspitia
parents:
diff changeset
21
888cb13321fa Uploaded
hspitia
parents:
diff changeset
22 # source("http://bioconductor.org/biocLite.R")
888cb13321fa Uploaded
hspitia
parents:
diff changeset
23 # biocLite("Rsamtools")
888cb13321fa Uploaded
hspitia
parents:
diff changeset
24
888cb13321fa Uploaded
hspitia
parents:
diff changeset
# Positional command-line arguments: the first is used below as the input
# BAM path (bam.files = inf) and the second as the path of the tabular output
# handed to write.table.
# NOTE(review): the order in which the wrapper script passes these two
# arguments is not visible in this file - confirm against GetRPKMvalues.py.
25 ourargs = commandArgs(trailingOnly = TRUE)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
26 inf = ourargs[1]
888cb13321fa Uploaded
hspitia
parents:
diff changeset
27 outf = ourargs[2]
888cb13321fa Uploaded
hspitia
parents:
diff changeset
28
888cb13321fa Uploaded
hspitia
parents:
diff changeset
29
888cb13321fa Uploaded
hspitia
parents:
diff changeset
30 library(Rsamtools)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
31 library(foreach)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
32
888cb13321fa Uploaded
hspitia
parents:
diff changeset
# GetMappedReadsFreqs: count the mapped reads per reference sequence.
#   bam.file: path of the BAM file to scan.
#   Returns a data frame with two columns, ref.sequence and mapped.reads,
#   built by tabulating the rname field of every read whose unmapped flag
#   is not set.
33 GetMappedReadsFreqs = function(bam.file){
888cb13321fa Uploaded
hspitia
parents:
diff changeset
34 require(Rsamtools)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
35 mapped.flag = scanBamFlag(isUnmappedQuery = FALSE) # Get all mapped reads
888cb13321fa Uploaded
hspitia
parents:
diff changeset
36 mapped.param = ScanBamParam(flag = mapped.flag, what = c("rname")) # Get only the reference name to which reads were mapped
888cb13321fa Uploaded
hspitia
parents:
diff changeset
# The index is rebuilt on every call; nothing here checks for an existing one.
# NOTE(review): indexBam presumably writes the index file next to the input,
# so the input directory must be writable - confirm.
37 bam.index.file = indexBam(bam.file)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
38 mapped.scan.res = scanBam(param = mapped.param, file = bam.file, index = bam.index.file)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
# Only the first element of the scan result is tabulated.
# NOTE(review): the backslash before the dollar sign looks like template
# escaping required by the enclosing configfile - confirm.
39 mapped = as.data.frame(table(mapped.scan.res[[1]]\$rname))
888cb13321fa Uploaded
hspitia
parents:
diff changeset
40 colnames(mapped) = c("ref.sequence", "mapped.reads")
888cb13321fa Uploaded
hspitia
parents:
diff changeset
41 mapped
888cb13321fa Uploaded
hspitia
parents:
diff changeset
42 }
888cb13321fa Uploaded
hspitia
parents:
diff changeset
43
888cb13321fa Uploaded
hspitia
parents:
diff changeset
# GetRefSeqsLengths: read the reference sequence lengths from the BAM header.
#   bam.file: path of the BAM file whose header is read.
#   Returns a data frame with a single column named length, filled from the
#   targets entry of the first header element; the row names are the names
#   of that targets vector.
44 GetRefSeqsLengths = function(bam.file){
888cb13321fa Uploaded
hspitia
parents:
diff changeset
45 require(Rsamtools)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
46 header = scanBamHeader(files = bam.file)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
# NOTE(review): ref.seqs.data is assigned here but never read afterwards in
# this function; the returned data frame is built on the following line.
47 ref.seqs.data = as.data.frame(header[[1]]\$targets)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
48 data.frame(length = header[[1]]\$targets, row.names = names(header[[1]]\$targets))
888cb13321fa Uploaded
hspitia
parents:
diff changeset
49 }
888cb13321fa Uploaded
hspitia
parents:
diff changeset
50
888cb13321fa Uploaded
hspitia
parents:
diff changeset
# GetIdxStats: combine per-reference mapped read counts with reference lengths.
#   bam.file: path of the BAM file, passed unchanged to both helpers.
#   Returns the merge of the GetMappedReadsFreqs result with the
#   GetRefSeqsLengths result, matching the ref.sequence column of the former
#   against the row names of the latter.
#   merge is called without any all, all.x or all.y argument, so the default
#   matching rules of base merge apply; references present on only one side
#   are presumably dropped - confirm.
51 GetIdxStats = function(bam.file) {
888cb13321fa Uploaded
hspitia
parents:
diff changeset
52 mapped = GetMappedReadsFreqs(bam.file = bam.file)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
53 ref.seqs.lengths = GetRefSeqsLengths(bam.file = bam.file)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
54 merge(x = mapped, y = ref.seqs.lengths,
888cb13321fa Uploaded
hspitia
parents:
diff changeset
55 by.x = "ref.sequence", by.y = "row.names")
888cb13321fa Uploaded
hspitia
parents:
diff changeset
56 }
888cb13321fa Uploaded
hspitia
parents:
diff changeset
57
888cb13321fa Uploaded
hspitia
parents:
diff changeset
# GetRPKMValues: compute one RPKM value per reference sequence of a BAM file.
#   bam.file: path of the BAM file, forwarded to GetIdxStats.
#   Returns a data frame with a single column named RPKM whose row names are
#   the ref.sequence values of the merged statistics table.
#   The value computed is mapped.reads / (length * (total.mapped / 10^9)),
#   that is mapped.reads * 10^9 / (length * total.mapped), where total.mapped
#   is the sum of mapped.reads over the rows of the merged table.
58 GetRPKMValues = function(bam.file) {
888cb13321fa Uploaded
hspitia
parents:
diff changeset
59 bam.stats = GetIdxStats(bam.file = bam.file)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
60 total.mapped = sum(bam.stats\$mapped.reads)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
# The 10^9 constant folds the per-kilobase (10^3) and per-million (10^6)
# scalings into a single factor.
61 mapped.factor = total.mapped / 10^9
888cb13321fa Uploaded
hspitia
parents:
diff changeset
62
888cb13321fa Uploaded
hspitia
parents:
diff changeset
# NOTE(review): there is no guard for total.mapped being zero; the division
# would then produce NaN values rather than an error - confirm this is wanted.
63 bam.stats\$RPKM = (bam.stats\$mapped.reads / (bam.stats\$length * mapped.factor))
888cb13321fa Uploaded
hspitia
parents:
diff changeset
64 data.frame(RPKM = bam.stats\$RPKM, row.names = bam.stats\$ref.sequence)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
65 }
888cb13321fa Uploaded
hspitia
parents:
diff changeset
66
888cb13321fa Uploaded
hspitia
parents:
diff changeset
# Main script: validate the input path(s), compute the RPKM table and write it
# out as a tab-separated file.
# bam.files is taken from inf, which is set from the first command-line
# argument earlier in the script.
67 bam.files = inf
888cb13321fa Uploaded
hspitia
parents:
diff changeset
68
888cb13321fa Uploaded
hspitia
parents:
diff changeset
# Abort with the list of missing paths before any BAM processing starts.
69 if(any(!file.exists(bam.files))){
888cb13321fa Uploaded
hspitia
parents:
diff changeset
70 not.found = bam.files[!file.exists(bam.files)]
888cb13321fa Uploaded
hspitia
parents:
diff changeset
71 out.names = paste(not.found, collapse = "\n ")
888cb13321fa Uploaded
hspitia
parents:
diff changeset
72 stop(paste("One or more files does not exist:\n", out.names))
888cb13321fa Uploaded
hspitia
parents:
diff changeset
73 } else {
888cb13321fa Uploaded
hspitia
parents:
diff changeset
# One RPKM data frame is produced per input path and the results are joined
# column-wise with cbind.
# NOTE(review): cbind matches rows by position, so with more than one input
# every BAM file would need the same references in the same order - confirm.
74 rpkm.data = foreach(c.bam.file = bam.files, .combine = "cbind") %do% {
888cb13321fa Uploaded
hspitia
parents:
diff changeset
75 GetRPKMValues(bam.file = c.bam.file)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
76 }
888cb13321fa Uploaded
hspitia
parents:
diff changeset
# Each RPKM column is named after the base name of the file it came from.
77 colnames(rpkm.data) = sapply(bam.files, basename)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
# The row names are copied into an explicit first column because the table
# is written with row.names = FALSE.
78 out.data = cbind(ref.sequence = rownames(rpkm.data), rpkm.data)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
79 write.table(out.data, quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE,
888cb13321fa Uploaded
hspitia
parents:
diff changeset
80 file = outf)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
81 }
888cb13321fa Uploaded
hspitia
parents:
diff changeset
82 </configfile>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
83 </configfiles>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
84
888cb13321fa Uploaded
hspitia
parents:
diff changeset
85
888cb13321fa Uploaded
hspitia
parents:
diff changeset
86 <tests>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
87 <test>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
88 <param name="input1" value="GetRPKMvalues_test1_input.xls" ftype="bam"/>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
89 <param name="job_name" value="test1"/>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
90 <param name="runMe" value="$runMe"/>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
91 <output name="tab_file" file="GetRPKMvalues_test1_output.xls" ftype="tabular"/>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
92 </test>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
93 </tests>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
94
888cb13321fa Uploaded
hspitia
parents:
diff changeset
95
888cb13321fa Uploaded
hspitia
parents:
diff changeset
96 <help>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
97
888cb13321fa Uploaded
hspitia
parents:
diff changeset
98
888cb13321fa Uploaded
hspitia
parents:
diff changeset
99 This tool computes the RPKM values from a given BAM file.
888cb13321fa Uploaded
hspitia
parents:
diff changeset
100
888cb13321fa Uploaded
hspitia
parents:
diff changeset
101 This tool is an R script and requires the Rsamtools and foreach packages.
888cb13321fa Uploaded
hspitia
parents:
diff changeset
102
888cb13321fa Uploaded
hspitia
parents:
diff changeset
103 The output is a tabular file with two columns:
888cb13321fa Uploaded
hspitia
parents:
diff changeset
104 1. ref.sequence: which contains the names of reference sequences (genes/transcripts), and
888cb13321fa Uploaded
hspitia
parents:
diff changeset
105
888cb13321fa Uploaded
hspitia
parents:
diff changeset
106 2. dataset_#.dat: which refers to the name of the input dataset (BAM file specified) and contains the RPKM values for each reference sequence. The symbol # represents a consecutive number automatically assigned by Galaxy.
888cb13321fa Uploaded
hspitia
parents:
diff changeset
107
888cb13321fa Uploaded
hspitia
parents:
diff changeset
108 **Script**
888cb13321fa Uploaded
hspitia
parents:
diff changeset
109 Pressing execute will run the following code over your input file and generate some outputs in your history::
888cb13321fa Uploaded
hspitia
parents:
diff changeset
110
888cb13321fa Uploaded
hspitia
parents:
diff changeset
111
888cb13321fa Uploaded
hspitia
parents:
diff changeset
112 #!/usr/bin/Rscript
888cb13321fa Uploaded
hspitia
parents:
diff changeset
113
888cb13321fa Uploaded
hspitia
parents:
diff changeset
114 # source("http://bioconductor.org/biocLite.R")
888cb13321fa Uploaded
hspitia
parents:
diff changeset
115 # biocLite("Rsamtools")
888cb13321fa Uploaded
hspitia
parents:
diff changeset
116
888cb13321fa Uploaded
hspitia
parents:
diff changeset
117 ourargs = commandArgs(trailingOnly = TRUE)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
118 inf = ourargs[1]
888cb13321fa Uploaded
hspitia
parents:
diff changeset
119 outf = ourargs[2]
888cb13321fa Uploaded
hspitia
parents:
diff changeset
120
888cb13321fa Uploaded
hspitia
parents:
diff changeset
121
888cb13321fa Uploaded
hspitia
parents:
diff changeset
122 library(Rsamtools)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
123 library(foreach)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
124
888cb13321fa Uploaded
hspitia
parents:
diff changeset
125 GetMappedReadsFreqs = function(bam.file){
888cb13321fa Uploaded
hspitia
parents:
diff changeset
126 require(Rsamtools)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
127 mapped.flag = scanBamFlag(isUnmappedQuery = FALSE) # Get all mapped reads
888cb13321fa Uploaded
hspitia
parents:
diff changeset
128 mapped.param = ScanBamParam(flag = mapped.flag, what = c("rname")) # Get only the reference name to which reads were mapped
888cb13321fa Uploaded
hspitia
parents:
diff changeset
129 bam.index.file = indexBam(bam.file)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
130 mapped.scan.res = scanBam(param = mapped.param, file = bam.file, index = bam.index.file)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
131 mapped = as.data.frame(table(mapped.scan.res[[1]]\$rname))
888cb13321fa Uploaded
hspitia
parents:
diff changeset
132 colnames(mapped) = c("ref.sequence", "mapped.reads")
888cb13321fa Uploaded
hspitia
parents:
diff changeset
133 mapped
888cb13321fa Uploaded
hspitia
parents:
diff changeset
134 }
888cb13321fa Uploaded
hspitia
parents:
diff changeset
135
888cb13321fa Uploaded
hspitia
parents:
diff changeset
136 GetRefSeqsLengths = function(bam.file){
888cb13321fa Uploaded
hspitia
parents:
diff changeset
137 require(Rsamtools)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
138 header = scanBamHeader(files = bam.file)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
139 ref.seqs.data = as.data.frame(header[[1]]\$targets)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
140 data.frame(length = header[[1]]\$targets, row.names = names(header[[1]]\$targets))
888cb13321fa Uploaded
hspitia
parents:
diff changeset
141 }
888cb13321fa Uploaded
hspitia
parents:
diff changeset
142
888cb13321fa Uploaded
hspitia
parents:
diff changeset
143 GetIdxStats = function(bam.file) {
888cb13321fa Uploaded
hspitia
parents:
diff changeset
144 mapped = GetMappedReadsFreqs(bam.file = bam.file)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
145 ref.seqs.lengths = GetRefSeqsLengths(bam.file = bam.file)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
146 merge(x = mapped, y = ref.seqs.lengths,
888cb13321fa Uploaded
hspitia
parents:
diff changeset
147 by.x = "ref.sequence", by.y = "row.names")
888cb13321fa Uploaded
hspitia
parents:
diff changeset
148 }
888cb13321fa Uploaded
hspitia
parents:
diff changeset
149
888cb13321fa Uploaded
hspitia
parents:
diff changeset
150 GetRPKMValues = function(bam.file) {
888cb13321fa Uploaded
hspitia
parents:
diff changeset
151 bam.stats = GetIdxStats(bam.file = bam.file)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
152 total.mapped = sum(bam.stats\$mapped.reads)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
153 mapped.factor = total.mapped / 10^9
888cb13321fa Uploaded
hspitia
parents:
diff changeset
154
888cb13321fa Uploaded
hspitia
parents:
diff changeset
155 bam.stats\$RPKM = (bam.stats\$mapped.reads / (bam.stats\$length * mapped.factor))
888cb13321fa Uploaded
hspitia
parents:
diff changeset
156 data.frame(RPKM = bam.stats\$RPKM, row.names = bam.stats\$ref.sequence)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
157 }
888cb13321fa Uploaded
hspitia
parents:
diff changeset
158
888cb13321fa Uploaded
hspitia
parents:
diff changeset
159 bam.files = inf
888cb13321fa Uploaded
hspitia
parents:
diff changeset
160
888cb13321fa Uploaded
hspitia
parents:
diff changeset
161 if(any(!file.exists(bam.files))){
888cb13321fa Uploaded
hspitia
parents:
diff changeset
162 not.found = bam.files[!file.exists(bam.files)]
888cb13321fa Uploaded
hspitia
parents:
diff changeset
163 out.names = paste(not.found, collapse = "\n ")
888cb13321fa Uploaded
hspitia
parents:
diff changeset
164 stop(paste("One or more files does not exist:\n", out.names))
888cb13321fa Uploaded
hspitia
parents:
diff changeset
165 } else {
888cb13321fa Uploaded
hspitia
parents:
diff changeset
166 rpkm.data = foreach(c.bam.file = bam.files, .combine = "cbind") %do% {
888cb13321fa Uploaded
hspitia
parents:
diff changeset
167 GetRPKMValues(bam.file = c.bam.file)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
168 }
888cb13321fa Uploaded
hspitia
parents:
diff changeset
169 colnames(rpkm.data) = sapply(bam.files, basename)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
170 out.data = cbind(ref.sequence = rownames(rpkm.data), rpkm.data)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
171 write.table(out.data, quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE,
888cb13321fa Uploaded
hspitia
parents:
diff changeset
172 file = outf)
888cb13321fa Uploaded
hspitia
parents:
diff changeset
173 }
888cb13321fa Uploaded
hspitia
parents:
diff changeset
174
888cb13321fa Uploaded
hspitia
parents:
diff changeset
175 **Attribution**
888cb13321fa Uploaded
hspitia
parents:
diff changeset
176 This Galaxy tool was created by hfespitia@cenicana.org at 06/09/2014 23:41:54
888cb13321fa Uploaded
hspitia
parents:
diff changeset
177 using the Galaxy Tool Factory.
888cb13321fa Uploaded
hspitia
parents:
diff changeset
178
888cb13321fa Uploaded
hspitia
parents:
diff changeset
179 See https://bitbucket.org/fubar/galaxytoolfactory for details of that project
888cb13321fa Uploaded
hspitia
parents:
diff changeset
180 Please cite: Creating re-usable tools from scripts: The Galaxy Tool Factory. Ross Lazarus; Antony Kaspi; Mark Ziemann; The Galaxy Team.
888cb13321fa Uploaded
hspitia
parents:
diff changeset
181 Bioinformatics 2012; doi: 10.1093/bioinformatics/bts573
888cb13321fa Uploaded
hspitia
parents:
diff changeset
182
888cb13321fa Uploaded
hspitia
parents:
diff changeset
183
888cb13321fa Uploaded
hspitia
parents:
diff changeset
184 </help>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
185 <citations>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
186
888cb13321fa Uploaded
hspitia
parents:
diff changeset
187 <citation type="doi">10.1093/bioinformatics/bts573</citation>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
188 </citations>
888cb13321fa Uploaded
hspitia
parents:
diff changeset
189 </tool>