Mercurial > repos > hspitia > rpkm_values
comparison GetRPKMvalues/GetRPKMvalues.xml @ 0:888cb13321fa draft default tip
Uploaded
| author | hspitia |
|---|---|
| date | Sun, 07 Sep 2014 02:02:23 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:888cb13321fa |
|---|---|
| 1 <tool id="GetRPKMvalues" name="GetRPKMvalues" version="0.01"> | |
| 2 <description> from BAM file</description> | |
| 3 | |
| 4 <command interpreter="python"> | |
| 5 | |
| 6 GetRPKMvalues.py --script_path "$runMe" --interpreter "Rscript" | |
| 7 --tool_name "GetRPKMvalues" --input_tab "$input1" --output_dir "./" --output_tab "$tab_file" | |
| 8 </command> | |
| 9 <inputs> | |
| 10 <param name="input1" type="data" format="bam" label="Select a suitable input file from your history"/> | |
| 11 <param name="job_name" type="text" label="Supply a name for the outputs to remind you what they contain" value="GetRPKMvalues"/> | |
| 12 | |
| 13 </inputs> | |
| 14 <outputs> | |
| 15 <data format="tabular" name="tab_file" label="${job_name}"/> | |
| 16 | |
| 17 </outputs> | |
| 18 <configfiles> | |
| 19 <configfile name="runMe"> | |
| 20 #!/usr/bin/Rscript | |
| 21 | |
| 22 # source("http://bioconductor.org/biocLite.R") | |
| 23 # biocLite("Rsamtools") | |
| 24 # The first trailing argument is used as the input BAM path, the second as the output table path. | |
| 25 cli.args = commandArgs(trailingOnly = TRUE) | |
| 26 inf = cli.args[1] | |
| 27 outf = cli.args[2] | |
| 28 | |
| 29 # Rsamtools provides the BAM readers used below; foreach provides the %do% loop at the end of the script. | |
| 30 library(Rsamtools) | |
| 31 library(foreach) | |
| 32 # Count mapped reads per reference name in bam.file; returns a data frame with columns ref.sequence and mapped.reads. | |
| 33 GetMappedReadsFreqs = function(bam.file){ | |
| 34 library(Rsamtools) # library() stops with an error if the package is missing; require() would only return FALSE | |
| 35 mapped.flag = scanBamFlag(isUnmappedQuery = FALSE) # Keep only reads that are not flagged as unmapped | |
| 36 mapped.param = ScanBamParam(flag = mapped.flag, what = c("rname")) # Read only the reference name each read was mapped to | |
| 37 bam.index.file = indexBam(bam.file) # Build the index that is handed to scanBam below | |
| 38 mapped.scan.res = scanBam(param = mapped.param, file = bam.file, index = bam.index.file) | |
| 39 mapped = as.data.frame(table(mapped.scan.res[[1]]\$rname)) # One row per reference name with its read count | |
| 40 colnames(mapped) = c("ref.sequence", "mapped.reads") | |
| 41 mapped | |
| 42 } | |
| 43 # Read the targets entry of the BAM header; returns a one-column data frame (length) whose row names are the target names. | |
| 44 GetRefSeqsLengths = function(bam.file){ | |
| 45 library(Rsamtools) # library() fails immediately if the package is missing; require() would only return FALSE | |
| 46 header = scanBamHeader(files = bam.file) | |
| 47 targets = header[[1]]\$targets # Header of the first file; extracted once and reused (the unused ref.seqs.data copy was dropped) | |
| 48 data.frame(length = targets, row.names = names(targets)) | |
| 49 } | |
| 50 # Join the per-reference read counts with the reference lengths; the result has columns ref.sequence, mapped.reads and length. | |
| 51 GetIdxStats = function(bam.file) { | |
| 52 read.counts = GetMappedReadsFreqs(bam.file = bam.file) | |
| 53 seq.lengths = GetRefSeqsLengths(bam.file = bam.file) | |
| 54 joined = merge(read.counts, seq.lengths, by.x = "ref.sequence", by.y = "row.names") # Lengths are keyed by row name | |
| 55 joined | |
| 56 } | |
| 57 # Compute RPKM for every reference sequence in bam.file; returns a one-column data frame (RPKM) with reference names as row names. | |
| 58 GetRPKMValues = function(bam.file) { | |
| 59 stats = GetIdxStats(bam.file = bam.file) | |
| 60 # Total mapped reads divided by 10^9: folds the per-kilobase (10^3) and per-million (10^6) scaling into one factor. | |
| 61 scale = sum(stats\$mapped.reads) / 10^9 | |
| 62 | |
| 63 rpkm = stats\$mapped.reads / (stats\$length * scale) | |
| 64 data.frame(RPKM = rpkm, row.names = stats\$ref.sequence) | |
| 65 } | |
| 66 # Driver: check that the input exists, compute RPKM for each BAM file, and write a tab-separated table to outf. | |
| 67 bam.files = inf | |
| 68 is.missing = !file.exists(bam.files) | |
| 69 if (any(is.missing)) { | |
| 70 # Report every missing path, one per line, in the error message. | |
| 71 missing.list = paste(bam.files[is.missing], collapse = "\n ") | |
| 72 stop(paste("One or more files does not exist:\n", missing.list)) | |
| 73 } else { | |
| 74 rpkm.table = foreach(current.bam = bam.files, .combine = "cbind") %do% { | |
| 75 GetRPKMValues(bam.file = current.bam) | |
| 76 } | |
| 77 colnames(rpkm.table) = sapply(bam.files, basename) | |
| 78 result = cbind(ref.sequence = rownames(rpkm.table), rpkm.table) | |
| 79 write.table(result, file = outf, quote = FALSE, sep = "\t", | |
| 80 row.names = FALSE, col.names = TRUE) | |
| 81 } | |
| 82 </configfile> | |
| 83 </configfiles> | |
| 84 | |
| 85 | |
| 86 <tests> | |
| 87 <test> | |
| 88 <param name="input1" value="GetRPKMvalues_test1_input.xls" ftype="bam"/> | |
| 89 <param name="job_name" value="test1"/> | |
| 90 <param name="runMe" value="$runMe"/> | |
| 91 <output name="tab_file" file="GetRPKMvalues_test1_output.xls" ftype="tabular"/> | |
| 92 </test> | |
| 93 </tests> | |
| 94 | |
| 95 | |
| 96 <help> | |
| 97 | |
| 98 | |
| 99 This tool computes the RPKM values from a given BAM file. | |
| 100 | |
| 101 This tool is an R script and requires the Rsamtools and foreach packages. | |
| 102 | |
| 103 The output is a tabular file with two columns: | |
| 104 1. ref.sequence: which contains the names of reference sequences (genes/transcripts), and | |
| 105 | |
| 106 2. dataset_#.dat: named after the input dataset (the BAM file specified); it contains the RPKM value for each reference sequence. The symbol # stands for the consecutive number automatically assigned by Galaxy. | |
| 107 | |
| 108 **Script** | |
| 109 Pressing execute will run the following code over your input file and generate some outputs in your history:: | |
| 110 | |
| 111 | |
| 112 #!/usr/bin/Rscript | |
| 113 | |
| 114 # source("http://bioconductor.org/biocLite.R") | |
| 115 # biocLite("Rsamtools") | |
| 116 | |
| 117 ourargs = commandArgs(trailingOnly = TRUE) | |
| 118 inf = ourargs[1] | |
| 119 outf = ourargs[2] | |
| 120 | |
| 121 | |
| 122 library(Rsamtools) | |
| 123 library(foreach) | |
| 124 | |
| 125 GetMappedReadsFreqs = function(bam.file){ | |
| 126 require(Rsamtools) | |
| 127 mapped.flag = scanBamFlag(isUnmappedQuery = FALSE) # Get all mapped reads | |
| 128 mapped.param = ScanBamParam(flag = mapped.flag, what = c("rname")) # Get only the reference name to which reads were mapped | |
| 129 bam.index.file = indexBam(bam.file) | |
| 130 mapped.scan.res = scanBam(param = mapped.param, file = bam.file, index = bam.index.file) | |
| 131 mapped = as.data.frame(table(mapped.scan.res[[1]]\$rname)) | |
| 132 colnames(mapped) = c("ref.sequence", "mapped.reads") | |
| 133 mapped | |
| 134 } | |
| 135 | |
| 136 GetRefSeqsLengths = function(bam.file){ | |
| 137 require(Rsamtools) | |
| 138 header = scanBamHeader(files = bam.file) | |
| 139 ref.seqs.data = as.data.frame(header[[1]]\$targets) | |
| 140 data.frame(length = header[[1]]\$targets, row.names = names(header[[1]]\$targets)) | |
| 141 } | |
| 142 | |
| 143 GetIdxStats = function(bam.file) { | |
| 144 mapped = GetMappedReadsFreqs(bam.file = bam.file) | |
| 145 ref.seqs.lengths = GetRefSeqsLengths(bam.file = bam.file) | |
| 146 merge(x = mapped, y = ref.seqs.lengths, | |
| 147 by.x = "ref.sequence", by.y = "row.names") | |
| 148 } | |
| 149 | |
| 150 GetRPKMValues = function(bam.file) { | |
| 151 bam.stats = GetIdxStats(bam.file = bam.file) | |
| 152 total.mapped = sum(bam.stats\$mapped.reads) | |
| 153 mapped.factor = total.mapped / 10^9 | |
| 154 | |
| 155 bam.stats\$RPKM = (bam.stats\$mapped.reads / (bam.stats\$length * mapped.factor)) | |
| 156 data.frame(RPKM = bam.stats\$RPKM, row.names = bam.stats\$ref.sequence) | |
| 157 } | |
| 158 | |
| 159 bam.files = inf | |
| 160 | |
| 161 if(any(!file.exists(bam.files))){ | |
| 162 not.found = bam.files[!file.exists(bam.files)] | |
| 163 out.names = paste(not.found, collapse = "\n ") | |
| 164 stop(paste("One or more files does not exist:\n", out.names)) | |
| 165 } else { | |
| 166 rpkm.data = foreach(c.bam.file = bam.files, .combine = "cbind") %do% { | |
| 167 GetRPKMValues(bam.file = c.bam.file) | |
| 168 } | |
| 169 colnames(rpkm.data) = sapply(bam.files, basename) | |
| 170 out.data = cbind(ref.sequence = rownames(rpkm.data), rpkm.data) | |
| 171 write.table(out.data, quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE, | |
| 172 file = outf) | |
| 173 } | |
| 174 | |
| 175 **Attribution** | |
| 176 This Galaxy tool was created by hfespitia@cenicana.org on 06/09/2014 at 23:41:54 | |
| 177 using the Galaxy Tool Factory. | |
| 178 | |
| 179 See https://bitbucket.org/fubar/galaxytoolfactory for details of that project | |
| 180 Please cite: Creating re-usable tools from scripts: The Galaxy Tool Factory. Ross Lazarus; Antony Kaspi; Mark Ziemann; The Galaxy Team. | |
| 181 Bioinformatics 2012; doi: 10.1093/bioinformatics/bts573 | |
| 182 | |
| 183 | |
| 184 </help> | |
| 185 <citations> | |
| 186 | |
| 187 <citation type="doi">10.1093/bioinformatics/bts573</citation> | |
| 188 </citations> | |
| 189 </tool> |
