changeset 2:740902fb1c5a draft

Uploaded
author modencode-dcc
date Fri, 18 Jan 2013 17:28:21 -0500
parents 82b54b51b6cf
children 52e7e6f6783e
files genome_tables/.DS_Store genome_tables/._.DS_Store genome_tables/genome_table.dmel.r5.32.txt genome_tables/genome_table.human.hg18.txt genome_tables/genome_table.human.hg19.txt genome_tables/genome_table.mm9.txt genome_tables/genome_table.worm.ws220.txt idrPlotWrapper.sh idrToolDef.xml
diffstat 9 files changed, 94 insertions(+), 100 deletions(-) [+]
line wrap: on
line diff
Binary file genome_tables/.DS_Store has changed
Binary file genome_tables/._.DS_Store has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_tables/genome_table.dmel.r5.32.txt	Fri Jan 18 17:28:21 2013 -0500
@@ -0,0 +1,15 @@
+YHet	347038
+dmel_mitochondrion_genome	19517
+2L	23011544
+X	22422827
+3L	24543557
+4	1351857
+2R	21146708
+3R	27905053
+Uextra	29004656
+2RHet	3288761
+2LHet	368872
+3LHet	2555491
+3RHet	2517507
+U	10049037
+XHet	204112
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_tables/genome_table.human.hg18.txt	Fri Jan 18 17:28:21 2013 -0500
@@ -0,0 +1,25 @@
+chr1 247249719
+chr2 242951149
+chr3 199501827
+chr4 191273063
+chr5 180857866
+chr6 170899992
+chr7 158821424
+chr8 146274826
+chr9 140273252
+chr10 135374737
+chr11 134452384
+chr12 132349534
+chr13 114142980
+chr14 106368585
+chr15 100338915
+chr16 88827254
+chr17 78774742
+chr18 76117153
+chr19 63811651
+chr20 62435964
+chr21 46944323
+chr22 49691432
+chrX 154913754
+chrY 57772954
+chrM 16571
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_tables/genome_table.human.hg19.txt	Fri Jan 18 17:28:21 2013 -0500
@@ -0,0 +1,25 @@
+chr1 249250621
+chr2 243199373
+chr3 198022430
+chr4 191154276
+chr5 180915260
+chr6 171115067
+chr7 159138663
+chr8 146364022
+chr9 141213431
+chr10 135534747
+chr11 135006516
+chr12 133851895
+chr13 115169878
+chr14 107349540
+chr15 102531392
+chr16 90354753
+chr17 81195210
+chr18 78077248
+chr19 59128983
+chr20 63025520
+chr21 48129895
+chr22 51304566
+chrX 155270560
+chrY 59373566
+chrM 16571
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_tables/genome_table.mm9.txt	Fri Jan 18 17:28:21 2013 -0500
@@ -0,0 +1,22 @@
+chr1 197195432
+chr2 181748087
+chr3 159599783
+chr4 155630120
+chr5 152537259
+chr6 149517037
+chr7 152524553
+chr8 131738871
+chr9 124076172
+chr10 129993255
+chr11 121843856
+chr12 121257530
+chr13 120284312
+chr14 125194864
+chr15 103494974
+chr16 98319150
+chr17 95272651
+chr18 90772031
+chr19 61342430
+chrX 166650296
+chrY 15902555
+chrM 16299
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_tables/genome_table.worm.ws220.txt	Fri Jan 18 17:28:21 2013 -0500
@@ -0,0 +1,7 @@
+I 15072423
+II 15279345
+III 13783700
+IV 17493793
+V 20924149
+X 17718866
+MtDNA 13794
\ No newline at end of file
--- a/idrPlotWrapper.sh	Fri Jan 18 17:28:12 2013 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-#!/bin/bash
-
-# idrPlotWrapper.sh
-# OICR: Kar Ming Chu
-# July 2012
-
-# BASH wrapper for batch-consistency-plot.r (part of the IDR package)
-# For use with Galaxy 
-
-# Usage of batch-consistency-plot.r: Rscript batch-consistency-plot-merged.r [npairs] [output.dir] [input.file.prefix 1, 2, 3 ...]
-#	npairs - will be a constant, since Galaxy requires explicit control over input and output files
-
-# Usage of THIS SCRIPT: ./idrPlotWrapper.sh em uri outputfile
-#	em - em.sav file provided by Galaxy
-#	uri - uri.sav file provided by Galaxy
-#	outputfile - output file name specified by Galaxy
-
-main() {
-	EM="${1}" 		# absolute file path to em.sav file, provided by Galaxy
-	URI="${2}"		# absolute file parth to uri.sav file, provided by Galaxy
-	OUTFILE="${3}"		# name of desired output file
-
-	cp "${EM}" ./idrPlot-em.sav	# cp to this directory and rename so they can be found by idrPlot
-	cp "${URI}" ./idrPlot-uri.sav
-	
-	Rscript /mnt/galaxyTools/galaxy-central/tools/modENCODE_DCC_tools/idr/batch-consistency-plot.r 1 ./idrPlot idrPlot
-
-	# convert post script to pdf file
- 	ps2pdf ./idrPlot-plot.ps ./idrPlot-plot.pdf
-
-	# rename to output file name
-	mv ./idrPlot-plot.pdf "${OUTFILE}"
-
-	# clean up
-	rm idrPlot-em.sav
-	rm idrPlot-uri.sav
-}
-
-main "${@}"
--- a/idrToolDef.xml	Fri Jan 18 17:28:12 2013 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,61 +0,0 @@
-<!--
-
-Script Usage:
-Rscript batch-consistency-analysis.r
-../3066_rep1_VS_input0.macs14.out.regionPeak
-../3066_rep2_VS_input0.macs14.out.regionPeak
-1000
-3066_rep1_VS_rep2
-0
-F
-p.value
-genome_table.txt [ drop down to select ]
--->
-
-<tool id="batch_consistency_analysis_2" name="IDR">
-  <description>Consistency Analysis on a pair of narrowPeak files</description>
-  <command interpreter="Rscript">batch-consistency-analysis.r $input1 $input2 $halfwidth $overlap $option $sigvalue $gtable $rout $aboveIDR $ratio $emSav $uriSav</command>
-  <inputs>
-    <param format="narrowPeak" name="input1" type="data" label="First NarrowPeak File"/>
-    <param format="narrowPeak" name="input2" type="data" label="Second NarrowPeak File"/>
-    <param name="halfwidth" size="4" type="integer" value="1000" label="Half-Width" help="-1 if using reported peak width"/>
-<!--    <param name="outputprefix" type="text" size="50" label="Output Prefix" value="3066_rep1_VS_rep2"/> -->
-    <param name="option" type="select" label="File Type" value="F">
-      <option value="F">Narrow Peak</option>
-      <option value="T">Broad Peak</option>
-    </param>
-    <param name="overlap" size="4" type="float" value="0" label="Over-Lap Ratio" help="Between 0 and 1, inclusively" min="0" max="1"/>
-    <param name="sigvalue" type="select" label="Significant Value" value="p.value" help="Select p-value if the input peak files are generated by MAC. Select q-value if the input peak files are generated by SPP.">
-      <option value="p.value">p-value</option>
-      <option value="q.value">q-value</option>
-      <option value="signal.value">Significant Value</option>
-    </param>
-    <param name="gtable" type="select" label="Genome Table" value="/mnt/galaxyTools/galaxy-central/tools/modENCODE_DCC_tools/idr/genome_tables/genome_table.worm.ws220.txt">
-      <option value="/mnt/galaxyTools/galaxy-central/tools/modENCODE_DCC_tools/idr/genome_tables/genome_table.human.hg19.txt">human hg19</option>
-      <option value="/mnt/galaxyTools/galaxy-central/tools/modENCODE_DCC_tools/idr/genome_tables/genome_table.mm9.txt">mouse mm9</option>
-      <option value="/mnt/galaxyTools/galaxy-central/tools/modENCODE_DCC_tools/idr/genome_tables/genome_table.worm.ws220.txt">worm ws220</option>
-      <option value="/mnt/galaxyTools/galaxy-central/tools/modENCODE_DCC_tools/idr/genome_tables/genome_table.dmel.r5.32.txt">dmel r5.32</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="txt" name="rout" label="IDR.Rout.txt"/>
-    <data format="txt" name="aboveIDR" label="IDR.npeaks-aboveIDR.txt"/>
-    <data format="txt" name="ratio" label="IDR.overlapped-peaks.txt"/>
-    <data format="txt" name="emSav" label="IDR.em.sav"/>
-    <data format="txt" name="uriSav" label="IDR.uri.sav"/>
-  </outputs>
-
-  <tests>
-    <test>
-<!--
-      <param name="input" value="fa_gc_content_input.fa"/>
-      <output name="out_file1" file="fa_gc_content_output.txt"/>
--->
-    </test>
-  </tests>
-
-  <help>
-Reproducibility is essential to reliable scientific discovery in high-throughput experiments. The IDR (Irreproducible Discovery Rate) framework is a unified approach to measure the reproducibility of findings identified from replicate experiments and provide highly stable thresholds based on reproducibility. Unlike the usual scalar measures of reproducibility, the IDR approach creates a curve, which quantitatively assesses when the findings are no longer consistent across replicates. In layman's terms, the IDR method compares a pair of ranked lists of identifications (such as ChIP-seq peaks). These ranked lists should not be pre-thresholded i.e. they should provide identifications across the entire spectrum of high confidence/enrichment (signal) and low confidence/enrichment (noise). The IDR method then fits the bivariate rank distributions over the replicates in order to separate signal from noise based on a defined confidence of rank consistency and reproducibility of identifications i.e the IDR threshold. For more information on IDR, see https://sites.google.com/site/anshulkundaje/projects/idr
-  </help>
-
-</tool>