Mercurial > repos > jjohnson > cistrome_ceas

diff gca.xml @ 0:4e52505adaa6
Imported from capsule None
author: jjohnson
date: Wed, 17 Sep 2014 15:03:26 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gca.xml	Wed Sep 17 15:03:26 2014 -0400
@@ -0,0 +1,95 @@
+<tool name="GCA: Gene centered annotation" id="ceas_gca" version="0.1.0">
+  <description>Find the nearest interval in the given intervals set for every annotated coding gene</description>
+  <macros>
+    <import>ceas_macros.xml</import>
+  </macros>
+  <expand macro="requirements" />
+  <!-- Command line for gca. The gene annotation option is supplied by the
+       Cheetah include below. Standard output and standard error are both
+       sent to the log dataset using POSIX redirection, so the line behaves
+       the same whether the job script runs under bash or a plain sh. -->
+  <command>
+gca -b "$bfile" --span=$span 
+#include source=$gtpath_ceasdb_ref#
+--name=$name > "$log" 2>&amp;1
+  </command>
+  <inputs>
+    <!-- Fixed experiment name passed to gca; per the help text it names the
+         output file, and it matches the gca_out.xls collected by the outputs. -->
+    <param name="name" type="hidden" value="gca_out"/>
+    <!-- ChIP regions in BED format; the validator requires a genome build to be set. -->
+    <param ftype="bed" format="bed" name="bfile" type="data" label="BED file(100,000 lines max)">
+      <validator type="unspecified_build" />
+    </param>
+    <!-- Gene annotation source selector, defined in ceas_macros.xml. -->
+    <expand macro="ceasdb_ref" />
+    <!-- Declared as an integer so non-numeric input is rejected by the form
+         before the numeric range validator or the command line ever sees it. -->
+    <param name="span" type="integer" label="Span" value="3000">
+      <validator type="in_range" max="1000000" min="100" message="Span is out of range, Span has to be between 100 to 1000000" />
+    </param>
+  </inputs>
+  <outputs>
+    <!-- Result table, collected from gca_out.xls in the job working directory. -->
+    <data name="output"
+          format="xls"
+          from_work_dir="gca_out.xls" />
+    <!-- Job log dataset; the command line redirects the tool's output into it. -->
+    <data name="log"
+          format="txt"
+          label="GCA job log" />
+  </outputs>
+  <expand macro="stdio" />
+  <tests>
+    <!-- Runs gca on peaks.bed against a history-supplied mm9 refGene ceasdb
+         and checks that the row for NM_013495 appears in the result table. -->
+    <test maxseconds="3600" name="GCA_1">
+      <param name="bfile" value="peaks.bed" />
+      <param name="span" value="3000" />
+      <param name="refsrc" value="history"/>
+      <param name="gdb" ftype="ceasdb" value="mm9.refGene.ceasdb"/>
+      <output name="output">
+        <assert_contents>
+          <!-- The expression is a regular expression. The strand column is a
+               literal plus sign, so it is backslash-escaped; unescaped it
+               would act as a quantifier on the preceding tab and the row
+               could never match. Decimal points are escaped as well so they
+               match only a literal dot. -->
+          <has_text_matching expression="NM_013495\tchr19\t3323300\t3385733\t\+\t2994\t754\t31798\t224353\t0\.07\t0\.26\t0\.12\t0\.03\t0\.0\t0\.0\t0\.0" />
+        </assert_contents>
+      </output>
+    </test>
+  </tests>
+  <help>
+This tool finds the nearest binding sites in the given BED file for
+every annotated coding gene. It is a module of the CEAS package, which
+was written by Hyunjin Gene Shin and published in Bioinformatics (PubMed
+ID: 19689956).
+
+@EXTERNAL_DOCUMENTATION@
+
+@CITATION_SECTION@
+
+.. class:: warningmark
+
+**NEED IMPROVEMENT**
+
+-----
+
+**Parameters**
+
+- **BED file** contains the transcription factor binding sites,
+  generally the BED files for peaks from peak calling tools.
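+- **Span** is the distance in bp from the TSS and TTS within which ChIP
+  regions are searched (default 3000; must be between 100 and 1000000).
+- **Genome Annotation Version** specifies the annotations to use for
+  the data set. The annotations are downloaded from the UCSC genome site.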
+
+-----
+
+**Output**
+
+- **XLS file** is a tab-delimited file.
+
+-----
+
+**script parameter list of GCA**
+
+Options:
+  --version            show program's version number and exit
+  -h, --help           Show this help message and exit.
+  -b BED, --bed=BED    BED file of ChIP regions.
+  -g GDB, --gt=GDB     Gene annotation table. This can be a sqlite3 local db
+                       file, BED file or genome version of UCSC. The BED file
+                       must have an extension of '.bed'
+  --span=SPAN          Span in search of ChIP regions from TSS and TTS,
+                       DEFAULT=3000bp
+  --name=NAME          Experiment name. This will be used to name the output
+                       file. If an experiment name is not given, input BED
+                       file name will be used instead.
+  --gn-group=GN_GROUP  A particular group of genes of interest. If a txt file
+                       with one column of gene names (eg RefSeq IDs in case of
+                       using a refGene table) is given, gca returns the gene-
+                       centered annotation of this particular gene group.
+  --gname2=NAME2       The gene names of --gn-group will be regarded as
+                       'name2.' See the schema of the gene annotation table.
+
+  </help>
+
+</tool>
author	jjohnson
date	Wed, 17 Sep 2014 15:03:26 -0400
parents
children