comparison beta_basic.xml @ 0:20453b656907

Imported from capsule None
author jjohnson
date Tue, 16 Sep 2014 13:35:24 -0400
parents
children 9c5241259454
comparison
equal deleted inserted replaced
-1:000000000000 0:20453b656907
1 <tool id="beta_basic" name="BETA-basic: Binding and Expression Target Analysis" version="0.1.0">
2 <description>Predict the factors (TFs or CRs) direct target genes by combining the binding and expression data</description>
3 <macros>
4 <import>beta_macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <command>
8 BETA basic
9 #include source=$common_opts#
10 #include source=$genome_opts#
11 #include source=$extended_opts#
12 > $log 2>&amp;1
13 </command> <!-- Runs "BETA basic" with the option fragments pulled in by the three #include directives; stdout and stderr are both captured in $log using the POSIX redirect form, which behaves the same under bash and plain sh. -->
14 <inputs>
15 <expand macro="common_params" />
16 <expand macro="genome_params" />
17 <expand macro="extended_params" />
18 </inputs>
19 <expand macro="stdio" />
20 <outputs> <!-- One log dataset (written by the command's redirect to $log) plus five result datasets collected via from_work_dir from fixed paths under BETA_OUTPUT/. NOTE(review): the BETA_OUTPUT directory and the NA_ file-name prefix are presumably fixed by the imported option macros, which are not visible here; confirm they match. -->
21 <data format="txt" name="log" label="Log of BETA basic"/>
22 <data format="pdf" name="functionoutput" label="BETA functional prediction on ${peakfile.name}" from_work_dir="BETA_OUTPUT/NA_function_prediction.pdf"/>
23 <data format="tabular" name="uptargetsoutput" label="BETA direct targets prediction on up regulated genes" from_work_dir="BETA_OUTPUT/NA_uptarget.txt"/>
24 <data format="tabular" name="downtargetsoutput" label="BETA direct targets prediction on down regulated genes" from_work_dir="BETA_OUTPUT/NA_downtarget.txt"/>
25 <data format="bed" name="uptargetpeaks" label="BETA Uptarget associated peaks" from_work_dir="BETA_OUTPUT/NA_uptarget_associate_peaks.bed"/>
26 <data format="bed" name="downtargetpeaks" label="BETA Downtarget associated peaks" from_work_dir="BETA_OUTPUT/NA_downtarget_associate_peaks.bed"/>
27 </outputs>
28 <tests>
29 <test> <!-- Single functional test: runs on peaks.bed and diff_expr.xls (kind=LIM, columns 2,5,7) with the score method, then checks the log and the target outputs by regular expression. -->
30 <param name='peakfile' value="peaks.bed" ftype="bed" dbkey="hg19"/>
31 <param name="distance" value="100000"/>
32 <param name="peaknumber" value="10000"/>
33 <param name="genomeName" value="hg19"/>
34 <param name='exprefile' value="diff_expr.xls" ftype="tabular" dbkey="hg19"/>
35 <param name="kind" value="LIM"/>
36 <param name="expreinfo" type="text" value="2,5,7"/>
37 <param name="gname2" value="Refseq"/>
38 <param name="diff_fdr" value="1.0"/>
39 <param name="diff_amount" value="0.5"/>
40 <param name="method" value="score"/>
41 <output name="log">
42 <assert_contents>
43 <has_text_matching expression="Finished" />
44 </assert_contents>
45 </output>
46 <output name="targetsoutput"> <!-- NOTE(review): no output named "targetsoutput" is declared in the outputs section (only uptargetsoutput and downtargetsoutput); confirm which one this assertion is meant to check and rename accordingly. -->
47 <assert_contents>
48 <has_text_matching expression="chr19\t4675243\t4723855\tNM_139159\t1.1.*\t-\tDPP" />
49 </assert_contents>
50 </output>
51 <output name="targetpeaks"> <!-- NOTE(review): no output named "targetpeaks" is declared in the outputs section (only uptargetpeaks and downtargetpeaks); confirm which one this assertion is meant to check and rename accordingly. -->
52 <assert_contents>
53 <has_text_matching expression="chr19\t4723422\t4724314\tregion_9\tNM_139159\tDPP9\t13\t0.6.*" />
54 </assert_contents>
55 </output>
56 </test>
57 </tests>
58 <help>
59 **BETA basic**
60
61 @EXTERNAL_DOCUMENTATION@
62
63 @CITATION_SECTION@
64
65 This tool annotates the given intervals and scores with genome
66 features such as gene body. It's the major module in CEAS package
67 which is written by Hyunjin Gene Shin, published in Bioinformatics
68 (pubmed id:19689956).
69
70 .. class:: warningmark
71
72 **NEED IMPROVEMENT**
73
74 -----
75
76 **Parameters**
77
78 - **PEAKFILE file** contains peaks for the experiment in a bed
79 format file. Normally, it's produced by the peak calling tool. It's
80 required.
81 - **EXPREFILE file** contains the differentially expressed genes in a tab
82 delimited text file. It's required.
83 - **Kind** The kind of your expression file format, LIM for LIMMA standard
84 output with Microarray, CUF for Cuffdiff standard output with RNA-seq,
85 BSF for BETA specific format, and O for other formats.
86 - **genome** hg19 for human and mm9 for mouse. For any other genome, don't set this parameter.
87 - **gname2** If this switch is on, gene or transcript IDs in files given
88 through -e will be considered as official gene symbols, DEFAULT=FALSE
89 - **EXPREINFO** is the columns info of the geneID, up/down status and statistical
90 values column of your expression data. NOTE: use a comma as the separator,
91 for example: 2,5,7 means geneID in the 2nd column, Tscore in the 5th column
92 and FDR in the 7th column.
93 - **REFERENCE** is the refgene info file downloaded from UCSC genome browser.
94 It is a tab delimited text file with gene annotation with refseq and gene symbol.
95 Input this file only if your genome is neither hg19 nor mm9.
96 profiling
97 - **OUTPUT** to specify the output files directory
98 - **bl** Whether or not to use CTCF boundary file to get the contributed peaks
99 - **BOUNDARYFILE** is the file with reasonable boundaries if --bl is on and genome
100 is neither hg19 nor mm9.
101 - **NAME** specify the name of the output files.
102 - **DISTANCE** specify the distance from the gene TSS within which peaks will be considered.
103 - **DIFF_FDR** specify the differential genes by the 3rd column in file input
104 via -e, genes with less than this value will be considered as the differentially
105 changed genes.
106 - **DIFF_AMOUNT** specify the differential genes the top #(DIFF_AMOUNT) ranked by
107 the 3rd column in file input via -e, genes ranked in the top # will be considered
108 as the differentially expressed genes.
109 - **CUTOFF** specify a cutoff of ks-test in the function prediction part
110
111 -----
112
113 **Script parameter list of BETA basic**
114
115 ::
116
117 -h, --help show this help message and exit
118 -p PEAKFILE, --peakfile PEAKFILE
119 The bed format of peaks binding sites. (BETA support 3
120 or 5 columns bed format, CHROM, START, END (NAME,
121 SCORE))
122 -e EXPREFILE, --diff_expr EXPREFILE
123 The differential expression file get from limma for
124 MicroArray ddata and cuffdiff for RNAseq data
125 -k {LIM,CUF,BSF,O}, --kind {LIM,CUF,BSF,O}
126 The kind of your expression file,this is required,it
127 can be LIM, CUF, BSF, O. LIM for LIMMA standard
128 format. CUF for CUFDIFF standard format, BSF for BETA
129 specific format and O for other formats, if is 'O',
130 columns infor required via --info
131 -g {hg19,mm9}, --genome {hg19,mm9}
132 Specify your species, hg19, mm9. For other genome
133 assembily versions of human and mouse or other
134 species, ignore this parameter.
135 --gname2 If this switch is on, gene or transcript IDs in files
136 given through -e will be considered as official gene
137 symbols, DEFAULT=FALSE
138 --info EXPREINFO Specify the geneID, up/down status and statistcal
139 values column of your expression data,NOTE: use a
140 comma as an connector. for example: 2,5,7 means geneID
141 in the 2nd column, Tscore in 5th column and FDR in 7
142 column DEFAULT:2,5,7 for LIMMA; 2,10,13 for Cuffdiff
143 and 1,2,3 for BETA specific format
144 -r REFERENCE, --reference REFERENCE
145 The refgene info file downloaded from UCSC genome
146 browser.input this file only if your genome is neither
147 hg19 nor mm9
148 -o OUTPUT, --output OUTPUT
149 The directory to store all the output files, if you
150 don't set this, files will be output into the current
151 directory
152 --bl Whether or not use CTCF boundary to filter peaks
153 around a gene, DEFAULT=FALSE
154 --bf BOUNDARYFILE CTCF conserved peaks bed file, use this only when you
155 set --bl and the genome is neither hg19 nor mm9
156 --pn PEAKNUMBER The number of peaks you want to consider,
157 DEFAULT=10000
158 --method {score,distance}
159 Define the method to do the TF/CR function prediction,
160 score for regulatory potential, distance for the
161 distance to the proximal binding peak. DEFAULT:SCORE
162 -n NAME, --name NAME This argument is used to name the result file.If not
163 set, the peakfile name will be used instead
164 -d DISTANCE, --distance DISTANCE
165 Set a number which unit is 'base'. It will get peaks
166 within this distance from gene TSS. default:100000
167 (100kb)
168 --df DIFF_FDR Input a number 0~1 as a threshold to pick out the most
169 significant differential expressed genes by FDR,
170 DEFAULT = 1, that is select all the genes
171 --da DIFF_AMOUNT Get the most significant differential expressed genes
172 by the percentage(0-1) or number(larger than 1)Input a
173 number between 0-1, the rank based on fdr for example,
174 2000, so that the script will only consider top 2000
175 genes as the differentially expressed genes. DEFAULT =
176 0.5, that is select top 50 percent genes of up and
177 down seprately. NOTE: if you want to use diff_fdr,
178 please set this parameter to 1, otherwise it will get
179 the intersection of these two parameters
180 -c CUTOFF, --cutoff CUTOFF
181 Input a number between 0~1 as a threshold to select
182 the closer target gene list(up regulate or down
183 regulate or both) with the p value was called by one
184 side ks-test, DEFAULT = 0.001
185
186 </help>
187
188 </tool>