comparison beta_plus.xml @ 0:20453b656907

Imported from capsule None
author jjohnson
date Tue, 16 Sep 2014 13:35:24 -0400
parents
children 9c5241259454
comparison
equal deleted inserted replaced
-1:000000000000 0:20453b656907
1 <tool id="beta_plus" name="BETA-plus: Binding and Expression Target prediction and motif analysis" version="0.1.0">
2 <description>Predict the factors (TFs or CRs) direct target genes by combining the binding and expression data, then do motif analysis on target regions</description>
3 <macros>
4 <import>beta_macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
<!-- Runs "BETA plus" with the option groups included from the common_opts, genome_opts,
     ref_genome_seq_opts and extended_opts template variables (defined outside this view),
     captures all console output in the log dataset, then copies the HTML motif report and
     its js, css and img assets into the motifresult dataset and its extra files directory. -->
7 <command>
8 BETA plus
9 #include source=$common_opts#
10 #include source=$genome_opts#
11 #include source=$ref_genome_seq_opts#
12 #include source=$extended_opts#
## The motifs parameter is declared optional: only emit the --mn flag when a value was
## entered, otherwise the flag would be passed without its argument.
#if str($motifs).strip() not in ('', 'None')
13 --mn $motifs
#end if
## POSIX redirection of both stdout and stderr into the log, so the chained steps below
## still wait for BETA when the job shell is a plain sh rather than bash.
14 > $log 2>&amp;1 &amp;&amp;
15 mkdir -p $motifresult.extra_files_path &amp;&amp;
16 cp BETA_OUTPUT/motifresult/betamotif.html $motifresult &amp;&amp;
17 cp BETA_OUTPUT/motifresult/*.js $motifresult.extra_files_path &amp;&amp;
18 cp BETA_OUTPUT/motifresult/*.css $motifresult.extra_files_path &amp;&amp;
19 cp -r BETA_OUTPUT/motifresult/img $motifresult.extra_files_path
20
21 </command>
<!-- Tool inputs. The four expand elements pull in shared parameter groups from macros
     (presumably defined in the imported beta_macros.xml; not visible here). "motifs" is the
     only parameter declared directly in this tool. -->
22 <inputs>
23 <expand macro="common_params" />
24 <expand macro="genome_params" />
25 <expand macro="refGenomeSourceConditional" />
26 <expand macro="extended_params" />
<!-- Value handed to BETA through the mn option in the command block. It is typed as a float
     so that one field can hold either a p-value cutoff (0 to 1) or a motif count (larger
     than 1), as the help attribute describes; the validator only bounds it to 0..20000. -->
27 <param name="motifs" type="float" value="10" optional="true" label="Motifs to retrieve"
28 help="a number between 0 and 1 as the p-value cutoff or an integer larger than 1 as the number of motifs">
29 <validator type="in_range" max="20000" min="0" message="A float between 0 and 1 or an integer greater than 1" />
30 </param>
31 </inputs>
32 <expand macro="stdio" />
<!-- Datasets produced by the tool. Outputs that carry from_work_dir are collected from the
     named files under BETA_OUTPUT. "log" and "motifresult" have no from_work_dir: the command
     block writes them itself (console output redirect and the copy of betamotif.html). -->
33 <outputs>
34 <data format="txt" name="log" label="Log of BETA plus"/>
<!-- Target prediction results. NOTE(review): the NA_ file name prefix is assumed to come from
     the name option set in the included common options; confirm against beta_macros.xml. -->
35 <data format="pdf" name="functionoutput" label="BETA functional prediction on ${peakfile.name}" from_work_dir="BETA_OUTPUT/NA_function_prediction.pdf"/>
36 <data format="tabular" name="uptargetsoutput" label="BETA direct targets prediction on up regulated genes" from_work_dir="BETA_OUTPUT/NA_uptarget.txt"/>
37 <data format="tabular" name="downtargetsoutput" label="BETA direct targets prediction on down regulated genes" from_work_dir="BETA_OUTPUT/NA_downtarget.txt"/>
38 <data format="bed" name="uptargetpeaks" label="BETA Uptarget associated peaks" from_work_dir="BETA_OUTPUT/NA_uptarget_associate_peaks.bed"/>
39 <data format="bed" name="downtargetpeaks" label="BETA Downtarget associated peaks" from_work_dir="BETA_OUTPUT/NA_downtarget_associate_peaks.bed"/>
<!-- Motif analysis text reports, taken from BETA_OUTPUT/motifresult. -->
40 <data format="txt" name="upmotifs" label="BETA Motifs in up-target regions" from_work_dir="BETA_OUTPUT/motifresult/UP_MOTIFS.txt" />
41 <data format="txt" name="up_non_motifs" label="BETA Motifs in up-target regions versus non-target regions" from_work_dir="BETA_OUTPUT/motifresult/UP_NON_MOTIFS.txt" />
42 <data format="txt" name="downmotifs" label="BETA Motifs in down-target regions" from_work_dir="BETA_OUTPUT/motifresult/DOWN_MOTIFS.txt" />
43 <data format="txt" name="down_non_motifs" label="BETA Motifs in down-target regions versus non-target regions" from_work_dir="BETA_OUTPUT/motifresult/DOWN_NON_MOTIFS.txt" />
44 <data format="txt" name="differentialmotifs" label="BETA Motifs up-target regions versus down-target regions" from_work_dir="BETA_OUTPUT/motifresult/DIFFERENTIAL_MOTIF_UP_DOWN.txt" />
<!-- HTML motif report; its js, css and img assets are copied into this dataset's
     extra_files_path by the command block. -->
45 <data format="html" name="motifresult" label="BETA Motif analysis on target regions"/>
46 </outputs>
<!-- Single functional test: runs the tool on peaks.bed and diff_expr.xls (both hg19) with
     kind LIM and the score method, then checks that the log contains "Finished" and that
     expected identifiers appear in the up-target, down-target and differential-motif outputs. -->
47 <tests>
48 <test>
49 <param name='peakfile' value="peaks.bed" ftype="bed" dbkey="hg19"/>
50 <param name="distance" value="100000"/>
51 <param name="peaknumber" value="10000"/>
52 <param name="genomeName" value="hg19"/>
53 <param name='exprefile' value="diff_expr.xls" ftype="tabular" dbkey="hg19"/>
54 <param name="kind" value="LIM"/>
55 <param name="expreinfo" type="text" value="2,5,7"/>
56 <param name="gname2" value="Refseq"/>
57 <param name="diff_fdr" value="1.0"/>
58 <param name="diff_amount" value="0.5"/>
59 <param name="method" value="score"/>
<!-- Content assertions are regular-expression matches rather than whole-file comparisons. -->
60 <output name="log">
61 <assert_contents>
62 <has_text_matching expression="Finished" />
63 </assert_contents>
64 </output>
65 <output name="uptargetsoutput">
66 <assert_contents>
67 <has_text_matching expression="NM_001002231" />
68 </assert_contents>
69 </output>
70 <output name="downtargetsoutput">
71 <assert_contents>
72 <has_text_matching expression="NM_001280" />
73 </assert_contents>
74 </output>
75 <output name="differentialmotifs">
76 <assert_contents>
77 <has_text_matching expression="CDX1\tHomeodomain Family" />
78 </assert_contents>
79 </output>
80 </test>
81 </tests>
82 <help>
83 **BETA plus**
84
85 @EXTERNAL_DOCUMENTATION@
86
87 @CITATION_SECTION@
88
89 This tool annotates the given intervals and scores with genome
90 features such as gene body.
91 It predicts the direct targets of a TF and its active/repressive function,
92 and also performs motif analysis on the target regions.
93 It's the major module in CEAS package
94 which is written by Hyunjin Gene Shin, published in Bioinformatics
95 (pubmed id:19689956).
96
97 .. class:: warningmark
98
99 **NEED IMPROVEMENT**
100
101 -----
102
103 **Parameters**
104
105 - **PEAKFILE file** contains peaks for the experiment in a bed
106 format file. Normally, it's produced by the peak calling tool. It's
107 required.
108 - **EXPREFILE file** contains the differentially expressed genes in a tab
109 delimited text file. It's required.
110 - **Kind** The kind of your expression file format, LIM for LIMMA standard
111 output with Microarray, CUF for Cuffdiff standard output with RNA-seq,
112 BSF for BETA specific format, and O for other formats.
113 - **genome** hg19 for human and mm9 for mouse. Others, don't set this parameter.
114 - **genomereference** Genome reference data with fasta format
115 - **gname2** If this switch is on, gene or transcript IDs in files given
116 through -e will be considered as official gene symbols, DEFAULT=FALSE
117 - **EXPREINFO** is the columns info of the geneID, up/down status and statistical
118 values column of your expression data. NOTE: use a comma as a connector,
119 for example: 2,5,7 means geneID in the 2nd column, Tscore in the 5th column
120 and FDR in the 7th column.
121 - **REFERENCE** is the refgene info file downloaded from UCSC genome browser.
122 It is a tab delimited text file with gene annotation with refseq and gene symbol.
123 Input this file only if your genome is neither hg19 nor mm9.
125 - **OUTPUT** to specify the output files directory
126 - **bl** Whether or not to use CTCF boundary file to get the contributed peaks
127 - **BOUNDARYFILE** is the file with reasonable boundaries if --bl is on and genome
128 is neither hg19 nor mm9.
129 - **NAME** specify the name of the output files.
130 - **DISTANCE** specify the distance from the gene TSS within which peaks will be considered.
131 - **DIFF_FDR** specify the differential genes by the 3rd column in file input
132 via -e, genes with less than this value will be considered as the differentially
133 changed genes.
134 - **DIFF_AMOUNT** specify the differential genes the top #(DIFF_AMOUNT) ranked by
135 the 3rd column in file input via -e, genes ranked in the top # will be considered
136 as the differentially expressed genes.
137 - **CUTOFF** specify a cutoff of ks-test in the function prediction part
138
139
140 -----
141
142 **Script parameter list of BETA plus**
143
144 ::
145
146 -h, --help show this help message and exit
147 -p PEAKFILE, --peakfile PEAKFILE
148 The bed format of peaks binding sites. (BETA support 3
149 or 5 columns bed format, CHROM, START, END (NAME,
150 SCORE))
151 -e EXPREFILE, --diff_expr EXPREFILE
152 The differential expression file get from limma for
153 MicroArray ddata and cuffdiff for RNAseq data
154 -k {LIM,CUF,BSF,O}, --kind {LIM,CUF,BSF,O}
155 The kind of your expression file,this is required,it
156 can be LIM, CUF, BSF, O. LIM for LIMMA standard
157 format. CUF for CUFDIFF standard format, BSF for BETA
158 specific format and O for other formats, if is 'O',
159 columns infor required via --info
160 -g {hg19,mm9}, --genome {hg19,mm9}
161 Specify your species, hg19, mm9
162 --gs GENOMEREFERNCE GenomeReference file with fasta format
163 --gname2 If this switch is on, gene or transcript IDs in files
164 given through -e will be considered as official gene
165 symbols, DEFAULT=FALSE
166 --info EXPREINFO Specify the geneID, up/down status and statistcal
167 values column of your expression data,NOTE: use a
168 comma as an connector. for example: 2,5,7 means geneID
169 in the 2nd column, Tscore in 5th column and FDR in 7
170 column DEFAULT:2,5,7 for LIMMA; 2,10,13 for Cuffdiff
171 and 1,2,3 for BETA specific format
172 -r REFERENCE, --reference REFERENCE
173 The refgene info file downloaded from UCSC genome
174 browser.input this file only if your genome is neither
175 hg19 nor mm9
176 -o OUTPUT, --output OUTPUT
177 The directory to store all the output files, if you
178 don't set this, files will be output into the current
179 directory
180 --bl Whether or not use CTCF boundary to filter peaks
181 around a gene, DEFAULT=FALSE
182 --bf BOUNDARYFILE CTCF conserved peaks bed file, use this only when you
183 set --bl and the genome is neither hg19 nor mm9
184 --pn PEAKNUMBER The number of peaks you want to consider,
185 DEFAULT=10000
186 --method {score,distance}
187 Define the method to do the TF/CR function prediction,
188 score for regulatory potential, distance for the
189 distance to the proximal binding peak. DEFAULT:SCORE
190 -n NAME, --name NAME This argument is used to name the result file.If not
191 set, the peakfile name will be used instead
192 -d DISTANCE, --distance DISTANCE
193 Set a number which unit is 'base'. It will get peaks
194 within this distance from gene TSS. default:100000
195 (100kb)
196 --df DIFF_FDR Input a number 0~1 as a threshold to pick out the most
197 significant differential expressed genes by FDR,
198 DEFAULT = 1, that is select all the genes
199 --da DIFF_AMOUNT Get the most significant differential expressed genes
200 by the percentage(0-1) or number(larger than 1)Input a
201 number between 0-1, the rank based on fdr for example,
202 2000, so that the script will only consider top 2000
203 genes as the differentially expressed genes. DEFAULT =
204 0.5, that is select top 50 percent genes of up and
205 down seprately. NOTE: if you want to use diff_fdr,
206 please set this parameter to 1, otherwise it will get
207 the intersection of these two parameters
208 -c CUTOFF, --cutoff CUTOFF
209 Input a number between 0~1 as a threshold to select
210 the closer target gene list(up regulate or down
211 regulate or both) with the p value was called by one
212 side ks-test, DEFAULT = 0.001
213
214 </help>
215 </tool>