annotate beta_basic.xml @ 0:20453b656907

Imported from capsule None
author jjohnson
date Tue, 16 Sep 2014 13:35:24 -0400
parents
children 9c5241259454
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
1 <tool id="beta_basic" name="BETA-basic: Binding and Expression Target Analysis" version="0.1.0">
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
2 <description>Predict the factors (TFs or CRs) direct target genes by combining the binding and expression data</description>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
3 <macros>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
4 <import>beta_macros.xml</import>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
5 </macros>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
6 <expand macro="requirements" />
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
7 <command>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
8 BETA basic
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
9 #include source=$common_opts#
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
10 #include source=$genome_opts#
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
11 #include source=$extended_opts#
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
12 &amp;> $log
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
13 </command>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
14 <inputs>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
15 <expand macro="common_params" />
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
16 <expand macro="genome_params" />
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
17 <expand macro="extended_params" />
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
18 </inputs>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
19 <expand macro="stdio" />
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
20 <outputs>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
21 <data format="txt" name="log" label="Log of BETA basic"/>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
22 <data format="pdf" name="functionoutput" label="BETA functional prediction on ${peakfile.name}" from_work_dir="BETA_OUTPUT/NA_function_prediction.pdf"/>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
23 <data format="tabular" name="uptargetsoutput" label="BETA direct targets prediction on up regulated genes" from_work_dir="BETA_OUTPUT/NA_uptarget.txt"/>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
24 <data format="tabular" name="downtargetsoutput" label="BETA direct targets prediction on down regulated genes" from_work_dir="BETA_OUTPUT/NA_downtarget.txt"/>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
25 <data format="bed" name="uptargetpeaks" label="BETA Uptarget associated peaks" from_work_dir="BETA_OUTPUT/NA_uptarget_associate_peaks.bed"/>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
26 <data format="bed" name="downtargetpeaks" label="BETA Downtarget associated peaks" from_work_dir="BETA_OUTPUT/NA_downtarget_associate_peaks.bed"/>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
27 </outputs>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
28 <tests>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
29 <test>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
30 <param name='peakfile' value="peaks.bed" ftype="bed" dbkey="hg19"/>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
31 <param name="distance" value="100000"/>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
32 <param name="peaknumber" value="10000"/>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
33 <param name="genomeName" value="hg19"/>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
34 <param name='exprefile' value="diff_expr.xls" ftype="tabular" dbkey="hg19"/>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
35 <param name="kind" value="LIM"/>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
36 <param name="expreinfo" type="text" value="2,5,7"/>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
37 <param name="gname2" value="Refseq"/>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
38 <param name="diff_fdr" value="1.0"/>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
39 <param name="diff_amount" value="0.5"/>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
40 <param name="method" value="score"/>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
41 <output name="log">
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
42 <assert_contents>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
43 <has_text_matching expression="Finished" />
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
44 </assert_contents>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
45 </output>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
46 <output name="targetsoutput">
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
47 <assert_contents>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
48 <has_text_matching expression="chr19\t4675243\t4723855\tNM_139159\t1.1.*\t-\tDPP" />
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
49 </assert_contents>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
50 </output>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
51 <output name="targetpeaks">
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
52 <assert_contents>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
53 <has_text_matching expression="chr19\t4723422\t4724314\tregion_9\tNM_139159\tDPP9\t13\t0.6.*" />
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
54 </assert_contents>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
55 </output>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
56 </test>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
57 </tests>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
58 <help>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
59 **BETA basic**
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
60
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
61 @EXTERNAL_DOCUMENTATION@
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
62
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
63 @CITATION_SECTION@
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
64
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
65 This tool annotates the given intervals and scores with genome
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
66 features such as gene body. It's the major module in CEAS package
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
67 which is written by Hyunjin Gene Shin, published in Bioinformatics
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
68 (pubmed id:19689956).
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
69
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
70 .. class:: warningmark
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
71
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
72 **NEED IMPROVEMENT**
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
73
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
74 -----
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
75
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
76 **Parameters**
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
77
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
78 - **PEAKFILE file** contains peaks for the experiment in a bed
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
79 format file. Normally, it's produced by the peak calling tool. It's
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
80 required.
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
81 - **EXPREFILE file** contains the differentially expressed genes in a tab
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
82 delimited text file. It's required.
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
83 - **Kind** The kind of your expression file format, LIM for LIMMA standard
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
84 output with Microarray, CUF for Cuffdiffs standard output with RNA-seq,
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
85 BSF for BETA specific format, and O for other formats.
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
86 - **genome** hg19 for human and mm9 for mouse. For other genomes, don't set this parameter.
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
87 - **gname2** If this switch is on, gene or transcript IDs in files given
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
88 through -e will be considered as official gene symbols, DEFAULT=FALSE
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
89 - **EXPREINFO** is the columns info of the geneID, up/down status and statistical
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
90 values column of your expression data. NOTE: use a comma as a connector,
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
91 for example: 2,5,7 means geneID in the 2nd column, Tscore in 5th column
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
92 and FDR in the 7th column.
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
93 - **REFERENCE** is the refgene info file downloaded from UCSC genome browser.
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
94 It is a tab delimited text file with gene annotation with refseq and gene symbol.
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
95 Input this file only if your genome is neither hg19 nor mm9.
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
96 profiling
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
97 - **OUTPUT** to specify the output files directory
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
98 - **bl** Whether or not to use CTCF boundary file to get the contributed peaks
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
99 - **BOUNDARYFILE** is the file with reasonable boundaries if --bl is on and genome
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
100 is neither hg19 nor mm9.
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
101 - **NAME** specify the name of the output files.
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
102 - **DISTANCE** specify the distance within which peaks will be considered.
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
103 - **DIFF_FDR** specify the differential genes by the 3rd column in file input
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
104 via -e, genes with less than this value will be considered as the differentially
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
105 changed genes.
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
106 - **DIFF_AMOUNT** specify the differential genes the top #(DIFF_AMOUNT) ranked by
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
107 the 3rd column in file input via -e, genes ranked in the top # will be considered
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
108 as the differentially expressed genes.
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
109 - **CUTOFF** specify a cutoff of ks-test in the function prediction part
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
110
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
111 -----
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
112
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
113 **Script parameter list of BETA basic**
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
114
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
115 ::
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
116
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
117 -h, --help show this help message and exit
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
118 -p PEAKFILE, --peakfile PEAKFILE
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
119 The bed format of peaks binding sites. (BETA support 3
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
120 or 5 columns bed format, CHROM, START, END (NAME,
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
121 SCORE))
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
122 -e EXPREFILE, --diff_expr EXPREFILE
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
123 The differential expression file get from limma for
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
124 MicroArray data and cuffdiff for RNAseq data
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
125 -k {LIM,CUF,BSF,O}, --kind {LIM,CUF,BSF,O}
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
126 The kind of your expression file,this is required,it
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
127 can be LIM, CUF, BSF, O. LIM for LIMMA standard
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
128 format. CUF for CUFDIFF standard format, BSF for BETA
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
129 specific format and O for other formats, if is 'O',
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
130 columns info required via --info
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
131 -g {hg19,mm9}, --genome {hg19,mm9}
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
132 Specify your species, hg19, mm9. For other genome
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
133 assembly versions of human and mouse or other
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
134 species, ignore this parameter.
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
135 --gname2 If this switch is on, gene or transcript IDs in files
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
136 given through -e will be considered as official gene
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
137 symbols, DEFAULT=FALSE
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
138 --info EXPREINFO Specify the geneID, up/down status and statistical
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
139 values column of your expression data,NOTE: use a
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
140 comma as a connector. For example: 2,5,7 means geneID
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
141 in the 2nd column, Tscore in 5th column and FDR in 7
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
142 column DEFAULT:2,5,7 for LIMMA; 2,10,13 for Cuffdiff
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
143 and 1,2,3 for BETA specific format
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
144 -r REFERENCE, --reference REFERENCE
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
145 The refgene info file downloaded from UCSC genome
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
146 browser.input this file only if your genome is neither
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
147 hg19 nor mm9
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
148 -o OUTPUT, --output OUTPUT
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
149 The directory to store all the output files, if you
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
150 don't set this, files will be output into the current
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
151 directory
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
152 --bl Whether or not use CTCF boundary to filter peaks
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
153 around a gene, DEFAULT=FALSE
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
154 --bf BOUNDARYFILE CTCF conserved peaks bed file, use this only when you
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
155 set --bl and the genome is neither hg19 nor mm9
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
156 --pn PEAKNUMBER The number of peaks you want to consider,
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
157 DEFAULT=10000
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
158 --method {score,distance}
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
159 Define the method to do the TF/CR function prediction,
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
160 score for regulatory potential, distance for the
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
161 distance to the proximal binding peak. DEFAULT:SCORE
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
162 -n NAME, --name NAME This argument is used to name the result file.If not
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
163 set, the peakfile name will be used instead
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
164 -d DISTANCE, --distance DISTANCE
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
165 Set a number which unit is 'base'. It will get peaks
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
166 within this distance from gene TSS. default:100000
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
167 (100kb)
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
168 --df DIFF_FDR Input a number 0~1 as a threshold to pick out the most
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
169 significant differential expressed genes by FDR,
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
170 DEFAULT = 1, that is select all the genes
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
171 --da DIFF_AMOUNT Get the most significant differential expressed genes
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
172 by the percentage (0-1) or number (larger than 1). Input a
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
173 number between 0-1, the rank based on fdr for example,
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
174 2000, so that the script will only consider top 2000
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
175 genes as the differentially expressed genes. DEFAULT =
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
176 0.5, that is select top 50 percent genes of up and
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
177 down separately. NOTE: if you want to use diff_fdr,
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
178 please set this parameter to 1, otherwise it will get
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
179 the intersection of these two parameters
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
180 -c CUTOFF, --cutoff CUTOFF
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
181 Input a number between 0~1 as a threshold to select
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
182 the closer target gene list(up regulate or down
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
183 regulate or both) with the p value was called by one
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
184 side ks-test, DEFAULT = 0.001
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
185
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
186 </help>
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
187
20453b656907 Imported from capsule None
jjohnson
parents:
diff changeset
188 </tool>