Mercurial > repos > jjohnson > cistrome_beta
comparison beta_plus.xml @ 0:20453b656907
Imported from capsule None
author | jjohnson |
---|---|
date | Tue, 16 Sep 2014 13:35:24 -0400 |
parents | |
children | 9c5241259454 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:20453b656907 |
---|---|
1 <tool id="beta_plus" name="BETA-plus: Binding and Expression Target prediction and motif analysis" version="0.1.0"> | |
2 <description>Predict the factors (TFs or CRs) direct target genes by combining the binding and expression data, then do motif analysis on target regions</description> | |
3 <macros> | |
4 <import>beta_macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements" /> | |
7 <!-- Runs BETA plus with stdout and stderr captured in $log, then copies the motif HTML report and its js/css/img assets into the motifresult dataset; the body is CDATA so shell operators need no XML escaping. --> <command><![CDATA[ | |
8 BETA plus | |
9 #include source=$common_opts# | |
10 #include source=$genome_opts# | |
11 #include source=$ref_genome_seq_opts# | |
12 #include source=$extended_opts# | |
13 #if str($motifs) not in ('', 'None')# --mn $motifs #end if# | |
14 > $log 2>&1 && | |
15 mkdir -p $motifresult.extra_files_path && | |
16 cp BETA_OUTPUT/motifresult/betamotif.html $motifresult && | |
17 cp BETA_OUTPUT/motifresult/*.js $motifresult.extra_files_path && | |
18 cp BETA_OUTPUT/motifresult/*.css $motifresult.extra_files_path && | |
19 cp -r BETA_OUTPUT/motifresult/img $motifresult.extra_files_path | |
20 | |
21 ]]></command> | |
22 <inputs> <!-- shared parameter groups are expanded from the imported macros; only "motifs" is declared in this file --> | |
23 <expand macro="common_params"/> | |
24 <expand macro="genome_params"/> | |
25 <expand macro="refGenomeSourceConditional"/> | |
26 <expand macro="extended_params"/> | |
27 <param type="float" name="motifs" label="Motifs to retrieve" optional="true" value="10" | |
28 help="a number between 0 and 1 as the p-value cutoff or an integer larger than 1 as the number of motifs"> | |
29 <validator type="in_range" min="0" max="20000" message="A float between 0 and 1 or an integer greater than 1"/> | |
30 </param> | |
31 </inputs> | |
32 <expand macro="stdio" /> | |
33 <outputs> <!-- "log" and "motifresult" are written by the command itself; every other dataset is collected from BETA_OUTPUT via from_work_dir --> | |
34 <data name="log" format="txt" label="Log of BETA plus"/> | |
35 <data name="functionoutput" format="pdf" from_work_dir="BETA_OUTPUT/NA_function_prediction.pdf" label="BETA functional prediction on ${peakfile.name}"/> | |
36 <data name="uptargetsoutput" format="tabular" from_work_dir="BETA_OUTPUT/NA_uptarget.txt" label="BETA direct targets prediction on up regulated genes"/> | |
37 <data name="downtargetsoutput" format="tabular" from_work_dir="BETA_OUTPUT/NA_downtarget.txt" label="BETA direct targets prediction on down regulated genes"/> | |
38 <data name="uptargetpeaks" format="bed" from_work_dir="BETA_OUTPUT/NA_uptarget_associate_peaks.bed" label="BETA Uptarget associated peaks"/> | |
39 <data name="downtargetpeaks" format="bed" from_work_dir="BETA_OUTPUT/NA_downtarget_associate_peaks.bed" label="BETA Downtarget associated peaks"/> | |
40 <data name="upmotifs" format="txt" from_work_dir="BETA_OUTPUT/motifresult/UP_MOTIFS.txt" label="BETA Motifs in up-target regions"/> | |
41 <data name="up_non_motifs" format="txt" from_work_dir="BETA_OUTPUT/motifresult/UP_NON_MOTIFS.txt" label="BETA Motifs in up-target regions versus non-target regions"/> | |
42 <data name="downmotifs" format="txt" from_work_dir="BETA_OUTPUT/motifresult/DOWN_MOTIFS.txt" label="BETA Motifs in down-target regions"/> | |
43 <data name="down_non_motifs" format="txt" from_work_dir="BETA_OUTPUT/motifresult/DOWN_NON_MOTIFS.txt" label="BETA Motifs in down-target regions versus non-target regions"/> | |
44 <data name="differentialmotifs" format="txt" from_work_dir="BETA_OUTPUT/motifresult/DIFFERENTIAL_MOTIF_UP_DOWN.txt" label="BETA Motifs up-target regions versus down-target regions"/> | |
45 <data name="motifresult" format="html" label="BETA Motif analysis on target regions"/> | |
46 </outputs> | |
47 <tests> | |
48 <test> <!-- LIM-kind expression table with hg19 peaks; asserts on the log, both target tables and the differential motif table --> | |
49 <param name="peakfile" value="peaks.bed" ftype="bed" dbkey="hg19"/> | |
50 <param name="distance" value="100000"/> | |
51 <param name="peaknumber" value="10000"/> | |
52 <param name="genomeName" value="hg19"/> | |
53 <param name="exprefile" value="diff_expr.xls" ftype="tabular" dbkey="hg19"/> | |
54 <param name="kind" value="LIM"/> | |
55 <param name="expreinfo" value="2,5,7"/> | |
56 <param name="gname2" value="Refseq"/> | |
57 <param name="diff_fdr" value="1.0"/> | |
58 <param name="diff_amount" value="0.5"/> | |
59 <param name="method" value="score"/> | |
60 <output name="log"> | |
61 <assert_contents> | |
62 <has_text_matching expression="Finished" /> | |
63 </assert_contents> | |
64 </output> | |
65 <output name="uptargetsoutput"> | |
66 <assert_contents> | |
67 <has_text_matching expression="NM_001002231" /> | |
68 </assert_contents> | |
69 </output> | |
70 <output name="downtargetsoutput"> | |
71 <assert_contents> | |
72 <has_text_matching expression="NM_001280" /> | |
73 </assert_contents> | |
74 </output> | |
75 <output name="differentialmotifs"> | |
76 <assert_contents> | |
77 <has_text_matching expression="CDX1\tHomeodomain Family" /> | |
78 </assert_contents> | |
79 </output> | |
80 </test> | |
81 </tests> | |
82 <help> | |
83 **BETA plus** | |
84 | |
85 @EXTERNAL_DOCUMENTATION@ | |
86 | |
87 @CITATION_SECTION@ | |
88 | |
89 This tool predicts the direct target genes of a factor (TF or CR) by | |
90 combining binding (peak) data with differential expression data. | |
91 It also predicts the active/repressive function of the factor | |
92 and does motif analysis on the target regions as well. | |
93 Note: the CEAS package, | |
94 which is written by Hyunjin Gene Shin, published in Bioinformatics | |
95 (pubmed id:19689956), is a separate tool; this wrapper runs BETA. | |
96 | |
97 .. class:: warningmark | |
98 | |
99 **NEED IMPROVEMENT** | |
100 | |
101 ----- | |
102 | |
103 **Parameters** | |
104 | |
105 - **PEAKFILE file** contains peaks for the experiment in a bed | |
106 format file. Normally, it's produced by the peak calling tool. It's | |
107 required. | |
108 - **EXPREFILE file** contains the differentially expressed genes in a tab | |
109 delimited text file. It's required. | |
110 - **Kind** The kind of your expression file format, LIM for LIMMA standard | |
111 output with Microarray, CUF for Cuffdiffs standard output with RNA-seq, | |
112 BSF for BETA specific format, and O for other formats. | |
113 - **genome** hg19 for human and mm9 for mouse. Others, don't set this parameter. | |
114 - **genomereference** Genome reference data with fasta format | |
115 - **gname2** If this switch is on, gene or transcript IDs in files given | |
116 through -e will be considered as official gene symbols, DEFAULT=FALSE | |
117 - **EXPREINFO** is the columns info of the geneID, up/down status and statistical | |
118 values column of your expression data. NOTE: use a comma as a connector, | |
119 for example: 2,5,7 means geneID in the 2nd column, Tscore in the 5th column | |
120 and FDR in the 7th column. | |
121 - **REFERENCE** is the refgene info file downloaded from the UCSC genome browser. | |
122 It is a tab-delimited text file with gene annotation, including refseq | |
123 and gene symbol. Input this file only if your genome is neither hg19 | |
124 nor mm9. | |
125 - **OUTPUT** to specify the output files directory | |
126 - **bl** Whether or not to use CTCF boundary file to get the contributed peaks | |
127 - **BOUNDARYFILE** is the file with reasonable boundaries if --bl is on and genome | |
128 is neither hg19 nor mm9. | |
129 - **NAME** specify the name of the output files. | |
130 - **DISTANCE** specify the distance from the gene TSS within which peaks will be considered. | |
131 - **DIFF_FDR** specify the differential genes by the 3rd column in file input | |
132 via -e, genes with less than this value will be considered as the differentially | |
133 changed genes. | |
134 - **DIFF_AMOUNT** specify the differential genes the top #(DIFF_AMOUNT) ranked by | |
135 the 3rd column in file input via -e, genes ranked in the top # will be considered | |
136 as the differentially expressed genes. | |
137 - **CUTOFF** specify a cutoff of ks-test in the function prediction part | |
138 | |
139 | |
140 ----- | |
141 | |
142 **Script parameter list of BETA plus** | |
143 | |
144 :: | |
145 | |
146 -h, --help show this help message and exit | |
147 -p PEAKFILE, --peakfile PEAKFILE | |
148 The bed format of peaks binding sites. (BETA support 3 | |
149 or 5 columns bed format, CHROM, START, END (NAME, | |
150 SCORE)) | |
151 -e EXPREFILE, --diff_expr EXPREFILE | |
152 The differential expression file get from limma for | |
153 MicroArray ddata and cuffdiff for RNAseq data | |
154 -k {LIM,CUF,BSF,O}, --kind {LIM,CUF,BSF,O} | |
155 The kind of your expression file,this is required,it | |
156 can be LIM, CUF, BSF, O. LIM for LIMMA standard | |
157 format. CUF for CUFDIFF standard format, BSF for BETA | |
158 specific format and O for other formats, if is 'O', | |
159 columns infor required via --info | |
160 -g {hg19,mm9}, --genome {hg19,mm9} | |
161 Specify your species, hg19, mm9 | |
162 --gs GENOMEREFERNCE GenomeReference file with fasta format | |
163 --gname2 If this switch is on, gene or transcript IDs in files | |
164 given through -e will be considered as official gene | |
165 symbols, DEFAULT=FALSE | |
166 --info EXPREINFO Specify the geneID, up/down status and statistcal | |
167 values column of your expression data,NOTE: use a | |
168 comma as an connector. for example: 2,5,7 means geneID | |
169 in the 2nd column, Tscore in 5th column and FDR in 7 | |
170 column DEFAULT:2,5,7 for LIMMA; 2,10,13 for Cuffdiff | |
171 and 1,2,3 for BETA specific format | |
172 -r REFERENCE, --reference REFERENCE | |
173 The refgene info file downloaded from UCSC genome | |
174 browser.input this file only if your genome is neither | |
175 hg19 nor mm9 | |
176 -o OUTPUT, --output OUTPUT | |
177 The directory to store all the output files, if you | |
178 don't set this, files will be output into the current | |
179 directory | |
180 --bl Whether or not use CTCF boundary to filter peaks | |
181 around a gene, DEFAULT=FALSE | |
182 --bf BOUNDARYFILE CTCF conserved peaks bed file, use this only when you | |
183 set --bl and the genome is neither hg19 nor mm9 | |
184 --pn PEAKNUMBER The number of peaks you want to consider, | |
185 DEFAULT=10000 | |
186 --method {score,distance} | |
187 Define the method to do the TF/CR function prediction, | |
188 score for regulatory potential, distance for the | |
189 distance to the proximal binding peak. DEFAULT:SCORE | |
190 -n NAME, --name NAME This argument is used to name the result file.If not | |
191 set, the peakfile name will be used instead | |
192 -d DISTANCE, --distance DISTANCE | |
193 Set a number which unit is 'base'. It will get peaks | |
194 within this distance from gene TSS. default:100000 | |
195 (100kb) | |
196 --df DIFF_FDR Input a number 0~1 as a threshold to pick out the most | |
197 significant differential expressed genes by FDR, | |
198 DEFAULT = 1, that is select all the genes | |
199 --da DIFF_AMOUNT Get the most significant differential expressed genes | |
200 by the percentage(0-1) or number(larger than 1)Input a | |
201 number between 0-1, the rank based on fdr for example, | |
202 2000, so that the script will only consider top 2000 | |
203 genes as the differentially expressed genes. DEFAULT = | |
204 0.5, that is select top 50 percent genes of up and | |
205 down seprately. NOTE: if you want to use diff_fdr, | |
206 please set this parameter to 1, otherwise it will get | |
207 the intersection of these two parameters | |
208 -c CUTOFF, --cutoff CUTOFF | |
209 Input a number between 0~1 as a threshold to select | |
210 the closer target gene list(up regulate or down | |
211 regulate or both) with the p value was called by one | |
212 side ks-test, DEFAULT = 0.001 | |
213 | |
214 </help> | |
215 </tool> |