Mercurial > repos > jjohnson > cistrome_beta
comparison beta_plus.xml @ 2:9c5241259454 draft
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/cistrome_beta commit 76ad167e754d8254ee4e9c6d2047c84c5f2da55a-dirty
author | jjohnson |
---|---|
date | Thu, 22 Mar 2018 08:33:55 -0400 |
parents | 20453b656907 |
children | 067573bac905 |
comparison
equal
deleted
inserted
replaced
1:7f023a22da15 | 2:9c5241259454 |
---|---|
1 <tool id="beta_plus" name="BETA-plus: Binding and Expression Target prediction and motif analysis" version="0.1.0"> | 1 <tool id="beta_plus" name="BETA-plus: Binding and Expression Target prediction and motif analysis" version="0.1.0"> |
2 <description>Predict the factors (TFs or CRs) direct target genes by combining the binding and expression data, then do motif analysis on target regions</description> | 2 <description>Predict the factors (TFs or CRs) direct target genes by combining the binding and expression data, then do motif analysis on target regions</description> |
3 <macros> | 3 <macros> |
4 <import>beta_macros.xml</import> | 4 <import>beta_macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="requirements" /> | 6 <expand macro="requirements" /> |
7 <command> | 7 <expand macro="stdio" /> |
8 BETA plus | 8 <command><![CDATA[ |
9 #include source=$common_opts# | 9 BETA plus |
10 #include source=$genome_opts# | 10 #include source=$common_opts# |
11 #include source=$ref_genome_seq_opts# | 11 #include source=$genome_opts# |
12 #include source=$extended_opts# | 12 #include source=$ref_genome_seq_opts# |
13 --mn $motifs | 13 #include source=$extended_opts# |
14 &> $log && | 14 --mn $motifs |
15 mkdir -p $motifresult.extra_files_path && | 15 &> $log && |
16 cp BETA_OUTPUT/motifresult/betamotif.html $motifresult && | 16 mkdir -p $motifresult.extra_files_path && |
17 cp BETA_OUTPUT/motifresult/*.js $motifresult.extra_files_path && | 17 cp BETA_OUTPUT/motifresult/betamotif.html $motifresult && |
18 cp BETA_OUTPUT/motifresult/*.css $motifresult.extra_files_path && | 18 cp BETA_OUTPUT/motifresult/*.js $motifresult.extra_files_path && |
19 cp -r BETA_OUTPUT/motifresult/img $motifresult.extra_files_path | 19 cp BETA_OUTPUT/motifresult/*.css $motifresult.extra_files_path && |
20 | 20 cp -r BETA_OUTPUT/motifresult/img $motifresult.extra_files_path |
21 </command> | 21 ]]></command> |
22 <inputs> | 22 <inputs> |
23 <expand macro="common_params" /> | 23 <expand macro="common_params" /> |
24 <expand macro="genome_params" /> | 24 <expand macro="genome_params" /> |
25 <expand macro="refGenomeSourceConditional" /> | 25 <expand macro="refGenomeSourceConditional" /> |
26 <expand macro="extended_params" /> | 26 <expand macro="extended_params" /> |
27 <param name="motifs" type="float" value="10" optional="true" label="Motifs to retrieve" | 27 <param name="motifs" type="float" value="10" optional="true" label="Motifs to retrieve" |
28 help="a number between 0 and 1 as the p-value cutoff or an integer larger than 1 as the number of motifs"> | 28 help="a number between 0 and 1 as the p-value cutoff or an integer larger than 1 as the number of motifs"> |
29 <validator type="in_range" max="20000" min="0" message="A float between 0 and 1 or an integer greater than 1" /> | 29 <validator type="in_range" max="20000" min="0" message="A float between 0 and 1 or an integer greater than 1" /> |
30 </param> | 30 </param> |
31 </inputs> | 31 </inputs> |
32 <expand macro="stdio" /> | 32 <outputs> |
33 <outputs> | 33 <data format="txt" name="log" label="Log of BETA plus"/> |
34 <data format="txt" name="log" label="Log of BETA plus"/> | 34 <data format="pdf" name="functionoutput" label="BETA functional prediction on ${peakfile.name}" from_work_dir="BETA_OUTPUT/NA_function_prediction.pdf"/> |
35 <data format="pdf" name="functionoutput" label="BETA functional prediction on ${peakfile.name}" from_work_dir="BETA_OUTPUT/NA_function_prediction.pdf"/> | 35 <data format="tabular" name="uptargetsoutput" label="BETA direct targets prediction on up regulated genes" from_work_dir="BETA_OUTPUT/NA_uptarget.txt"/> |
36 <data format="tabular" name="uptargetsoutput" label="BETA direct targets prediction on up regulated genes" from_work_dir="BETA_OUTPUT/NA_uptarget.txt"/> | 36 <data format="tabular" name="downtargetsoutput" label="BETA direct targets prediction on down regulated genes" from_work_dir="BETA_OUTPUT/NA_downtarget.txt"/> |
37 <data format="tabular" name="downtargetsoutput" label="BETA direct targets prediction on down regulated genes" from_work_dir="BETA_OUTPUT/NA_downtarget.txt"/> | 37 <data format="bed" name="uptargetpeaks" label="BETA Uptarget associated peaks" from_work_dir="BETA_OUTPUT/NA_uptarget_associate_peaks.bed"/> |
38 <data format="bed" name="uptargetpeaks" label="BETA Uptarget associated peaks" from_work_dir="BETA_OUTPUT/NA_uptarget_associate_peaks.bed"/> | 38 <data format="bed" name="downtargetpeaks" label="BETA Downtarget associated peaks" from_work_dir="BETA_OUTPUT/NA_downtarget_associate_peaks.bed"/> |
39 <data format="bed" name="downtargetpeaks" label="BETA Downtarget associated peaks" from_work_dir="BETA_OUTPUT/NA_downtarget_associate_peaks.bed"/> | 39 <data format="txt" name="upmotifs" label="BETA Motifs in up-target regions" from_work_dir="BETA_OUTPUT/motifresult/UP_MOTIFS.txt" /> |
40 <data format="txt" name="upmotifs" label="BETA Motifs in up-target regions" from_work_dir="BETA_OUTPUT/motifresult/UP_MOTIFS.txt" /> | 40 <data format="txt" name="up_non_motifs" label="BETA Motifs in up-target regions versus non-target regions" from_work_dir="BETA_OUTPUT/motifresult/UP_NON_MOTIFS.txt" /> |
41 <data format="txt" name="up_non_motifs" label="BETA Motifs in up-target regions versus non-target regions" from_work_dir="BETA_OUTPUT/motifresult/UP_NON_MOTIFS.txt" /> | 41 <data format="txt" name="downmotifs" label="BETA Motifs in down-target regions" from_work_dir="BETA_OUTPUT/motifresult/DOWN_MOTIFS.txt" /> |
42 <data format="txt" name="downmotifs" label="BETA Motifs in down-target regions" from_work_dir="BETA_OUTPUT/motifresult/DOWN_MOTIFS.txt" /> | 42 <data format="txt" name="down_non_motifs" label="BETA Motifs in down-target regions versus non-target regions" from_work_dir="BETA_OUTPUT/motifresult/DOWN_NON_MOTIFS.txt" /> |
43 <data format="txt" name="down_non_motifs" label="BETA Motifs in down-target regions versus non-target regions" from_work_dir="BETA_OUTPUT/motifresult/DOWN_NON_MOTIFS.txt" /> | 43 <data format="txt" name="differentialmotifs" label="BETA Motifs up-target regions versus down-target regions" from_work_dir="BETA_OUTPUT/motifresult/DIFFERENTIAL_MOTIF_UP_DOWN.txt" /> |
44 <data format="txt" name="differentialmotifs" label="BETA Motifs up-target regions versus down-target regions" from_work_dir="BETA_OUTPUT/motifresult/DIFFERENTIAL_MOTIF_UP_DOWN.txt" /> | 44 <data format="html" name="motifresult" label="BETA Motif analysis on target regions"/> |
45 <data format="html" name="motifresult" label="BETA Motif analysis on target regions"/> | 45 </outputs> |
46 </outputs> | 46 <tests> |
47 <tests> | 47 <test> |
48 <test> | 48 <param name='peakfile' value="peaks.bed" ftype="bed" dbkey="hg19"/> |
49 <param name='peakfile' value="peaks.bed" ftype="bed" dbkey="hg19"/> | 49 <param name="distance" value="100000"/> |
50 <param name="distance" value="100000"/> | 50 <param name="peaknumber" value="10000"/> |
51 <param name="peaknumber" value="10000"/> | 51 <param name="genomeName" value="hg19"/> |
52 <param name="genomeName" value="hg19"/> | 52 <param name='exprefile' value="diff_expr.xls" ftype="tabular" dbkey="hg19"/> |
53 <param name='exprefile' value="diff_expr.xls" ftype="tabular" dbkey="hg19"/> | 53 <param name="kind" value="LIM"/> |
54 <param name="kind" value="LIM"/> | 54 <param name="expreinfo" value="2,5,7"/> |
55 <param name="expreinfo" type="text" value="2,5,7"/> | 55 <param name="gname2" value="Refseq"/> |
56 <param name="gname2" value="Refseq"/> | 56 <param name="diff_fdr" value="1.0"/> |
57 <param name="diff_fdr" value="1.0"/> | 57 <param name="diff_amount" value="0.5"/> |
58 <param name="diff_amount" value="0.5"/> | 58 <param name="method" value="score"/> |
59 <param name="method" value="score"/> | 59 <output name="log"> |
60 <output name="log"> | 60 <assert_contents> |
61 <assert_contents> | 61 <has_text_matching expression="Finished" /> |
62 <has_text_matching expression="Finished" /> | 62 </assert_contents> |
63 </assert_contents> | 63 </output> |
64 </output> | 64 <output name="uptargetsoutput"> |
65 <output name="uptargetsoutput"> | 65 <assert_contents> |
66 <assert_contents> | 66 <has_text_matching expression="NM_001002231" /> |
67 <has_text_matching expression="NM_001002231" /> | 67 </assert_contents> |
68 </assert_contents> | 68 </output> |
69 </output> | 69 <output name="downtargetsoutput"> |
70 <output name="downtargetsoutput"> | 70 <assert_contents> |
71 <assert_contents> | 71 <has_text_matching expression="NM_001280" /> |
72 <has_text_matching expression="NM_001280" /> | 72 </assert_contents> |
73 </assert_contents> | 73 </output> |
74 </output> | 74 <output name="differentialmotifs"> |
75 <output name="differentialmotifs"> | 75 <assert_contents> |
76 <assert_contents> | 76 <has_text_matching expression="CDX1\tHomeodomain Family" /> |
77 <has_text_matching expression="CDX1\tHomeodomain Family" /> | 77 </assert_contents> |
78 </assert_contents> | 78 </output> |
79 </output> | 79 </test> |
80 </test> | 80 </tests> |
81 </tests> | 81 <help><![CDATA[ |
82 <help> | |
83 ** BETA plus ** | 82 ** BETA plus ** |
84 | 83 |
85 @EXTERNAL_DOCUMENTATION@ | 84 @EXTERNAL_DOCUMENTATION@ |
86 | 85 |
87 @CITATION_SECTION@ | 86 @CITATION_SECTION@ |
88 | 87 |
89 This tool annotates the given intervals and scores with genome | 88 This tool annotates the given intervals and scores with genome |
90 features such as gene body. | 89 features such as gene body. |
91 Predicts Direct targets of TF and the active/repressive function | 90 Predicts Direct targets of TF and the active/repressive function |
92 prediction. Does motif analysis at targets region as well. | 91 prediction. Does motif analysis at targets region as well. |
93 It's the major module in CEAS package | 92 It's the major module in CEAS package |
94 which is written by Hyunjin Gene Shin, published in Bioinformatics | 93 which is written by Hyunjin Gene Shin, published in Bioinformatics |
95 (pubmed id:19689956). | 94 (pubmed id:19689956). |
96 | 95 |
97 .. class:: warningmark | 96 .. class:: warningmark |
101 ----- | 100 ----- |
102 | 101 |
103 **Parameters** | 102 **Parameters** |
104 | 103 |
105 - **PEAKFILE file** contains peaks for the experiment in a bed | 104 - **PEAKFILE file** contains peaks for the experiment in a bed |
106 format file. Normally, it's produced by the peak calling tool. It's | 105 format file. Normally, it's produced by the peak calling tool. It's |
107 required. | 106 required. |
108 - **EXPREFILE file** contains the differentially expressed genes in a tab | 107 - **EXPREFILE file** contains the differentially expressed genes in a tab |
109 delimited text file. It's required. | 108 delimited text file. It's required. |
110 - **Kind** The kind of your expression file format, LIM for LIMMA standard | 109 - **Kind** The kind of your expression file format, LIM for LIMMA standard |
111 output with Microarray, CUF for Cuffdiffs standard output with RNA-seq, | 110 output with Microarray, CUF for Cuffdiffs standard output with RNA-seq, |
112 BSF for BETA specific format, and O for other formats. | 111 BSF for BETA specific format, and O for other formats. |
113 - **genome** hg19 for human and mm9 for mouse. For any other genome, do not set this parameter. | 112 - **genome** hg19 for human and mm9 for mouse. For any other genome, do not set this parameter. |
114 - **genomereference** Genome reference data with fasta format | 113 - **genomereference** Genome reference data with fasta format |
115 - **gname2** If this switch is on, gene or transcript IDs in files given | 114 - **gname2** If this switch is on, gene or transcript IDs in files given |
116 through -e will be considered as official gene symbols, DEFAULT=FALSE | 115 through -e will be considered as official gene symbols, DEFAULT=FALSE |
117 - **EXPREINFO** is the columns info of the geneID, up/down status and statistical | 116 - **EXPREINFO** is the columns info of the geneID, up/down status and statistical |
118 values column of your expression data. NOTE: use a comma as a connector. | 117 values column of your expression data. NOTE: use a comma as a connector. |
119 For example: 2,5,7 means geneID in the 2nd column, Tscore in the 5th column | 118 For example: 2,5,7 means geneID in the 2nd column, Tscore in the 5th column |
120 and FDR in the 7th column. | 119 and FDR in the 7th column. |
121 - **REFERENCE** is the refgene info file downloaded from UCSC genome browser. | 120 - **REFERENCE** is the refgene info file downloaded from UCSC genome browser. |
122 It is a tab delimited text file with gene annotation with refseq and gene symbol. | 121 It is a tab delimited text file with gene annotation with refseq and gene symbol. |
123 Input this file only if your genome is neither hg19 nor mm9. | 122 Input this file only if your genome is neither hg19 nor mm9. |
124 profiling | 123 profiling |
125 - **OUTPUT** to specify the output files directory | 124 - **OUTPUT** to specify the output files directory |
126 - **bl** Whether or not to use CTCF boundary file to get the contributed peaks | 125 - **bl** Whether or not to use CTCF boundary file to get the contributed peaks |
127 - **BOUNDARYFILE** is the file with reasonable boundaries if --bl is on and genome | 126 - **BOUNDARYFILE** is the file with reasonable boundaries if --bl is on and genome |
128 is neither hg19 nor mm9. | 127 is neither hg19 nor mm9. |
129 - **NAME** specify the name of the output files. | 128 - **NAME** specify the name of the output files. |
130 - **DISTANCE** specify the distance within which peaks will be considered. | 129 - **DISTANCE** specify the distance within which peaks will be considered. |
131 - **DIFF_FDR** specify the differential genes by the 3rd column in file input | 130 - **DIFF_FDR** specify the differential genes by the 3rd column in file input |
132 via -e, genes with a value less than this will be considered as the differentially | 131 via -e, genes with a value less than this will be considered as the differentially |
133 changed genes. | 132 changed genes. |
134 - **DIFF_AMOUNT** specify the differential genes the top #(DIFF_AMOUNT) ranked by | 133 - **DIFF_AMOUNT** specify the differential genes the top #(DIFF_AMOUNT) ranked by |
135 the 3rd column in file input via -e, genes ranked in the top # will be considered | 134 the 3rd column in file input via -e, genes ranked in the top # will be considered |
136 as the differentially expressed genes. | 135 as the differentially expressed genes. |
137 - **CUTOFF** specify a cutoff of ks-test in the function prediction part | 136 - **CUTOFF** specify a cutoff of ks-test in the function prediction part |
138 | 137 |
139 | 138 |
140 ----- | 139 ----- |
141 | 140 |
142 **Script parameter list of BETA plus** | 141 **Script parameter list of BETA plus** |
143 | 142 |
144 :: | 143 :: |
145 | 144 |
146 -h, --help show this help message and exit | 145 -h, --help show this help message and exit |
147 -p PEAKFILE, --peakfile PEAKFILE | 146 -p PEAKFILE, --peakfile PEAKFILE The bed format of peaks binding sites. (BETA support 3 |
148 The bed format of peaks binding sites. (BETA support 3 | 147 or 5 columns bed format, CHROM, START, END (NAME, |
149 or 5 columns bed format, CHROM, START, END (NAME, | 148 SCORE)) |
150 SCORE)) | 149 -e EXPREFILE, --diff_expr EXPREFILE The differential expression file get from limma for |
151 -e EXPREFILE, --diff_expr EXPREFILE | 150 MicroArray ddata and cuffdiff for RNAseq data |
152 The differential expression file get from limma for | 151 -k {LIM,CUF,BSF,O}, --kind {LIM,CUF,BSF,O} The kind of your expression file,this is required,it |
153 MicroArray ddata and cuffdiff for RNAseq data | 152 can be LIM, CUF, BSF, O. LIM for LIMMA standard |
154 -k {LIM,CUF,BSF,O}, --kind {LIM,CUF,BSF,O} | 153 format. CUF for CUFDIFF standard format, BSF for BETA |
155 The kind of your expression file,this is required,it | 154 specific format and O for other formats, if is 'O', |
156 can be LIM, CUF, BSF, O. LIM for LIMMA standard | 155 columns infor required via --info |
157 format. CUF for CUFDIFF standard format, BSF for BETA | 156 -g {hg19,mm9}, --genome {hg19,mm9} Specify your species, hg19, mm9 |
158 specific format and O for other formats, if is 'O', | 157 --gs GENOMEREFERNCE GenomeReference file with fasta format |
159 columns infor required via --info | 158 --gname2 If this switch is on, gene or transcript IDs in files |
160 -g {hg19,mm9}, --genome {hg19,mm9} | 159 given through -e will be considered as official gene |
161 Specify your species, hg19, mm9 | 160 symbols, DEFAULT=FALSE |
162 --gs GENOMEREFERNCE GenomeReference file with fasta format | 161 --info EXPREINFO Specify the geneID, up/down status and statistcal |
163 --gname2 If this switch is on, gene or transcript IDs in files | 162 values column of your expression data,NOTE: use a |
164 given through -e will be considered as official gene | 163 comma as an connector. for example: 2,5,7 means geneID |
165 symbols, DEFAULT=FALSE | 164 in the 2nd column, Tscore in 5th column and FDR in 7 |
166 --info EXPREINFO Specify the geneID, up/down status and statistcal | 165 column DEFAULT:2,5,7 for LIMMA; 2,10,13 for Cuffdiff |
167 values column of your expression data,NOTE: use a | 166 and 1,2,3 for BETA specific format |
168 comma as an connector. for example: 2,5,7 means geneID | 167 -r REFERENCE, --reference REFERENCE The refgene info file downloaded from UCSC genome |
169 in the 2nd column, Tscore in 5th column and FDR in 7 | 168 browser.input this file only if your genome is neither |
170 column DEFAULT:2,5,7 for LIMMA; 2,10,13 for Cuffdiff | 169 hg19 nor mm9 |
171 and 1,2,3 for BETA specific format | 170 -o OUTPUT, --output OUTPUT The directory to store all the output files, if you |
172 -r REFERENCE, --reference REFERENCE | 171 don't set this, files will be output into the current |
173 The refgene info file downloaded from UCSC genome | 172 directory |
174 browser.input this file only if your genome is neither | 173 --bl Whether or not use CTCF boundary to filter peaks |
175 hg19 nor mm9 | 174 around a gene, DEFAULT=FALSE |
176 -o OUTPUT, --output OUTPUT | 175 --bf BOUNDARYFILE CTCF conserved peaks bed file, use this only when you |
177 The directory to store all the output files, if you | 176 set --bl and the genome is neither hg19 nor mm9 |
178 don't set this, files will be output into the current | 177 --pn PEAKNUMBER The number of peaks you want to consider, |
179 directory | 178 DEFAULT=10000 |
180 --bl Whether or not use CTCF boundary to filter peaks | 179 --method {score,distance} Define the method to do the TF/CR function prediction, |
181 around a gene, DEFAULT=FALSE | 180 score for regulatory potential, distance for the |
182 --bf BOUNDARYFILE CTCF conserved peaks bed file, use this only when you | 181 distance to the proximal binding peak. DEFAULT:SCORE |
183 set --bl and the genome is neither hg19 nor mm9 | 182 -n NAME, --name NAME This argument is used to name the result file.If not |
184 --pn PEAKNUMBER The number of peaks you want to consider, | 183 set, the peakfile name will be used instead |
185 DEFAULT=10000 | 184 -d DISTANCE, --distance DISTANCE Set a number which unit is 'base'. It will get peaks |
186 --method {score,distance} | 185 within this distance from gene TSS. default:100000 |
187 Define the method to do the TF/CR function prediction, | 186 (100kb) |
188 score for regulatory potential, distance for the | 187 --df DIFF_FDR Input a number 0~1 as a threshold to pick out the most |
189 distance to the proximal binding peak. DEFAULT:SCORE | 188 significant differential expressed genes by FDR, |
190 -n NAME, --name NAME This argument is used to name the result file.If not | 189 DEFAULT = 1, that is select all the genes |
191 set, the peakfile name will be used instead | 190 --da DIFF_AMOUNT Get the most significant differential expressed genes |
192 -d DISTANCE, --distance DISTANCE | 191 by the percentage(0-1) or number(larger than 1)Input a |
193 Set a number which unit is 'base'. It will get peaks | 192 number between 0-1, the rank based on fdr for example, |
194 within this distance from gene TSS. default:100000 | 193 2000, so that the script will only consider top 2000 |
195 (100kb) | 194 genes as the differentially expressed genes. DEFAULT = |
196 --df DIFF_FDR Input a number 0~1 as a threshold to pick out the most | 195 0.5, that is select top 50 percent genes of up and |
197 significant differential expressed genes by FDR, | 196 down seprately. NOTE: if you want to use diff_fdr, |
198 DEFAULT = 1, that is select all the genes | 197 please set this parameter to 1, otherwise it will get |
199 --da DIFF_AMOUNT Get the most significant differential expressed genes | 198 the intersection of these two parameters |
200 by the percentage(0-1) or number(larger than 1)Input a | 199 -c CUTOFF, --cutoff CUTOFF Input a number between 0~1 as a threshold to select |
201 number between 0-1, the rank based on fdr for example, | 200 the closer target gene list(up regulate or down |
202 2000, so that the script will only consider top 2000 | 201 regulate or both) with the p value was called by one |
203 genes as the differentially expressed genes. DEFAULT = | 202 side ks-test, DEFAULT = 0.001 |
204 0.5, that is select top 50 percent genes of up and | 203 |
205 down seprately. NOTE: if you want to use diff_fdr, | 204 ]]></help> |
206 please set this parameter to 1, otherwise it will get | 205 <expand macro="citations" /> |
207 the intersection of these two parameters | |
208 -c CUTOFF, --cutoff CUTOFF | |
209 Input a number between 0~1 as a threshold to select | |
210 the closer target gene list(up regulate or down | |
211 regulate or both) with the p value was called by one | |
212 side ks-test, DEFAULT = 0.001 | |
213 | |
214 </help> | |
215 </tool> | 206 </tool> |