comparison beta_plus.xml @ 2:9c5241259454 draft

planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/cistrome_beta commit 76ad167e754d8254ee4e9c6d2047c84c5f2da55a-dirty
author jjohnson
date Thu, 22 Mar 2018 08:33:55 -0400
parents 20453b656907
children 067573bac905
comparison
equal deleted inserted replaced
1:7f023a22da15 2:9c5241259454
1 <tool id="beta_plus" name="BETA-plus: Binding and Expression Target prediction and motif analysis" version="0.1.0"> 1 <tool id="beta_plus" name="BETA-plus: Binding and Expression Target prediction and motif analysis" version="0.1.0">
2 <description>Predict the factors (TFs or CRs) direct target genes by combining the binding and expression data, then do motif analysis on target regions</description> 2 <description>Predict the factors (TFs or CRs) direct target genes by combining the binding and expression data, then do motif analysis on target regions</description>
3 <macros> 3 <macros>
4 <import>beta_macros.xml</import> 4 <import>beta_macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements" /> 6 <expand macro="requirements" />
7 <command> 7 <expand macro="stdio" />
8 BETA plus 8 <command><![CDATA[
9 #include source=$common_opts# 9 BETA plus
10 #include source=$genome_opts# 10 #include source=$common_opts#
11 #include source=$ref_genome_seq_opts# 11 #include source=$genome_opts#
12 #include source=$extended_opts# 12 #include source=$ref_genome_seq_opts#
13 --mn $motifs 13 #include source=$extended_opts#
14 &amp;> $log &amp;&amp; 14 --mn $motifs
15 mkdir -p $motifresult.extra_files_path &amp;&amp; 15 &> $log &&
16 cp BETA_OUTPUT/motifresult/betamotif.html $motifresult &amp;&amp; 16 mkdir -p $motifresult.extra_files_path &&
17 cp BETA_OUTPUT/motifresult/*.js $motifresult.extra_files_path &amp;&amp; 17 cp BETA_OUTPUT/motifresult/betamotif.html $motifresult &&
18 cp BETA_OUTPUT/motifresult/*.css $motifresult.extra_files_path &amp;&amp; 18 cp BETA_OUTPUT/motifresult/*.js $motifresult.extra_files_path &&
19 cp -r BETA_OUTPUT/motifresult/img $motifresult.extra_files_path 19 cp BETA_OUTPUT/motifresult/*.css $motifresult.extra_files_path &&
20 20 cp -r BETA_OUTPUT/motifresult/img $motifresult.extra_files_path
21 </command> 21 ]]></command>
22 <inputs> 22 <inputs>
23 <expand macro="common_params" /> 23 <expand macro="common_params" />
24 <expand macro="genome_params" /> 24 <expand macro="genome_params" />
25 <expand macro="refGenomeSourceConditional" /> 25 <expand macro="refGenomeSourceConditional" />
26 <expand macro="extended_params" /> 26 <expand macro="extended_params" />
27 <param name="motifs" type="float" value="10" optional="true" label="Motifs to retrieve" 27 <param name="motifs" type="float" value="10" optional="true" label="Motifs to retrieve"
28 help="a number between 0 and 1 as the p-value cutoff or an integer larger than 1 as the number of motifs"> 28 help="a number between 0 and 1 as the p-value cutoff or an integer larger than 1 as the number of motifs">
29 <validator type="in_range" max="20000" min="0" message="A float between 0 and 1 or an integer greater than 1" /> 29 <validator type="in_range" max="20000" min="0" message="A float between 0 and 1 or an integer greater than 1" />
30 </param> 30 </param>
31 </inputs> 31 </inputs>
32 <expand macro="stdio" /> 32 <outputs>
33 <outputs> 33 <data format="txt" name="log" label="Log of BETA plus"/>
34 <data format="txt" name="log" label="Log of BETA plus"/> 34 <data format="pdf" name="functionoutput" label="BETA functional prediction on ${peakfile.name}" from_work_dir="BETA_OUTPUT/NA_function_prediction.pdf"/>
35 <data format="pdf" name="functionoutput" label="BETA functional prediction on ${peakfile.name}" from_work_dir="BETA_OUTPUT/NA_function_prediction.pdf"/> 35 <data format="tabular" name="uptargetsoutput" label="BETA direct targets prediction on up regulated genes" from_work_dir="BETA_OUTPUT/NA_uptarget.txt"/>
36 <data format="tabular" name="uptargetsoutput" label="BETA direct targets prediction on up regulated genes" from_work_dir="BETA_OUTPUT/NA_uptarget.txt"/> 36 <data format="tabular" name="downtargetsoutput" label="BETA direct targets prediction on down regulated genes" from_work_dir="BETA_OUTPUT/NA_downtarget.txt"/>
37 <data format="tabular" name="downtargetsoutput" label="BETA direct targets prediction on down regulated genes" from_work_dir="BETA_OUTPUT/NA_downtarget.txt"/> 37 <data format="bed" name="uptargetpeaks" label="BETA Uptarget associated peaks" from_work_dir="BETA_OUTPUT/NA_uptarget_associate_peaks.bed"/>
38 <data format="bed" name="uptargetpeaks" label="BETA Uptarget associated peaks" from_work_dir="BETA_OUTPUT/NA_uptarget_associate_peaks.bed"/> 38 <data format="bed" name="downtargetpeaks" label="BETA Downtarget associated peaks" from_work_dir="BETA_OUTPUT/NA_downtarget_associate_peaks.bed"/>
39 <data format="bed" name="downtargetpeaks" label="BETA Downtarget associated peaks" from_work_dir="BETA_OUTPUT/NA_downtarget_associate_peaks.bed"/> 39 <data format="txt" name="upmotifs" label="BETA Motifs in up-target regions" from_work_dir="BETA_OUTPUT/motifresult/UP_MOTIFS.txt" />
40 <data format="txt" name="upmotifs" label="BETA Motifs in up-target regions" from_work_dir="BETA_OUTPUT/motifresult/UP_MOTIFS.txt" /> 40 <data format="txt" name="up_non_motifs" label="BETA Motifs in up-target regions versus non-target regions" from_work_dir="BETA_OUTPUT/motifresult/UP_NON_MOTIFS.txt" />
41 <data format="txt" name="up_non_motifs" label="BETA Motifs in up-target regions versus non-target regions" from_work_dir="BETA_OUTPUT/motifresult/UP_NON_MOTIFS.txt" /> 41 <data format="txt" name="downmotifs" label="BETA Motifs in down-target regions" from_work_dir="BETA_OUTPUT/motifresult/DOWN_MOTIFS.txt" />
42 <data format="txt" name="downmotifs" label="BETA Motifs in down-target regions" from_work_dir="BETA_OUTPUT/motifresult/DOWN_MOTIFS.txt" /> 42 <data format="txt" name="down_non_motifs" label="BETA Motifs in down-target regions versus non-target regions" from_work_dir="BETA_OUTPUT/motifresult/DOWN_NON_MOTIFS.txt" />
43 <data format="txt" name="down_non_motifs" label="BETA Motifs in down-target regions versus non-target regions" from_work_dir="BETA_OUTPUT/motifresult/DOWN_NON_MOTIFS.txt" /> 43 <data format="txt" name="differentialmotifs" label="BETA Motifs up-target regions versus down-target regions" from_work_dir="BETA_OUTPUT/motifresult/DIFFERENTIAL_MOTIF_UP_DOWN.txt" />
44 <data format="txt" name="differentialmotifs" label="BETA Motifs up-target regions versus down-target regions" from_work_dir="BETA_OUTPUT/motifresult/DIFFERENTIAL_MOTIF_UP_DOWN.txt" /> 44 <data format="html" name="motifresult" label="BETA Motif analysis on target regions"/>
45 <data format="html" name="motifresult" label="BETA Motif analysis on target regions"/> 45 </outputs>
46 </outputs> 46 <tests>
47 <tests> 47 <test>
48 <test> 48 <param name='peakfile' value="peaks.bed" ftype="bed" dbkey="hg19"/>
49 <param name='peakfile' value="peaks.bed" ftype="bed" dbkey="hg19"/> 49 <param name="distance" value="100000"/>
50 <param name="distance" value="100000"/> 50 <param name="peaknumber" value="10000"/>
51 <param name="peaknumber" value="10000"/> 51 <param name="genomeName" value="hg19"/>
52 <param name="genomeName" value="hg19"/> 52 <param name='exprefile' value="diff_expr.xls" ftype="tabular" dbkey="hg19"/>
53 <param name='exprefile' value="diff_expr.xls" ftype="tabular" dbkey="hg19"/> 53 <param name="kind" value="LIM"/>
54 <param name="kind" value="LIM"/> 54 <param name="expreinfo" value="2,5,7"/>
55 <param name="expreinfo" type="text" value="2,5,7"/> 55 <param name="gname2" value="Refseq"/>
56 <param name="gname2" value="Refseq"/> 56 <param name="diff_fdr" value="1.0"/>
57 <param name="diff_fdr" value="1.0"/> 57 <param name="diff_amount" value="0.5"/>
58 <param name="diff_amount" value="0.5"/> 58 <param name="method" value="score"/>
59 <param name="method" value="score"/> 59 <output name="log">
60 <output name="log"> 60 <assert_contents>
61 <assert_contents> 61 <has_text_matching expression="Finished" />
62 <has_text_matching expression="Finished" /> 62 </assert_contents>
63 </assert_contents> 63 </output>
64 </output> 64 <output name="uptargetsoutput">
65 <output name="uptargetsoutput"> 65 <assert_contents>
66 <assert_contents> 66 <has_text_matching expression="NM_001002231" />
67 <has_text_matching expression="NM_001002231" /> 67 </assert_contents>
68 </assert_contents> 68 </output>
69 </output> 69 <output name="downtargetsoutput">
70 <output name="downtargetsoutput"> 70 <assert_contents>
71 <assert_contents> 71 <has_text_matching expression="NM_001280" />
72 <has_text_matching expression="NM_001280" /> 72 </assert_contents>
73 </assert_contents> 73 </output>
74 </output> 74 <output name="differentialmotifs">
75 <output name="differentialmotifs"> 75 <assert_contents>
76 <assert_contents> 76 <has_text_matching expression="CDX1\tHomeodomain Family" />
77 <has_text_matching expression="CDX1\tHomeodomain Family" /> 77 </assert_contents>
78 </assert_contents> 78 </output>
79 </output> 79 </test>
80 </test> 80 </tests>
81 </tests> 81 <help><![CDATA[
82 <help>
83 ** BETA plus ** 82 ** BETA plus **
84 83
85 @EXTERNAL_DOCUMENTATION@ 84 @EXTERNAL_DOCUMENTATION@
86 85
87 @CITATION_SECTION@ 86 @CITATION_SECTION@
88 87
89 This tool annotates the given intervals and scores with genome 88 This tool annotates the given intervals and scores with genome
90 features such as gene body. 89 features such as gene body.
91 Predicts Direct targets of TF and the active/repressive function 90 Predicts Direct targets of TF and the active/repressive function
92 prediction. Does motif analysis at targets region as well. 91 prediction. Does motif analysis at targets region as well.
93 It's the major module in CEAS package 92 It's the major module in CEAS package
94 which is written by Hyunjin Gene Shin, published in Bioinformatics 93 which is written by Hyunjin Gene Shin, published in Bioinformatics
95 (pubmed id:19689956). 94 (pubmed id:19689956).
96 95
97 .. class:: warningmark 96 .. class:: warningmark
101 ----- 100 -----
102 101
103 **Parameters** 102 **Parameters**
104 103
105 - **PEAKFILE file** contains peaks for the experiment in a bed 104 - **PEAKFILE file** contains peaks for the experiment in a bed
106 format file. Normally, it's produced by the peak calling tool. It's 105 format file. Normally, it's produced by the peak calling tool. It's
107 required. 106 required.
108 - **EXPREFILE file** contains the differentially expressed genes in a tab 107 - **EXPREFILE file** contains the differentially expressed genes in a tab
109 delimited text file. It's required. 108 delimited text file. It's required.
110 - **Kind** The kind of your expression file format, LIM for LIMMA standard 109 - **Kind** The kind of your expression file format, LIM for LIMMA standard
111 output with Microarray, CUF for Cuffdiffs standard output with RNA-seq, 110 output with Microarray, CUF for Cuffdiffs standard output with RNA-seq,
112 BSF for BETA specific format, and O for other formats. 111 BSF for BETA specific format, and O for other formats.
113 - **genome** hg19 for human and mm9 for mouse. For other genomes, don't set this parameter. 112 - **genome** hg19 for human and mm9 for mouse. For other genomes, don't set this parameter.
114 - **genomereference** Genome reference data with fasta format 113 - **genomereference** Genome reference data with fasta format
115 - **gname2** If this switch is on, gene or transcript IDs in files given 114 - **gname2** If this switch is on, gene or transcript IDs in files given
116 through -e will be considered as official gene symbols, DEFAULT=FALSE 115 through -e will be considered as official gene symbols, DEFAULT=FALSE
117 - **EXPREINFO** is the columns info of the geneID, up/down status and statistical 116 - **EXPREINFO** is the columns info of the geneID, up/down status and statistical
118 values column of your expression data. NOTE: use a comma as a connector. 117 values column of your expression data. NOTE: use a comma as a connector.
119 For example: 2,5,7 means geneID in the 2nd column, Tscore in the 5th column 118 For example: 2,5,7 means geneID in the 2nd column, Tscore in the 5th column
120 and FDR in the 7th column. 119 and FDR in the 7th column.
121 - **REFERENCE** is the refgene info file downloaded from UCSC genome browser. 120 - **REFERENCE** is the refgene info file downloaded from UCSC genome browser.
122 It is a tab delimited text file with gene annotation with refseq and gene symbol. 121 It is a tab delimited text file with gene annotation with refseq and gene symbol.
123 Input this file only if your genome is neither hg19 nor mm9. 122 Input this file only if your genome is neither hg19 nor mm9.
124 profiling 123 profiling
125 - **OUTPUT** to specify the output files directory 124 - **OUTPUT** to specify the output files directory
126 - **bl** Whether or not to use CTCF boundary file to get the contributed peaks 125 - **bl** Whether or not to use CTCF boundary file to get the contributed peaks
127 - **BOUNDARYFILE** is the file with reasonable boundaries if --bl is on and genome 126 - **BOUNDARYFILE** is the file with reasonable boundaries if --bl is on and genome
128 is neither hg19 nor mm9. 127 is neither hg19 nor mm9.
129 - **NAME** specify the name of the output files. 128 - **NAME** specify the name of the output files.
130 - **DISTANCE** specify the distance from the gene TSS within which peaks will be considered. 129 - **DISTANCE** specify the distance from the gene TSS within which peaks will be considered.
131 - **DIFF_FDR** specify the differential genes by the 3rd column in the file input 130 - **DIFF_FDR** specify the differential genes by the 3rd column in the file input
132 via -e; genes with a value less than this will be considered as the differentially 131 via -e; genes with a value less than this will be considered as the differentially
133 changed genes. 132 changed genes.
134 - **DIFF_AMOUNT** specify the differential genes the top #(DIFF_AMOUNT) ranked by 133 - **DIFF_AMOUNT** specify the differential genes the top #(DIFF_AMOUNT) ranked by
135 the 3rd column in file input via -e, genes ranked in the top # will be considered 134 the 3rd column in file input via -e, genes ranked in the top # will be considered
136 as the differentially expressed genes. 135 as the differentially expressed genes.
137 - **CUTOFF** specify a cutoff of ks-test in the function prediction part 136 - **CUTOFF** specify a cutoff of ks-test in the function prediction part
138 137
139 138
140 ----- 139 -----
141 140
142 **Script parameter list of BETA plus** 141 **Script parameter list of BETA plus**
143 142
144 :: 143 ::
145 144
146 -h, --help show this help message and exit 145 -h, --help show this help message and exit
147 -p PEAKFILE, --peakfile PEAKFILE 146 -p PEAKFILE, --peakfile PEAKFILE The bed format of peaks binding sites. (BETA support 3
148 The bed format of peaks binding sites. (BETA support 3 147 or 5 columns bed format, CHROM, START, END (NAME,
149 or 5 columns bed format, CHROM, START, END (NAME, 148 SCORE))
150 SCORE)) 149 -e EXPREFILE, --diff_expr EXPREFILE The differential expression file get from limma for
151 -e EXPREFILE, --diff_expr EXPREFILE 150 MicroArray ddata and cuffdiff for RNAseq data
152 The differential expression file get from limma for 151 -k {LIM,CUF,BSF,O}, --kind {LIM,CUF,BSF,O} The kind of your expression file,this is required,it
153 MicroArray ddata and cuffdiff for RNAseq data 152 can be LIM, CUF, BSF, O. LIM for LIMMA standard
154 -k {LIM,CUF,BSF,O}, --kind {LIM,CUF,BSF,O} 153 format. CUF for CUFDIFF standard format, BSF for BETA
155 The kind of your expression file,this is required,it 154 specific format and O for other formats, if is 'O',
156 can be LIM, CUF, BSF, O. LIM for LIMMA standard 155 columns infor required via --info
157 format. CUF for CUFDIFF standard format, BSF for BETA 156 -g {hg19,mm9}, --genome {hg19,mm9} Specify your species, hg19, mm9
158 specific format and O for other formats, if is 'O', 157 --gs GENOMEREFERNCE GenomeReference file with fasta format
159 columns infor required via --info 158 --gname2 If this switch is on, gene or transcript IDs in files
160 -g {hg19,mm9}, --genome {hg19,mm9} 159 given through -e will be considered as official gene
161 Specify your species, hg19, mm9 160 symbols, DEFAULT=FALSE
162 --gs GENOMEREFERNCE GenomeReference file with fasta format 161 --info EXPREINFO Specify the geneID, up/down status and statistcal
163 --gname2 If this switch is on, gene or transcript IDs in files 162 values column of your expression data,NOTE: use a
164 given through -e will be considered as official gene 163 comma as an connector. for example: 2,5,7 means geneID
165 symbols, DEFAULT=FALSE 164 in the 2nd column, Tscore in 5th column and FDR in 7
166 --info EXPREINFO Specify the geneID, up/down status and statistcal 165 column DEFAULT:2,5,7 for LIMMA; 2,10,13 for Cuffdiff
167 values column of your expression data,NOTE: use a 166 and 1,2,3 for BETA specific format
168 comma as an connector. for example: 2,5,7 means geneID 167 -r REFERENCE, --reference REFERENCE The refgene info file downloaded from UCSC genome
169 in the 2nd column, Tscore in 5th column and FDR in 7 168 browser.input this file only if your genome is neither
170 column DEFAULT:2,5,7 for LIMMA; 2,10,13 for Cuffdiff 169 hg19 nor mm9
171 and 1,2,3 for BETA specific format 170 -o OUTPUT, --output OUTPUT The directory to store all the output files, if you
172 -r REFERENCE, --reference REFERENCE 171 don't set this, files will be output into the current
173 The refgene info file downloaded from UCSC genome 172 directory
174 browser.input this file only if your genome is neither 173 --bl Whether or not use CTCF boundary to filter peaks
175 hg19 nor mm9 174 around a gene, DEFAULT=FALSE
176 -o OUTPUT, --output OUTPUT 175 --bf BOUNDARYFILE CTCF conserved peaks bed file, use this only when you
177 The directory to store all the output files, if you 176 set --bl and the genome is neither hg19 nor mm9
178 don't set this, files will be output into the current 177 --pn PEAKNUMBER The number of peaks you want to consider,
179 directory 178 DEFAULT=10000
180 --bl Whether or not use CTCF boundary to filter peaks 179 --method {score,distance} Define the method to do the TF/CR function prediction,
181 around a gene, DEFAULT=FALSE 180 score for regulatory potential, distance for the
182 --bf BOUNDARYFILE CTCF conserved peaks bed file, use this only when you 181 distance to the proximal binding peak. DEFAULT:SCORE
183 set --bl and the genome is neither hg19 nor mm9 182 -n NAME, --name NAME This argument is used to name the result file.If not
184 --pn PEAKNUMBER The number of peaks you want to consider, 183 set, the peakfile name will be used instead
185 DEFAULT=10000 184 -d DISTANCE, --distance DISTANCE Set a number which unit is 'base'. It will get peaks
186 --method {score,distance} 185 within this distance from gene TSS. default:100000
187 Define the method to do the TF/CR function prediction, 186 (100kb)
188 score for regulatory potential, distance for the 187 --df DIFF_FDR Input a number 0~1 as a threshold to pick out the most
189 distance to the proximal binding peak. DEFAULT:SCORE 188 significant differential expressed genes by FDR,
190 -n NAME, --name NAME This argument is used to name the result file.If not 189 DEFAULT = 1, that is select all the genes
191 set, the peakfile name will be used instead 190 --da DIFF_AMOUNT Get the most significant differential expressed genes
192 -d DISTANCE, --distance DISTANCE 191 by the percentage(0-1) or number(larger than 1)Input a
193 Set a number which unit is 'base'. It will get peaks 192 number between 0-1, the rank based on fdr for example,
194 within this distance from gene TSS. default:100000 193 2000, so that the script will only consider top 2000
195 (100kb) 194 genes as the differentially expressed genes. DEFAULT =
196 --df DIFF_FDR Input a number 0~1 as a threshold to pick out the most 195 0.5, that is select top 50 percent genes of up and
197 significant differential expressed genes by FDR, 196 down seprately. NOTE: if you want to use diff_fdr,
198 DEFAULT = 1, that is select all the genes 197 please set this parameter to 1, otherwise it will get
199 --da DIFF_AMOUNT Get the most significant differential expressed genes 198 the intersection of these two parameters
200 by the percentage(0-1) or number(larger than 1)Input a 199 -c CUTOFF, --cutoff CUTOFF Input a number between 0~1 as a threshold to select
201 number between 0-1, the rank based on fdr for example, 200 the closer target gene list(up regulate or down
202 2000, so that the script will only consider top 2000 201 regulate or both) with the p value was called by one
203 genes as the differentially expressed genes. DEFAULT = 202 side ks-test, DEFAULT = 0.001
204 0.5, that is select top 50 percent genes of up and 203
205 down seprately. NOTE: if you want to use diff_fdr, 204 ]]></help>
206 please set this parameter to 1, otherwise it will get 205 <expand macro="citations" />
207 the intersection of these two parameters
208 -c CUTOFF, --cutoff CUTOFF
209 Input a number between 0~1 as a threshold to select
210 the closer target gene list(up regulate or down
211 regulate or both) with the p value was called by one
212 side ks-test, DEFAULT = 0.001
213
214 </help>
215 </tool> 206 </tool>