comparison beta_basic.xml @ 2:9c5241259454 draft

planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/cistrome_beta commit 76ad167e754d8254ee4e9c6d2047c84c5f2da55a-dirty
author jjohnson
date Thu, 22 Mar 2018 08:33:55 -0400
parents 20453b656907
children 067573bac905
comparison
equal deleted inserted replaced
1:7f023a22da15 2:9c5241259454
1 <tool id="beta_basic" name="BETA-basic: Binding and Expression Target Analysis" version="0.1.0"> 1 <tool id="beta_basic" name="BETA-basic: Binding and Expression Target Analysis" version="0.1.0">
2 <description>Predict the factors (TFs or CRs) direct target genes by combining the binding and expression data</description> 2 <description>Predict the factors (TFs or CRs) direct target genes by combining the binding and expression data</description>
3 <macros> 3 <macros>
4 <import>beta_macros.xml</import> 4 <import>beta_macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements" /> 6 <expand macro="requirements" />
7 <command> 7 <expand macro="stdio" />
8 BETA basic 8 <command><![CDATA[
9 #include source=$common_opts# 9 BETA basic
10 #include source=$genome_opts# 10 #include source=$common_opts#
11 #include source=$extended_opts# 11 #include source=$genome_opts#
12 &amp;> $log 12 #include source=$extended_opts#
13 </command> 13 &> $log
14 <inputs> 14 ]]></command>
15 <expand macro="common_params" /> 15 <inputs>
16 <expand macro="genome_params" /> 16 <expand macro="common_params" />
17 <expand macro="extended_params" /> 17 <expand macro="genome_params" />
18 </inputs> 18 <expand macro="extended_params" />
19 <expand macro="stdio" /> 19 </inputs>
20 <outputs> 20 <outputs>
21 <data format="txt" name="log" label="Log of BETA basic"/> 21 <data format="txt" name="log" label="Log of BETA basic"/>
22 <data format="pdf" name="functionoutput" label="BETA functional prediction on ${peakfile.name}" from_work_dir="BETA_OUTPUT/NA_function_prediction.pdf"/> 22 <data format="pdf" name="functionoutput" label="BETA functional prediction on ${peakfile.name}" from_work_dir="BETA_OUTPUT/NA_function_prediction.pdf"/>
23 <data format="tabular" name="uptargetsoutput" label="BETA direct targets prediction on up regulated genes" from_work_dir="BETA_OUTPUT/NA_uptarget.txt"/> 23 <data format="tabular" name="uptargetsoutput" label="BETA direct targets prediction on up regulated genes" from_work_dir="BETA_OUTPUT/NA_uptarget.txt"/>
24 <data format="tabular" name="downtargetsoutput" label="BETA direct targets prediction on down regulated genes" from_work_dir="BETA_OUTPUT/NA_downtarget.txt"/> 24 <data format="tabular" name="downtargetsoutput" label="BETA direct targets prediction on down regulated genes" from_work_dir="BETA_OUTPUT/NA_downtarget.txt"/>
25 <data format="bed" name="uptargetpeaks" label="BETA Uptarget associated peaks" from_work_dir="BETA_OUTPUT/NA_uptarget_associate_peaks.bed"/> 25 <data format="bed" name="uptargetpeaks" label="BETA Uptarget associated peaks" from_work_dir="BETA_OUTPUT/NA_uptarget_associate_peaks.bed"/>
26 <data format="bed" name="downtargetpeaks" label="BETA Downtarget associated peaks" from_work_dir="BETA_OUTPUT/NA_downtarget_associate_peaks.bed"/> 26 <data format="bed" name="downtargetpeaks" label="BETA Downtarget associated peaks" from_work_dir="BETA_OUTPUT/NA_downtarget_associate_peaks.bed"/>
27 </outputs> 27 </outputs>
28 <tests> 28 <tests>
29 <test> 29 <test>
30 <param name='peakfile' value="peaks.bed" ftype="bed" dbkey="hg19"/> 30 <param name='peakfile' value="peaks.bed" ftype="bed" dbkey="hg19"/>
31 <param name="distance" value="100000"/> 31 <param name="distance" value="100000"/>
32 <param name="peaknumber" value="10000"/> 32 <param name="peaknumber" value="10000"/>
33 <param name="genomeName" value="hg19"/> 33 <param name="genomeName" value="hg19"/>
34 <param name='exprefile' value="diff_expr.xls" ftype="tabular" dbkey="hg19"/> 34 <param name='exprefile' value="diff_expr.xls" ftype="tabular" dbkey="hg19"/>
35 <param name="kind" value="LIM"/> 35 <param name="kind" value="LIM"/>
36 <param name="expreinfo" type="text" value="2,5,7"/> 36 <param name="expreinfo" value="2,5,7"/>
37 <param name="gname2" value="Refseq"/> 37 <param name="gname2" value="Refseq"/>
38 <param name="diff_fdr" value="1.0"/> 38 <param name="diff_fdr" value="1.0"/>
39 <param name="diff_amount" value="0.5"/> 39 <param name="diff_amount" value="0.5"/>
40 <param name="method" value="score"/> 40 <param name="method" value="score"/>
41 <output name="log"> 41 <output name="log">
42 <assert_contents> 42 <assert_contents>
43 <has_text_matching expression="Finished" /> 43 <has_text_matching expression="Finished" />
44 </assert_contents> 44 </assert_contents>
45 </output> 45 </output>
46 <output name="targetsoutput"> 46 <output name="uptargetsoutput">
47 <assert_contents> 47 <assert_contents>
48 <has_text_matching expression="chr19\t4675243\t4723855\tNM_139159\t1.1.*\t-\tDPP" /> 48 <has_text_matching expression="chr19\t4675243\t4723855\tNM_139159\t1.1.*\t-\tDPP" />
49 </assert_contents> 49 </assert_contents>
50 </output> 50 </output>
51 <output name="targetpeaks"> 51 <output name="uptargetpeaks">
52 <assert_contents> 52 <assert_contents>
53 <has_text_matching expression="chr19\t4723422\t4724314\tregion_9\tNM_139159\tDPP9\t13\t0.6.*" /> 53 <has_text_matching expression="chr19\t4723422\t4724314\tregion_9\tNM_139159\tDPP9\t13\t0.6.*" />
54 </assert_contents> 54 </assert_contents>
55 </output> 55 </output>
56 </test> 56 </test>
57 </tests> 57 </tests>
58 <help> 58 <help><![CDATA[
59 ** BETA basic ** 59 ** BETA basic **
60 60
61 @EXTERNAL_DOCUMENTATION@ 61 @EXTERNAL_DOCUMENTATION@
62 62
63 @CITATION_SECTION@ 63 @CITATION_SECTION@
74 ----- 74 -----
75 75
76 **Parameters** 76 **Parameters**
77 77
78 - **PEAKFILE file** contains peaks for the experiment in a bed 78 - **PEAKFILE file** contains peaks for the experiment in a bed
79 format file. Normally, it's produced by the peak calling tool. It's 79 format file. Normally, it's produced by the peak calling tool. It's
80 required. 80 required.
81 - **EXPREFILE file** contains the differentially expressed genes in a tab 81 - **EXPREFILE file** contains the differentially expressed genes in a tab
82 delimited text file. It's required. 82 delimited text file. It's required.
83 - **Kind** The kind of your expression file format, LIM for LIMMA standard 83 - **Kind** The kind of your expression file format, LIM for LIMMA standard
84 output with Microarray, CUF for Cuffdiffs standard output with RNA-seq, 84 output with Microarray, CUF for Cuffdiffs standard output with RNA-seq,
85 BSF for BETA specific format, and O for other formats. 85 BSF for BETA specific format, and O for other formats.
86 - **genome** hg19 for human and mm9 for mouse. For any other genome, don't set this parameter. 86 - **genome** hg19 for human and mm9 for mouse. For any other genome, don't set this parameter.
87 - **gname2** If this switch is on, gene or transcript IDs in files given 87 - **gname2** If this switch is on, gene or transcript IDs in files given
88 through -e will be considered as official gene symbols, DEFAULT=FALSE 88 through -e will be considered as official gene symbols, DEFAULT=FALSE
89 - **EXPREINFO** is the columns info of the geneID, up/down status and statistical 89 - **EXPREINFO** is the columns info of the geneID, up/down status and statistical
90 values column of your expression data. NOTE: use a comma as a connector. 90 values column of your expression data. NOTE: use a comma as a connector.
91 For example: 2,5,7 means geneID in the 2nd column, Tscore in the 5th column 91 For example: 2,5,7 means geneID in the 2nd column, Tscore in the 5th column
92 and FDR in the 7th column. 92 and FDR in the 7th column.
93 - **REFERENCE** is the refgene info file downloaded from UCSC genome browser. 93 - **REFERENCE** is the refgene info file downloaded from UCSC genome browser.
94 It is a tab delimited text file with gene annotation with refseq and gene symbol. 94 It is a tab delimited text file with gene annotation with refseq and gene symbol.
95 Input this file only if your genome is neither hg19 nor mm9. 95 Input this file only if your genome is neither hg19 nor mm9.
96 profiling 96 profiling
97 - **OUTPUT** to specify the output files directory 97 - **OUTPUT** to specify the output files directory
98 - **bl** Whether or not to use CTCF boundary file to get the contributed peaks 98 - **bl** Whether or not to use CTCF boundary file to get the contributed peaks
99 - **BOUNDARYFILE** is the file with reasonable boundaries if --bl is on and genome 99 - **BOUNDARYFILE** is the file with reasonable boundaries if --bl is on and genome
100 is neither hg19 nor mm9. 100 is neither hg19 nor mm9.
101 - **NAME** specify the name of the output files. 101 - **NAME** specify the name of the output files.
102 - **DISTANCE** specify the distance from the gene TSS within which peaks will be considered. 102 - **DISTANCE** specify the distance from the gene TSS within which peaks will be considered.
103 - **DIFF_FDR** specify the differential genes by the 3rd column in file input 103 - **DIFF_FDR** specify the differential genes by the 3rd column in file input
104 via -e, genes with less than this value will be considered as the differentially 104 via -e, genes with less than this value will be considered as the differentially
105 changed genes. 105 changed genes.
106 - **DIFF_AMOUNT** specify the differential genes the top #(DIFF_AMOUNT) ranked by 106 - **DIFF_AMOUNT** specify the differential genes the top #(DIFF_AMOUNT) ranked by
107 the 3rd column in file input via -e, genes ranked in the top # will be considered 107 the 3rd column in file input via -e, genes ranked in the top # will be considered
108 as the differentially expressed genes. 108 as the differentially expressed genes.
109 - **CUTOFF** specify a cutoff of ks-test in the function prediction part 109 - **CUTOFF** specify a cutoff of ks-test in the function prediction part
110 110
111 ----- 111 -----
112 112
113 **Script parameter list of BETA basic** 113 **Script parameter list of BETA basic**
114 114
115 :: 115 ::
116 116
117 -h, --help show this help message and exit 117 -h, --help show this help message and exit
118 -p PEAKFILE, --peakfile PEAKFILE 118 -p PEAKFILE, --peakfile PEAKFILE The bed format of peaks binding sites. (BETA support 3
119 The bed format of peaks binding sites. (BETA support 3 119 or 5 columns bed format, CHROM, START, END (NAME,
120 or 5 columns bed format, CHROM, START, END (NAME, 120 SCORE))
121 SCORE)) 121 -e EXPREFILE, --diff_expr EXPREFILE The differential expression file get from limma for
122 -e EXPREFILE, --diff_expr EXPREFILE 122 MicroArray ddata and cuffdiff for RNAseq data
123 The differential expression file get from limma for 123 -k {LIM,CUF,BSF,O}, --kind {LIM,CUF,BSF,O} The kind of your expression file,this is required,it
124 MicroArray ddata and cuffdiff for RNAseq data 124 can be LIM, CUF, BSF, O. LIM for LIMMA standard
125 -k {LIM,CUF,BSF,O}, --kind {LIM,CUF,BSF,O} 125 format. CUF for CUFDIFF standard format, BSF for BETA
126 The kind of your expression file,this is required,it 126 specific format and O for other formats, if is 'O',
127 can be LIM, CUF, BSF, O. LIM for LIMMA standard 127 columns infor required via --info
128 format. CUF for CUFDIFF standard format, BSF for BETA 128 -g {hg19,mm9}, --genome {hg19,mm9} Specify your species, hg19, mm9. For other genome
129 specific format and O for other formats, if is 'O', 129 assembily versions of human and mouse or other
130 columns infor required via --info 130 species, ignore this parameter.
131 -g {hg19,mm9}, --genome {hg19,mm9} 131 --gname2 If this switch is on, gene or transcript IDs in files
132 Specify your species, hg19, mm9. For other genome 132 given through -e will be considered as official gene
133 assembily versions of human and mouse or other 133 symbols, DEFAULT=FALSE
134 species, ignore this parameter. 134 --info EXPREINFO Specify the geneID, up/down status and statistcal
135 --gname2 If this switch is on, gene or transcript IDs in files 135 values column of your expression data,NOTE: use a
136 given through -e will be considered as official gene 136 comma as an connector. for example: 2,5,7 means geneID
137 symbols, DEFAULT=FALSE 137 in the 2nd column, Tscore in 5th column and FDR in 7
138 --info EXPREINFO Specify the geneID, up/down status and statistcal 138 column DEFAULT:2,5,7 for LIMMA; 2,10,13 for Cuffdiff
139 values column of your expression data,NOTE: use a 139 and 1,2,3 for BETA specific format
140 comma as an connector. for example: 2,5,7 means geneID 140 -r REFERENCE, --reference REFERENCE The refgene info file downloaded from UCSC genome
141 in the 2nd column, Tscore in 5th column and FDR in 7 141 browser.input this file only if your genome is neither
142 column DEFAULT:2,5,7 for LIMMA; 2,10,13 for Cuffdiff 142 hg19 nor mm9
143 and 1,2,3 for BETA specific format 143 -o OUTPUT, --output OUTPUT The directory to store all the output files, if you
144 -r REFERENCE, --reference REFERENCE 144 don't set this, files will be output into the current
145 The refgene info file downloaded from UCSC genome 145 directory
146 browser.input this file only if your genome is neither 146 --bl Whether or not use CTCF boundary to filter peaks
147 hg19 nor mm9 147 around a gene, DEFAULT=FALSE
148 -o OUTPUT, --output OUTPUT 148 --bf BOUNDARYFILE CTCF conserved peaks bed file, use this only when you
149 The directory to store all the output files, if you 149 set --bl and the genome is neither hg19 nor mm9
150 don't set this, files will be output into the current 150 --pn PEAKNUMBER The number of peaks you want to consider,
151 directory 151 DEFAULT=10000
152 --bl Whether or not use CTCF boundary to filter peaks 152 --method {score,distance} Define the method to do the TF/CR function prediction,
153 around a gene, DEFAULT=FALSE 153 score for regulatory potential, distance for the
154 --bf BOUNDARYFILE CTCF conserved peaks bed file, use this only when you 154 distance to the proximal binding peak. DEFAULT:SCORE
155 set --bl and the genome is neither hg19 nor mm9 155 -n NAME, --name NAME This argument is used to name the result file.If not
156 --pn PEAKNUMBER The number of peaks you want to consider, 156 set, the peakfile name will be used instead
157 DEFAULT=10000 157 -d DISTANCE, --distance DISTANCE Set a number which unit is 'base'. It will get peaks
158 --method {score,distance} 158 within this distance from gene TSS. default:100000
159 Define the method to do the TF/CR function prediction, 159 (100kb)
160 score for regulatory potential, distance for the 160 --df DIFF_FDR Input a number 0~1 as a threshold to pick out the most
161 distance to the proximal binding peak. DEFAULT:SCORE 161 significant differential expressed genes by FDR,
162 -n NAME, --name NAME This argument is used to name the result file.If not 162 DEFAULT = 1, that is select all the genes
163 set, the peakfile name will be used instead 163 --da DIFF_AMOUNT Get the most significant differential expressed genes
164 -d DISTANCE, --distance DISTANCE 164 by the percentage(0-1) or number(larger than 1)Input a
165 Set a number which unit is 'base'. It will get peaks 165 number between 0-1, the rank based on fdr for example,
166 within this distance from gene TSS. default:100000 166 2000, so that the script will only consider top 2000
167 (100kb) 167 genes as the differentially expressed genes. DEFAULT =
168 --df DIFF_FDR Input a number 0~1 as a threshold to pick out the most 168 0.5, that is select top 50 percent genes of up and
169 significant differential expressed genes by FDR, 169 down seprately. NOTE: if you want to use diff_fdr,
170 DEFAULT = 1, that is select all the genes 170 please set this parameter to 1, otherwise it will get
171 --da DIFF_AMOUNT Get the most significant differential expressed genes 171 the intersection of these two parameters
172 by the percentage(0-1) or number(larger than 1)Input a 172 -c CUTOFF, --cutoff CUTOFF Input a number between 0~1 as a threshold to select
173 number between 0-1, the rank based on fdr for example, 173 the closer target gene list(up regulate or down
174 2000, so that the script will only consider top 2000 174 regulate or both) with the p value was called by one
175 genes as the differentially expressed genes. DEFAULT = 175 side ks-test, DEFAULT = 0.001
176 0.5, that is select top 50 percent genes of up and
177 down seprately. NOTE: if you want to use diff_fdr,
178 please set this parameter to 1, otherwise it will get
179 the intersection of these two parameters
180 -c CUTOFF, --cutoff CUTOFF
181 Input a number between 0~1 as a threshold to select
182 the closer target gene list(up regulate or down
183 regulate or both) with the p value was called by one
184 side ks-test, DEFAULT = 0.001
185 176
186 </help> 177 ]]></help>
187 178 <expand macro="citations" />
188 </tool> 179 </tool>