Mercurial > repos > jjohnson > cistrome_beta
comparison beta_basic.xml @ 2:9c5241259454 draft
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/cistrome_beta commit 76ad167e754d8254ee4e9c6d2047c84c5f2da55a-dirty
author | jjohnson |
---|---|
date | Thu, 22 Mar 2018 08:33:55 -0400 |
parents | 20453b656907 |
children | 067573bac905 |
comparison
equal
deleted
inserted
replaced
1:7f023a22da15 | 2:9c5241259454 |
---|---|
1 <tool id="beta_basic" name="BETA-basic: Binding and Expression Target Analysis" version="0.1.0"> | 1 <tool id="beta_basic" name="BETA-basic: Binding and Expression Target Analysis" version="0.1.0"> |
2 <description>Predict the factors (TFs or CRs) direct target genes by combining the binding and expression data</description> | 2 <description>Predict the factors (TFs or CRs) direct target genes by combining the binding and expression data</description> |
3 <macros> | 3 <macros> |
4 <import>beta_macros.xml</import> | 4 <import>beta_macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="requirements" /> | 6 <expand macro="requirements" /> |
7 <command> | 7 <expand macro="stdio" /> |
8 BETA basic | 8 <command><![CDATA[ |
9 #include source=$common_opts# | 9 BETA basic |
10 #include source=$genome_opts# | 10 #include source=$common_opts# |
11 #include source=$extended_opts# | 11 #include source=$genome_opts# |
12 &> $log | 12 #include source=$extended_opts# |
13 </command> | 13 &> $log |
14 <inputs> | 14 ]]></command> |
15 <expand macro="common_params" /> | 15 <inputs> |
16 <expand macro="genome_params" /> | 16 <expand macro="common_params" /> |
17 <expand macro="extended_params" /> | 17 <expand macro="genome_params" /> |
18 </inputs> | 18 <expand macro="extended_params" /> |
19 <expand macro="stdio" /> | 19 </inputs> |
20 <outputs> | 20 <outputs> |
21 <data format="txt" name="log" label="Log of BETA basic"/> | 21 <data format="txt" name="log" label="Log of BETA basic"/> |
22 <data format="pdf" name="functionoutput" label="BETA functional prediction on ${peakfile.name}" from_work_dir="BETA_OUTPUT/NA_function_prediction.pdf"/> | 22 <data format="pdf" name="functionoutput" label="BETA functional prediction on ${peakfile.name}" from_work_dir="BETA_OUTPUT/NA_function_prediction.pdf"/> |
23 <data format="tabular" name="uptargetsoutput" label="BETA direct targets prediction on up regulated genes" from_work_dir="BETA_OUTPUT/NA_uptarget.txt"/> | 23 <data format="tabular" name="uptargetsoutput" label="BETA direct targets prediction on up regulated genes" from_work_dir="BETA_OUTPUT/NA_uptarget.txt"/> |
24 <data format="tabular" name="downtargetsoutput" label="BETA direct targets prediction on down regulated genes" from_work_dir="BETA_OUTPUT/NA_downtarget.txt"/> | 24 <data format="tabular" name="downtargetsoutput" label="BETA direct targets prediction on down regulated genes" from_work_dir="BETA_OUTPUT/NA_downtarget.txt"/> |
25 <data format="bed" name="uptargetpeaks" label="BETA Uptarget associated peaks" from_work_dir="BETA_OUTPUT/NA_uptarget_associate_peaks.bed"/> | 25 <data format="bed" name="uptargetpeaks" label="BETA Uptarget associated peaks" from_work_dir="BETA_OUTPUT/NA_uptarget_associate_peaks.bed"/> |
26 <data format="bed" name="downtargetpeaks" label="BETA Downtarget associated peaks" from_work_dir="BETA_OUTPUT/NA_downtarget_associate_peaks.bed"/> | 26 <data format="bed" name="downtargetpeaks" label="BETA Downtarget associated peaks" from_work_dir="BETA_OUTPUT/NA_downtarget_associate_peaks.bed"/> |
27 </outputs> | 27 </outputs> |
28 <tests> | 28 <tests> |
29 <test> | 29 <test> |
30 <param name='peakfile' value="peaks.bed" ftype="bed" dbkey="hg19"/> | 30 <param name='peakfile' value="peaks.bed" ftype="bed" dbkey="hg19"/> |
31 <param name="distance" value="100000"/> | 31 <param name="distance" value="100000"/> |
32 <param name="peaknumber" value="10000"/> | 32 <param name="peaknumber" value="10000"/> |
33 <param name="genomeName" value="hg19"/> | 33 <param name="genomeName" value="hg19"/> |
34 <param name='exprefile' value="diff_expr.xls" ftype="tabular" dbkey="hg19"/> | 34 <param name='exprefile' value="diff_expr.xls" ftype="tabular" dbkey="hg19"/> |
35 <param name="kind" value="LIM"/> | 35 <param name="kind" value="LIM"/> |
36 <param name="expreinfo" type="text" value="2,5,7"/> | 36 <param name="expreinfo" value="2,5,7"/> |
37 <param name="gname2" value="Refseq"/> | 37 <param name="gname2" value="Refseq"/> |
38 <param name="diff_fdr" value="1.0"/> | 38 <param name="diff_fdr" value="1.0"/> |
39 <param name="diff_amount" value="0.5"/> | 39 <param name="diff_amount" value="0.5"/> |
40 <param name="method" value="score"/> | 40 <param name="method" value="score"/> |
41 <output name="log"> | 41 <output name="log"> |
42 <assert_contents> | 42 <assert_contents> |
43 <has_text_matching expression="Finished" /> | 43 <has_text_matching expression="Finished" /> |
44 </assert_contents> | 44 </assert_contents> |
45 </output> | 45 </output> |
46 <output name="targetsoutput"> | 46 <output name="uptargetsoutput"> |
47 <assert_contents> | 47 <assert_contents> |
48 <has_text_matching expression="chr19\t4675243\t4723855\tNM_139159\t1.1.*\t-\tDPP" /> | 48 <has_text_matching expression="chr19\t4675243\t4723855\tNM_139159\t1.1.*\t-\tDPP" /> |
49 </assert_contents> | 49 </assert_contents> |
50 </output> | 50 </output> |
51 <output name="targetpeaks"> | 51 <output name="uptargetpeaks"> |
52 <assert_contents> | 52 <assert_contents> |
53 <has_text_matching expression="chr19\t4723422\t4724314\tregion_9\tNM_139159\tDPP9\t13\t0.6.*" /> | 53 <has_text_matching expression="chr19\t4723422\t4724314\tregion_9\tNM_139159\tDPP9\t13\t0.6.*" /> |
54 </assert_contents> | 54 </assert_contents> |
55 </output> | 55 </output> |
56 </test> | 56 </test> |
57 </tests> | 57 </tests> |
58 <help> | 58 <help><![CDATA[ |
59 ** BETA basic ** | 59 ** BETA basic ** |
60 | 60 |
61 @EXTERNAL_DOCUMENTATION@ | 61 @EXTERNAL_DOCUMENTATION@ |
62 | 62 |
63 @CITATION_SECTION@ | 63 @CITATION_SECTION@ |
74 ----- | 74 ----- |
75 | 75 |
76 **Parameters** | 76 **Parameters** |
77 | 77 |
78 - **PEAKFILE file** contains peaks for the experiment in a bed | 78 - **PEAKFILE file** contains peaks for the experiment in a bed |
79 format file. Normally, it's produced by the peak calling tool. It's | 79 format file. Normally, it's produced by the peak calling tool. It's |
80 required. | 80 required. |
81 - **EXPREFILE file** contains the differentially expressed genes in a tab | 81 - **EXPREFILE file** contains the differentially expressed genes in a tab |
82 delimited text file. It's required. | 82 delimited text file. It's required. |
83 - **Kind** The kind of your expression file format, LIM for LIMMA standard | 83 - **Kind** The kind of your expression file format, LIM for LIMMA standard |
84 output with Microarray, CUF for Cuffdiffs standard output with RNA-seq, | 84 output with Microarray, CUF for Cuffdiffs standard output with RNA-seq, |
85 BSF for BETA specific format, and O for other formats. | 85 BSF for BETA specific format, and O for other formats. |
86 - **genome** hg19 for human and mm9 for mouse. For any other genome, don't set this parameter. | 86 - **genome** hg19 for human and mm9 for mouse. For any other genome, don't set this parameter. |
87 - **gname2** If this switch is on, gene or transcript IDs in files given | 87 - **gname2** If this switch is on, gene or transcript IDs in files given |
88 through -e will be considered as official gene symbols, DEFAULT=FALSE | 88 through -e will be considered as official gene symbols, DEFAULT=FALSE |
89 - **EXPREINFO** is the columns info of the geneID, up/down status and statistical | 89 - **EXPREINFO** is the columns info of the geneID, up/down status and statistical |
90 values column of your expression data. NOTE: use a comma as a connector. | 90 values column of your expression data. NOTE: use a comma as a connector. |
91 For example: 2,5,7 means geneID in the 2nd column, Tscore in the 5th column | 91 For example: 2,5,7 means geneID in the 2nd column, Tscore in the 5th column |
92 and FDR in the 7th column. | 92 and FDR in the 7th column. |
93 - **REFERENCE** is the refgene info file downloaded from UCSC genome browser. | 93 - **REFERENCE** is the refgene info file downloaded from UCSC genome browser. |
94 It is a tab delimited text file with gene annotation with refseq and gene symbol. | 94 It is a tab delimited text file with gene annotation with refseq and gene symbol. |
95 Input this file only if your genome is neither hg19 nor mm9. | 95 Input this file only if your genome is neither hg19 nor mm9. |
96 profiling | 96 profiling |
97 - **OUTPUT** to specify the output files directory | 97 - **OUTPUT** to specify the output files directory |
98 - **bl** Whether or not to use CTCF boundary file to get the contributed peaks | 98 - **bl** Whether or not to use CTCF boundary file to get the contributed peaks |
99 - **BOUNDARYFILE** is the file with reasonable boundaries if --bl is on and genome | 99 - **BOUNDARYFILE** is the file with reasonable boundaries if --bl is on and genome |
100 is neither hg19 nor mm9. | 100 is neither hg19 nor mm9. |
101 - **NAME** specify the name of the output files. | 101 - **NAME** specify the name of the output files. |
102 - **DISTANCE** specify the distance within which peaks will be considered. | 102 - **DISTANCE** specify the distance within which peaks will be considered. |
103 - **DIFF_FDR** specify the differential genes by the 3rd column in file input | 103 - **DIFF_FDR** specify the differential genes by the 3rd column in file input |
104 via -e, genes with a value less than this will be considered as the differentially | 104 via -e, genes with a value less than this will be considered as the differentially |
105 changed genes. | 105 changed genes. |
106 - **DIFF_AMOUNT** specify the differential genes as the top #(DIFF_AMOUNT) ranked by | 106 - **DIFF_AMOUNT** specify the differential genes as the top #(DIFF_AMOUNT) ranked by |
107 the 3rd column in file input via -e, genes ranked in the top # will be considered | 107 the 3rd column in file input via -e, genes ranked in the top # will be considered |
108 as the differentially expressed genes. | 108 as the differentially expressed genes. |
109 - **CUTOFF** specify a cutoff of ks-test in the function prediction part | 109 - **CUTOFF** specify a cutoff of ks-test in the function prediction part |
110 | 110 |
111 ----- | 111 ----- |
112 | 112 |
113 **Script parameter list of BETA basic** | 113 **Script parameter list of BETA basic** |
114 | 114 |
115 :: | 115 :: |
116 | 116 |
117 -h, --help show this help message and exit | 117 -h, --help show this help message and exit |
118 -p PEAKFILE, --peakfile PEAKFILE | 118 -p PEAKFILE, --peakfile PEAKFILE The bed format of peaks binding sites. (BETA support 3 |
119 The bed format of peaks binding sites. (BETA support 3 | 119 or 5 columns bed format, CHROM, START, END (NAME, |
120 or 5 columns bed format, CHROM, START, END (NAME, | 120 SCORE)) |
121 SCORE)) | 121 -e EXPREFILE, --diff_expr EXPREFILE The differential expression file get from limma for |
122 -e EXPREFILE, --diff_expr EXPREFILE | 122 MicroArray ddata and cuffdiff for RNAseq data |
123 The differential expression file get from limma for | 123 -k {LIM,CUF,BSF,O}, --kind {LIM,CUF,BSF,O} The kind of your expression file,this is required,it |
124 MicroArray ddata and cuffdiff for RNAseq data | 124 can be LIM, CUF, BSF, O. LIM for LIMMA standard |
125 -k {LIM,CUF,BSF,O}, --kind {LIM,CUF,BSF,O} | 125 format. CUF for CUFDIFF standard format, BSF for BETA |
126 The kind of your expression file,this is required,it | 126 specific format and O for other formats, if is 'O', |
127 can be LIM, CUF, BSF, O. LIM for LIMMA standard | 127 columns infor required via --info |
128 format. CUF for CUFDIFF standard format, BSF for BETA | 128 -g {hg19,mm9}, --genome {hg19,mm9} Specify your species, hg19, mm9. For other genome |
129 specific format and O for other formats, if is 'O', | 129 assembily versions of human and mouse or other |
130 columns infor required via --info | 130 species, ignore this parameter. |
131 -g {hg19,mm9}, --genome {hg19,mm9} | 131 --gname2 If this switch is on, gene or transcript IDs in files |
132 Specify your species, hg19, mm9. For other genome | 132 given through -e will be considered as official gene |
133 assembily versions of human and mouse or other | 133 symbols, DEFAULT=FALSE |
134 species, ignore this parameter. | 134 --info EXPREINFO Specify the geneID, up/down status and statistcal |
135 --gname2 If this switch is on, gene or transcript IDs in files | 135 values column of your expression data,NOTE: use a |
136 given through -e will be considered as official gene | 136 comma as an connector. for example: 2,5,7 means geneID |
137 symbols, DEFAULT=FALSE | 137 in the 2nd column, Tscore in 5th column and FDR in 7 |
138 --info EXPREINFO Specify the geneID, up/down status and statistcal | 138 column DEFAULT:2,5,7 for LIMMA; 2,10,13 for Cuffdiff |
139 values column of your expression data,NOTE: use a | 139 and 1,2,3 for BETA specific format |
140 comma as an connector. for example: 2,5,7 means geneID | 140 -r REFERENCE, --reference REFERENCE The refgene info file downloaded from UCSC genome |
141 in the 2nd column, Tscore in 5th column and FDR in 7 | 141 browser.input this file only if your genome is neither |
142 column DEFAULT:2,5,7 for LIMMA; 2,10,13 for Cuffdiff | 142 hg19 nor mm9 |
143 and 1,2,3 for BETA specific format | 143 -o OUTPUT, --output OUTPUT The directory to store all the output files, if you |
144 -r REFERENCE, --reference REFERENCE | 144 don't set this, files will be output into the current |
145 The refgene info file downloaded from UCSC genome | 145 directory |
146 browser.input this file only if your genome is neither | 146 --bl Whether or not use CTCF boundary to filter peaks |
147 hg19 nor mm9 | 147 around a gene, DEFAULT=FALSE |
148 -o OUTPUT, --output OUTPUT | 148 --bf BOUNDARYFILE CTCF conserved peaks bed file, use this only when you |
149 The directory to store all the output files, if you | 149 set --bl and the genome is neither hg19 nor mm9 |
150 don't set this, files will be output into the current | 150 --pn PEAKNUMBER The number of peaks you want to consider, |
151 directory | 151 DEFAULT=10000 |
152 --bl Whether or not use CTCF boundary to filter peaks | 152 --method {score,distance} Define the method to do the TF/CR function prediction, |
153 around a gene, DEFAULT=FALSE | 153 score for regulatory potential, distance for the |
154 --bf BOUNDARYFILE CTCF conserved peaks bed file, use this only when you | 154 distance to the proximal binding peak. DEFAULT:SCORE |
155 set --bl and the genome is neither hg19 nor mm9 | 155 -n NAME, --name NAME This argument is used to name the result file.If not |
156 --pn PEAKNUMBER The number of peaks you want to consider, | 156 set, the peakfile name will be used instead |
157 DEFAULT=10000 | 157 -d DISTANCE, --distance DISTANCE Set a number which unit is 'base'. It will get peaks |
158 --method {score,distance} | 158 within this distance from gene TSS. default:100000 |
159 Define the method to do the TF/CR function prediction, | 159 (100kb) |
160 score for regulatory potential, distance for the | 160 --df DIFF_FDR Input a number 0~1 as a threshold to pick out the most |
161 distance to the proximal binding peak. DEFAULT:SCORE | 161 significant differential expressed genes by FDR, |
162 -n NAME, --name NAME This argument is used to name the result file.If not | 162 DEFAULT = 1, that is select all the genes |
163 set, the peakfile name will be used instead | 163 --da DIFF_AMOUNT Get the most significant differential expressed genes |
164 -d DISTANCE, --distance DISTANCE | 164 by the percentage(0-1) or number(larger than 1)Input a |
165 Set a number which unit is 'base'. It will get peaks | 165 number between 0-1, the rank based on fdr for example, |
166 within this distance from gene TSS. default:100000 | 166 2000, so that the script will only consider top 2000 |
167 (100kb) | 167 genes as the differentially expressed genes. DEFAULT = |
168 --df DIFF_FDR Input a number 0~1 as a threshold to pick out the most | 168 0.5, that is select top 50 percent genes of up and |
169 significant differential expressed genes by FDR, | 169 down seprately. NOTE: if you want to use diff_fdr, |
170 DEFAULT = 1, that is select all the genes | 170 please set this parameter to 1, otherwise it will get |
171 --da DIFF_AMOUNT Get the most significant differential expressed genes | 171 the intersection of these two parameters |
172 by the percentage(0-1) or number(larger than 1)Input a | 172 -c CUTOFF, --cutoff CUTOFF Input a number between 0~1 as a threshold to select |
173 number between 0-1, the rank based on fdr for example, | 173 the closer target gene list(up regulate or down |
174 2000, so that the script will only consider top 2000 | 174 regulate or both) with the p value was called by one |
175 genes as the differentially expressed genes. DEFAULT = | 175 side ks-test, DEFAULT = 0.001 |
176 0.5, that is select top 50 percent genes of up and | |
177 down seprately. NOTE: if you want to use diff_fdr, | |
178 please set this parameter to 1, otherwise it will get | |
179 the intersection of these two parameters | |
180 -c CUTOFF, --cutoff CUTOFF | |
181 Input a number between 0~1 as a threshold to select | |
182 the closer target gene list(up regulate or down | |
183 regulate or both) with the p value was called by one | |
184 side ks-test, DEFAULT = 0.001 | |
185 | 176 |
186 </help> | 177 ]]></help> |
187 | 178 <expand macro="citations" /> |
188 </tool> | 179 </tool> |