Mercurial > repos > jjohnson > cistrome_beta
view beta_plus.xml @ 2:9c5241259454 draft
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/cistrome_beta commit 76ad167e754d8254ee4e9c6d2047c84c5f2da55a-dirty
author | jjohnson |
---|---|
date | Thu, 22 Mar 2018 08:33:55 -0400 |
parents | 20453b656907 |
children | 067573bac905 |
line wrap: on
line source
<!--
  Galaxy tool wrapper for the "BETA plus" sub-command.
  Shared requirements, stdio handling, parameter groups and citations are
  pulled in from beta_macros.xml via <expand/>; only the motif-count
  parameter and the motif-analysis outputs are declared in this file.
-->
<tool id="beta_plus" name="BETA-plus: Binding and Expression Target prediction and motif analysis" version="0.1.0">
    <description>Predict the factors (TFs or CRs) direct target genes by combining the binding and expression data, then do motif analysis on target regions</description>
    <macros>
        <import>beta_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <command><![CDATA[
        ## Option groups below are included from variables that are not defined
        ## in this file (presumably provided through beta_macros.xml).
        BETA plus
        #include source=$common_opts#
        #include source=$genome_opts#
        #include source=$ref_genome_seq_opts#
        #include source=$extended_opts#
        ## "motifs" is an optional parameter: emit --mn only when a value was
        ## supplied, so BETA is never handed the flag without an argument.
        #if str($motifs).strip() != ''
            --mn $motifs
        #end if
        &> '$log'
        ## Publish the HTML motif report: the page itself becomes the dataset,
        ## its scripts, stylesheets and images go to the dataset's extra files.
        && mkdir -p '$motifresult.extra_files_path'
        && cp BETA_OUTPUT/motifresult/betamotif.html '$motifresult'
        && cp BETA_OUTPUT/motifresult/*.js '$motifresult.extra_files_path'
        && cp BETA_OUTPUT/motifresult/*.css '$motifresult.extra_files_path'
        && cp -r BETA_OUTPUT/motifresult/img '$motifresult.extra_files_path'
    ]]></command>
    <inputs>
        <expand macro="common_params" />
        <expand macro="genome_params" />
        <expand macro="refGenomeSourceConditional" />
        <expand macro="extended_params" />
        <!-- One field with two meanings: a p-value cutoff (0 to 1) or a motif count (above 1). -->
        <param name="motifs" type="float" value="10" optional="true" label="Motifs to retrieve"
               help="a number between 0 and 1 as the p-value cutoff or an integer larger than 1 as the number of motifs">
            <validator type="in_range" max="20000" min="0" message="A float between 0 and 1 or an integer greater than 1" />
        </param>
    </inputs>
    <outputs>
        <!-- Combined stdout/stderr of the BETA run (written by the redirect in the command). -->
        <data format="txt" name="log" label="Log of BETA plus" />
        <!-- Target prediction results, collected from the BETA_OUTPUT working directory. -->
        <data format="pdf" name="functionoutput" label="BETA functional prediction on ${peakfile.name}"
              from_work_dir="BETA_OUTPUT/NA_function_prediction.pdf" />
        <data format="tabular" name="uptargetsoutput" label="BETA direct targets prediction on up regulated genes"
              from_work_dir="BETA_OUTPUT/NA_uptarget.txt" />
        <data format="tabular" name="downtargetsoutput" label="BETA direct targets prediction on down regulated genes"
              from_work_dir="BETA_OUTPUT/NA_downtarget.txt" />
        <data format="bed" name="uptargetpeaks" label="BETA Uptarget associated peaks"
              from_work_dir="BETA_OUTPUT/NA_uptarget_associate_peaks.bed" />
        <data format="bed" name="downtargetpeaks" label="BETA Downtarget associated peaks"
              from_work_dir="BETA_OUTPUT/NA_downtarget_associate_peaks.bed" />
        <!-- Motif analysis tables, collected from BETA_OUTPUT/motifresult. -->
        <data format="txt" name="upmotifs" label="BETA Motifs in up-target regions"
              from_work_dir="BETA_OUTPUT/motifresult/UP_MOTIFS.txt" />
        <data format="txt" name="up_non_motifs" label="BETA Motifs in up-target regions versus non-target regions"
              from_work_dir="BETA_OUTPUT/motifresult/UP_NON_MOTIFS.txt" />
        <data format="txt" name="downmotifs" label="BETA Motifs in down-target regions"
              from_work_dir="BETA_OUTPUT/motifresult/DOWN_MOTIFS.txt" />
        <data format="txt" name="down_non_motifs" label="BETA Motifs in down-target regions versus non-target regions"
              from_work_dir="BETA_OUTPUT/motifresult/DOWN_NON_MOTIFS.txt" />
        <data format="txt" name="differentialmotifs" label="BETA Motifs up-target regions versus down-target regions"
              from_work_dir="BETA_OUTPUT/motifresult/DIFFERENTIAL_MOTIF_UP_DOWN.txt" />
        <!-- HTML report; populated by the cp steps in the command rather than from_work_dir. -->
        <data format="html" name="motifresult" label="BETA Motif analysis on target regions" />
    </outputs>
    <tests>
        <!-- hg19 run on LIMMA-format expression data; checks the log and three result tables. -->
        <test>
            <param name="peakfile" value="peaks.bed" ftype="bed" dbkey="hg19" />
            <param name="distance" value="100000" />
            <param name="peaknumber" value="10000" />
            <param name="genomeName" value="hg19" />
            <param name="exprefile" value="diff_expr.xls" ftype="tabular" dbkey="hg19" />
            <param name="kind" value="LIM" />
            <param name="expreinfo" value="2,5,7" />
            <param name="gname2" value="Refseq" />
            <param name="diff_fdr" value="1.0" />
            <param name="diff_amount" value="0.5" />
            <param name="method" value="score" />
            <output name="log">
                <assert_contents>
                    <has_text_matching expression="Finished" />
                </assert_contents>
            </output>
            <output name="uptargetsoutput">
                <assert_contents>
                    <has_text_matching expression="NM_001002231" />
                </assert_contents>
            </output>
            <output name="downtargetsoutput">
                <assert_contents>
                    <has_text_matching expression="NM_001280" />
                </assert_contents>
            </output>
            <output name="differentialmotifs">
                <assert_contents>
                    <has_text_matching expression="CDX1\tHomeodomain Family" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**BETA plus**

@EXTERNAL_DOCUMENTATION@

@CITATION_SECTION@

This tool annotates the given intervals and scores with genome features such as gene body. Predicts Direct targets of TF and the active/repressive function prediction. Does motif analysis at targets region as well.
It's the major module in CEAS package which is written by Hyunjin Gene Shin, published in Bioinformatics (pubmed id:19689956).

.. class:: warningmark

**NEED IMPROVEMENT**

-----

**Parameters**

- **PEAKFILE file** contains peaks for the experiment in a bed format file. Normally, it's produced by the peak calling tool. It's required.
- **EXPREFILE file** contains the differentially expressed genes in a tab delimited text file. It's required.
- **Kind** The kind of your expression file format, LIM for LIMMA standard output with Microarray, CUF for Cuffdiffs standard output with RNA-seq, BSF for BETA specific format, and O for other formats.
- **genome** hg19 for human and mm9 for mouse. Others, don't set this parameter.
- **genomereference** Genome reference data with fasta format
- **gname2** If this switch is on, gene or transcript IDs in files given through -e will be considered as official gene symbols, DEFAULT=FALSE
- **EXPREINFO** is the columns info of the geneID, up/down status and statistical values column of your expression data. NOTE: use a comma as a connector. For example: 2,5,7 means geneID in the 2nd column, Tscore in 5th column and FDR in 7th column.
- **REFERENCE** is the refgene info file downloaded from UCSC genome browser. It is a tab delimited text file with gene annotation with refseq and gene symbol. Input this file only if your genome is neither hg19 nor mm9.
  profiling
- **OUTPUT** to specify the output files directory
- **bl** Whether or not to use CTCF boundary file to get the contributed peaks
- **BOUNDARYFILE** is the file with reasonable boundaries if --bl is on and genome is neither hg19 nor mm9.
- **NAME** specify the name of the output files.
- **DISTANCE** specify the distance which peaks within it will be considered.
- **DIFF_FDR** specify the differential genes by the 3rd column in file input via -e, genes with less than this value will be considered as the differentially changed genes.
- **DIFF_AMOUNT** specify the differential genes the top #(DIFF_AMOUNT) ranked by the 3rd column in file input via -e, genes ranked in the top # will be considered as the differentially expressed genes.
- **CUTOFF** specify a cutoff of ks-test in the function prediction part

-----

**Script parameter list of BETA plus**

::

  -h, --help            show this help message and exit
  -p PEAKFILE, --peakfile PEAKFILE
                        The bed format of peaks binding sites. (BETA support 3
                        or 5 columns bed format, CHROM, START, END (NAME,
                        SCORE))
  -e EXPREFILE, --diff_expr EXPREFILE
                        The differential expression file get from limma for
                        MicroArray ddata and cuffdiff for RNAseq data
  -k {LIM,CUF,BSF,O}, --kind {LIM,CUF,BSF,O}
                        The kind of your expression file,this is required,it
                        can be LIM, CUF, BSF, O. LIM for LIMMA standard
                        format. CUF for CUFDIFF standard format, BSF for BETA
                        specific format and O for other formats, if is 'O',
                        columns infor required via --info
  -g {hg19,mm9}, --genome {hg19,mm9}
                        Specify your species, hg19, mm9
  --gs GENOMEREFERNCE   GenomeReference file with fasta format
  --gname2              If this switch is on, gene or transcript IDs in files
                        given through -e will be considered as official gene
                        symbols, DEFAULT=FALSE
  --info EXPREINFO      Specify the geneID, up/down status and statistcal
                        values column of your expression data,NOTE: use a
                        comma as an connector. for example: 2,5,7 means geneID
                        in the 2nd column, Tscore in 5th column and FDR in 7
                        column DEFAULT:2,5,7 for LIMMA; 2,10,13 for Cuffdiff
                        and 1,2,3 for BETA specific format
  -r REFERENCE, --reference REFERENCE
                        The refgene info file downloaded from UCSC genome
                        browser.input this file only if your genome is neither
                        hg19 nor mm9
  -o OUTPUT, --output OUTPUT
                        The directory to store all the output files, if you
                        don't set this, files will be output into the current
                        directory
  --bl                  Whether or not use CTCF boundary to filter peaks
                        around a gene, DEFAULT=FALSE
  --bf BOUNDARYFILE     CTCF conserved peaks bed file, use this only when you
                        set --bl and the genome is neither hg19 nor mm9
  --pn PEAKNUMBER       The number of peaks you want to consider,
                        DEFAULT=10000
  --method {score,distance}
                        Define the method to do the TF/CR function prediction,
                        score for regulatory potential, distance for the
                        distance to the proximal binding peak. DEFAULT:SCORE
  -n NAME, --name NAME  This argument is used to name the result file.If not
                        set, the peakfile name will be used instead
  -d DISTANCE, --distance DISTANCE
                        Set a number which unit is 'base'. It will get peaks
                        within this distance from gene TSS. default:100000
                        (100kb)
  --df DIFF_FDR         Input a number 0~1 as a threshold to pick out the most
                        significant differential expressed genes by FDR,
                        DEFAULT = 1, that is select all the genes
  --da DIFF_AMOUNT      Get the most significant differential expressed genes
                        by the percentage(0-1) or number(larger than 1)Input a
                        number between 0-1, the rank based on fdr for example,
                        2000, so that the script will only consider top 2000
                        genes as the differentially expressed genes. DEFAULT =
                        0.5, that is select top 50 percent genes of up and
                        down seprately. NOTE: if you want to use diff_fdr,
                        please set this parameter to 1, otherwise it will get
                        the intersection of these two parameters
  -c CUTOFF, --cutoff CUTOFF
                        Input a number between 0~1 as a threshold to select
                        the closer target gene list(up regulate or down
                        regulate or both) with the p value was called by one
                        side ks-test, DEFAULT = 0.001
    ]]></help>
    <expand macro="citations" />
</tool>