Mercurial > repos > grau > dimont
changeset 4:23b912162e3b draft
Uploaded
author | grau |
---|---|
date | Wed, 06 Nov 2013 11:54:03 -0500 |
parents | 9076b1e4dcbf |
children | 456153e573d6 |
files | ._DimontWeb.jar DimontWeb.jar DimontWeb.xml galaxy/DimontWeb.jar galaxy/DimontWeb.xml galaxy/tool_dependencies.xml tool_dependencies.xml |
diffstat | 7 files changed, 143 insertions(+), 143 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/DimontWeb.xml Wed Nov 06 11:54:03 2013 -0500 @@ -0,0 +1,137 @@ +<tool id="Dimont" name="Dimont" version="0.1" force_history_refresh="true"> +<description>Dimont, a universal tool for de-novo motif discovery (beta).</description> +<command>java -Xms256M -Xmx2G -jar \$JAR_PATH/DimontWeb.jar --run $script_file $summary $summary.id $__new_file_path__ $summary.extra_files_path</command> +<inputs> +<param type="text" size="40" name="Dimont_jobname" label="Job name" value="" optional="true" help="Please enter a name for your job that should be used in the history (optional)"> +</param> +<param type="data" format="fasta" name="Dimont_ps_Input_sequences" label="<hr />Input sequences" help="The input sequences for de-novo motif discovery (can be uploaded using "GetData" -> "Upload File"), annotated FastA format. The required format is described in the help section." value="" optional="false"> +</param> + +<param type="text" size="40" name="Dimont_ps_Position_tag" label="Position tag" help="The tag for the position information in the FastA-annotation of the input file" value="" optional="false"> +</param> + +<param type="text" size="40" name="Dimont_ps_Value_tag" label="Value tag" help="The tag for the value information in the FastA-annotation of the input file" value="" optional="false"> +</param> + +<param type="float" name="Dimont_ps_Standard_deviation" label="Standard deviation" help="The standard deviation of the position distribution centered at the position specified by the position tag" value="75.0" optional="false"> +<validator type="in_range" min="1.0" max="10000.0" message="Value is not in the specified range [1.0, 10000.0]."/></param> + +<param type="text" size="40" name="Dimont_ps_Weighting_factor" label="Weighting factor" help="The value for weighting the data; either a value between 0 and 1, or a description relative to the standard deviation (e.g. 
+4sd)" value="0.2" optional="false"> +</param> + +<param type="integer" name="Dimont_ps_Starts" label="<hr />Starts" help="The number of pre-optimization runs." value="20" optional="false"> +<validator type="in_range" min="1" max="100" message="Value is not in the specified range [1, 100]."/></param> + +<param type="integer" name="Dimont_ps_Initial_motif_width" label="<hr />Initial motif width" help="The motif width that is used initially, may be adjusted during optimization." value="15" optional="false"> +<validator type="in_range" min="1" max="50" message="Value is not in the specified range [1, 50]."/></param> + +<param type="integer" name="Dimont_ps_Markov_order_of_motif_model" label="Markov order of motif model" help="The Markov order of the model for the motif." value="0" optional="false"> +<validator type="in_range" min="0" max="3" message="Value is not in the specified range [0, 3]."/></param> + +<param type="integer" name="Dimont_ps_Markov_order_of_background_model" label="Markov order of background model" help="The Markov order of the model for the background sequence, -1 defines uniform distribution." value="-1" optional="false"> +<validator type="in_range" min="-1" max="5" message="Value is not in the specified range [-1, 5]."/></param> + +<param type="float" name="Dimont_ps_Equivalent_sample_size" label="<hr />Equivalent sample size" help="Reflects the strength of the prior on the model parameters." value="4.0" optional="false"> +<validator type="in_range" min="0.0" max="Infinity" message="Value is not in the specified range [0.0, Infinity]."/></param> + +<param type="boolean" name="Dimont_ps_Delete_BSs_from_profile" label="Delete BSs from profile" help="A switch for deleting binding site positions of discovered motifs from the profile before searching for further motifs." 
checked="True" optional="false"> +</param> + +</inputs> +<requirements> + <requirement type="set_environment">JAR_PATH</requirement> +</requirements> +<configfiles> +<configfile name="script_file"> +<Dimont_ps_Input_sequences> +<value> +${Dimont_ps_Input_sequences}</value> +<extension> +${Dimont_ps_Input_sequences.ext}</extension> +</Dimont_ps_Input_sequences> + +<Dimont_ps_Position_tag> +${Dimont_ps_Position_tag}</Dimont_ps_Position_tag> + +<Dimont_ps_Value_tag> +${Dimont_ps_Value_tag}</Dimont_ps_Value_tag> + +<Dimont_ps_Standard_deviation> +${Dimont_ps_Standard_deviation}</Dimont_ps_Standard_deviation> + +<Dimont_ps_Weighting_factor> +${Dimont_ps_Weighting_factor}</Dimont_ps_Weighting_factor> + +<Dimont_ps_Starts> +${Dimont_ps_Starts}</Dimont_ps_Starts> + +<Dimont_ps_Initial_motif_width> +${Dimont_ps_Initial_motif_width}</Dimont_ps_Initial_motif_width> + +<Dimont_ps_Markov_order_of_motif_model> +${Dimont_ps_Markov_order_of_motif_model}</Dimont_ps_Markov_order_of_motif_model> + +<Dimont_ps_Markov_order_of_background_model> +${Dimont_ps_Markov_order_of_background_model}</Dimont_ps_Markov_order_of_background_model> + +<Dimont_ps_Equivalent_sample_size> +${Dimont_ps_Equivalent_sample_size}</Dimont_ps_Equivalent_sample_size> + +<Dimont_ps_Delete_BSs_from_profile> +${Dimont_ps_Delete_BSs_from_profile}</Dimont_ps_Delete_BSs_from_profile> + +</configfile> +</configfiles> +<outputs> +<data format="html" name="summary" label="#if str($Dimont_jobname) == '' then $tool.name + ' on ' + $on_string else $Dimont_jobname#"> +</data> +</outputs> +<help> +**Dimont** is a universal tool for de-novo motif discovery. Dimont has successfully been applied to ChIP-seq, ChIP-exo and protein-binding microarray (PBM) data. + +Input sequences must be supplied in an annotated FastA format as a file uploaded by the "Upload File" task in section "Get Data" of Galaxy. 
+In the annotation of each sequence, you need to provide a value that reflects the confidence that this sequence is bound by the factor of interest. +Such confidences may be peak statistics (e.g., number of fragments under a peak) for ChIP data or signal intensities for PBM data. In addition, you need to provide an anchor position within the sequence. +In case of ChIP data, this anchor position could for instance be the peak summit. +For instance, an annotated FastA file for ChIP-exo data comprising sequences of length 100 centered around the peak summit could look like:: + + > peak: 50; signal: 515 + ggccatgtgtatttttttaaatttccac... + > peak: 50; signal: 199 + GGTCCCCTGGGAGGATGGGGACGTGCTG... + ... + +where the anchor point is given as 50 for the first two sequences, and the confidence amounts to 515 and 199, respectively. +The FastA comment may contain additional annotations of the format ``key1 : value1; key2: value2;...``. +We also provide an example_ input file and a Perl script_ for preparing data in the format required by Dimont. + +Accordingly, you would need to set the parameter "Position tag" to ``peak`` and the parameter "Value tag" to ``signal`` for the input file. + +For the standard deviation of the position prior, the initial motif length and the number of pre-optimization runs, we provide default values that worked well in our studies on ChIP and PBM data. +However, you may want to adjust these parameters to meet your prior information. + +The parameter "Markov order of the motif model" sets the order of the inhomogeneous Markov model used for modeling the motif. If this parameter is set to ``0``, you obtain a position weight matrix (PWM) model. +If it is set to ``1``, you obtain a weight array matrix (WAM) model. You can set the order of the motif model to at most ``3``. + +The parameter "Markov order of the background model" sets the order of the homogeneous Markov model used for modeling positions not covered by a motif. 
+If this parameter is set to ``-1``, you obtain a uniform distribution, which worked well for ChIP data. For PBM data, orders of up to ``4`` resulted in an increased prediction performance in our case studies. The maximum allowed value is ``5``. + +The parameter "Weighting factor" defines the proportion of sequences that you expect to be bound by the targeted factor with high confidence. For ChIP data, the default value of ``0.2`` typically works well. +For PBM data, containing a large number of unspecific probes, this parameter should be set to a lower value, e.g. ``0.01``. + +The "Equivalent sample size" reflects the strength of the influence of the prior on the model parameters, where higher values smooth out the parameters to a greater extent. + +The parameter "Delete BSs from profile" defines if BSs of already discovered motifs should be deleted, i.e., "blanked out", from the sequence before searching for further motifs. + +You can also install this web-application within your local Galaxy server. Instructions can be found at the Dimont_ page of Jstacs. +There you can also download a command line version of Dimont. + +If you experience problems using Dimont, please contact_ us. + +.. _example: http://www.jstacs.de/downloads/dimont-example.fa +.. _script: http://www.jstacs.de/index.php/Dimont#Data_preparation +.. _Dimont: http://jstacs.de/index.php/Dimont +.. _contact: mailto:grau@informatik.uni-halle.de +</help> +</tool> +
--- a/galaxy/DimontWeb.xml Wed Nov 06 11:52:25 2013 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,137 +0,0 @@ -<tool id="Dimont" name="Dimont" version="0.1" force_history_refresh="true"> -<description>Dimont, a universal tool for de-novo motif discovery (beta).</description> -<command>java -Xms256M -Xmx2G -jar \$JAR_PATH/DimontWeb.jar --run $script_file $summary $summary.id $__new_file_path__ $summary.extra_files_path</command> -<inputs> -<param type="text" size="40" name="Dimont_jobname" label="Job name" value="" optional="true" help="Please enter a name for your job that should be used in the history (optional)"> -</param> -<param type="data" format="fasta" name="Dimont_ps_Input_sequences" label="<hr />Input sequences" help="The input sequences for de-novo motif discovery (can be uploaded using "GetData" -> "Upload File"), annotated FastA format. The required format is described in the help section." value="" optional="false"> -</param> - -<param type="text" size="40" name="Dimont_ps_Position_tag" label="Position tag" help="The tag for the position information in the FastA-annotation of the input file" value="" optional="false"> -</param> - -<param type="text" size="40" name="Dimont_ps_Value_tag" label="Value tag" help="The tag for the value information in the FastA-annotation of the input file" value="" optional="false"> -</param> - -<param type="float" name="Dimont_ps_Standard_deviation" label="Standard deviation" help="The standard deviation of the position distribution centered at the position specified by the position tag" value="75.0" optional="false"> -<validator type="in_range" min="1.0" max="10000.0" message="Value is not in the specified range [1.0, 10000.0]."/></param> - -<param type="text" size="40" name="Dimont_ps_Weighting_factor" label="Weighting factor" help="The value for weighting the data; either a value between 0 and 1, or a description relative to the standard deviation (e.g. 
+4sd)" value="0.2" optional="false"> -</param> - -<param type="integer" name="Dimont_ps_Starts" label="<hr />Starts" help="The number of pre-optimization runs." value="20" optional="false"> -<validator type="in_range" min="1" max="100" message="Value is not in the specified range [1, 100]."/></param> - -<param type="integer" name="Dimont_ps_Initial_motif_width" label="<hr />Initial motif width" help="The motif width that is used initially, may be adjusted during optimization." value="15" optional="false"> -<validator type="in_range" min="1" max="50" message="Value is not in the specified range [1, 50]."/></param> - -<param type="integer" name="Dimont_ps_Markov_order_of_motif_model" label="Markov order of motif model" help="The Markov order of the model for the motif." value="0" optional="false"> -<validator type="in_range" min="0" max="3" message="Value is not in the specified range [0, 3]."/></param> - -<param type="integer" name="Dimont_ps_Markov_order_of_background_model" label="Markov order of background model" help="The Markov order of the model for the background sequence and the background sequence, -1 defines uniform distribution." value="-1" optional="false"> -<validator type="in_range" min="-1" max="5" message="Value is not in the specified range [-1, 5]."/></param> - -<param type="float" name="Dimont_ps_Equivalent_sample_size" label="<hr />Equivalent sample size" help="Reflects the strength of the prior on the model parameters." value="4.0" optional="false"> -<validator type="in_range" min="0.0" max="Infinity" message="Value is not in the specified range [0.0, Infinity]."/></param> - -<param type="boolean" name="Dimont_ps_Delete_BSs_from_profile" label="Delete BSs from profile" help="A switch for deleting binding site positions of discovered motifs from the profile before searching for futher motifs." 
checked="True" optional="false"> -</param> - -</inputs> -<requirements> - <requirement type="set_environment">JAR_PATH</requirement> -</requirements> -<configfiles> -<configfile name="script_file"> -<Dimont_ps_Input_sequences> -<value> -${Dimont_ps_Input_sequences}</value> -<extension> -${Dimont_ps_Input_sequences.ext}</extension> -</Dimont_ps_Input_sequences> - -<Dimont_ps_Position_tag> -${Dimont_ps_Position_tag}</Dimont_ps_Position_tag> - -<Dimont_ps_Value_tag> -${Dimont_ps_Value_tag}</Dimont_ps_Value_tag> - -<Dimont_ps_Standard_deviation> -${Dimont_ps_Standard_deviation}</Dimont_ps_Standard_deviation> - -<Dimont_ps_Weighting_factor> -${Dimont_ps_Weighting_factor}</Dimont_ps_Weighting_factor> - -<Dimont_ps_Starts> -${Dimont_ps_Starts}</Dimont_ps_Starts> - -<Dimont_ps_Initial_motif_width> -${Dimont_ps_Initial_motif_width}</Dimont_ps_Initial_motif_width> - -<Dimont_ps_Markov_order_of_motif_model> -${Dimont_ps_Markov_order_of_motif_model}</Dimont_ps_Markov_order_of_motif_model> - -<Dimont_ps_Markov_order_of_background_model> -${Dimont_ps_Markov_order_of_background_model}</Dimont_ps_Markov_order_of_background_model> - -<Dimont_ps_Equivalent_sample_size> -${Dimont_ps_Equivalent_sample_size}</Dimont_ps_Equivalent_sample_size> - -<Dimont_ps_Delete_BSs_from_profile> -${Dimont_ps_Delete_BSs_from_profile}</Dimont_ps_Delete_BSs_from_profile> - -</configfile> -</configfiles> -<outputs> -<data format="html" name="summary" label="#if str($Dimont_jobname) == '' then $tool.name + ' on ' + $on_string else $Dimont_jobname#"> -</data> -</outputs> -<help> -**Dimont** is a universal tool for de-novo motif discovery. Dimont has successfully been applied to ChIP-seq, ChIP-exo and protein-binding microarray (PBM) data. - -Input sequences must be supplied in an annotated FastA format as a file uploaded by the "Upload File" task in section "Get Data" of Galaxy. 
-In the annotation of each sequence, you need to provide a value that reflects the confidence that this sequence is bound by the factor of interest. -Such confidences may be peak statistics (e.g., number of fragments under a peak) for ChIP data or signal intensities for PBM data. In addition, you need to provide an anchor position within the sequence. -In case of ChIP data, this anchor position could for instance be the peak summit. -For instance, an annotated FastA file for ChIP-exo data comprising sequences of length 100 centered around the peak summit could look like:: - - > peak: 50; signal: 515 - ggccatgtgtatttttttaaatttccac... - > peak: 50; signal: 199 - GGTCCCCTGGGAGGATGGGGACGTGCTG... - ... - -where the anchor point is given as 50 for the first two sequences, and the confidence amounts to 515 and 199, respectively. -The FastA comment may contain additional annotations of the format ``key1 : value1; key2: value2;...``. -We also provide an example_ input file and a Perl script_ for preparing data in the format required by Dimont. - -Accordingly, you would need to set the parameter "Position tag" to ``peak`` and the parameter "Value tag" to ``signal`` for the input file. - -For the standard deviation of the position prior, the initial motif length and the number of pre-optimization runs, we provide default values that worked well in our studies on ChIP and PBM data. -However, you may want adjust these parameters to meet your prior information. - -The parameter "Markov order of the motif model" sets the order of the inhomogeneous Markov model used for modeling the motif. If this parameter is set to ``0``, you obtain a position weight matrix (PWM) model. -If it is set to ``1``, you obtain a weight array matrix (WAM) model. You can set the order of the motif model to at most ``3``. - -The parameter "Markov order of the background model" sets the order of the homogeneous Markov model used for modeling positions not covered by a motif. 
-If this parameter is set to ``-1``, you obtain a uniform distribution, which worked well for ChIP data. For PBM data, orders of up to ``4`` resulted in an increased prediction performance in our case studies. The maximum allowed value is ``5``. - -The parameter "Weighting factor" defines the proportion of sequences that you expect to be bound by the targeted factor with high confidence. For ChIP data, the default value of ``0.2`` typically works well. -For PBM data, containing a large number of unspecific probes, this parameter should be set to a lower value, e.g. ``0.01``. - -The "Equivalent sample size" reflects the strength of the influence of the prior on the model parameters, where higher values smooth out the parameters to a greater extent. - -The parameter "Delete BSs from profile" defines if BSs of already discovered motifs should be deleted, i.e., "blanked out", from the sequence before searching for futher motifs. - -You can also install this web-application within your local Galaxy server. Instructions can be found at the Dimont_ page of Jstacs. -There you can also download a command line version of Dimont. - -If you experience problems using Dimont, please contact_ us. - -.. _example: http://www.jstacs.de/downloads/dimont-example.fa -.. _script: http://www.jstacs.de/index.php/Dimont#Data_preparation -.. _Dimont: http://jstacs.de/index.php/Dimont -.. _contact: mailto:grau@informatik.uni-halle.de -</help> -</tool> -
--- a/galaxy/tool_dependencies.xml Wed Nov 06 11:52:25 2013 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <set_environment version="1.0"> - <environment_variable name="JAR_PATH" action="set_to">$REPOSITORY_INSTALL_DIR</environment_variable> - </set_environment> -</tool_dependency> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Wed Nov 06 11:54:03 2013 -0500 @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <set_environment version="1.0"> + <environment_variable name="JAR_PATH" action="set_to">$REPOSITORY_INSTALL_DIR</environment_variable> + </set_environment> +</tool_dependency> \ No newline at end of file